marcelbinz committed (verified)
Commit 1453861 · Parent(s): d9f0e5b

Update app.py

Files changed (1): app.py +10 -1
app.py CHANGED
@@ -1,12 +1,20 @@
 import spaces
 import gradio as gr
 import torch
-from transformers import pipeline, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers import pipeline, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, AutoConfig
 from peft import PeftModel
 
 MODEL_ID = "unsloth/Meta-Llama-3.1-70B-bnb-4bit"
 ADAPTER_ID = "marcelbinz/Llama-3.1-Centaur-70B-adapter"
 
+cfg = AutoConfig.from_pretrained(MODEL_ID)
+cfg.rope_scaling = {
+    "type": "yarn",
+    "factor": 4.0,
+    "original_max_position_embeddings": 8192,
+}
+cfg.max_position_embeddings = 32768
+
 bnb_4bit_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -19,6 +27,7 @@ model_base = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
     attn_implementation="flash_attention_2",
+    config=cfg,
     quantization_config=bnb_4bit_config,
 )
 
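For context: the commit patches the base model's config with YaRN RoPE scaling (factor 4.0 over the original 8,192-token window, i.e. a 32,768-token context) and passes the patched config into AutoModelForCausalLM.from_pretrained. The diff imports PeftModel and pipeline but the hunks cut off before they are used; below is a minimal sketch of how the adapter and pipeline would typically be wired up downstream, assuming app.py follows the standard PEFT pattern. The tokenizer variable and pipeline arguments are assumptions, not part of this commit.

# Minimal sketch (not shown in this commit's hunks): attaching the
# Centaur adapter and building a generation pipeline from the
# quantized base model loaded above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Apply the LoRA adapter weights on top of the 4-bit base model.
model = PeftModel.from_pretrained(model_base, ADAPTER_ID)

# Wrap model and tokenizer in a text-generation pipeline for the
# Gradio app to call.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)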