marcelbinz commited on
Commit
298c3b7
·
verified ·
1 Parent(s): 475a64e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -2,8 +2,10 @@ import spaces
2
  import gradio as gr
3
  import torch
4
  from transformers import pipeline, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
 
5
 
6
  MODEL_ID = "unsloth/Meta-Llama-3.1-70B-bnb-4bit"
 
7
 
8
  bnb_4bit_config = BitsAndBytesConfig(
9
  load_in_4bit=True,
@@ -13,13 +15,15 @@ bnb_4bit_config = BitsAndBytesConfig(
13
  )
14
 
15
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
16
- model = AutoModelForCausalLM.from_pretrained(
17
  MODEL_ID,
18
  device_map="auto",
19
  attn_implementation="flash_attention_2",
20
  quantization_config=bnb_4bit_config,
21
  )
22
 
 
 
23
  pipe = pipeline(
24
  "text-generation",
25
  model=model,
 
2
  import gradio as gr
3
  import torch
4
  from transformers import pipeline, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
5
+ from peft import PeftModel
6
 
7
  MODEL_ID = "unsloth/Meta-Llama-3.1-70B-bnb-4bit"
8
+ ADAPTER_ID = "marcelbinz/Llama-3.1-Centaur-70B-adapter"
9
 
10
  bnb_4bit_config = BitsAndBytesConfig(
11
  load_in_4bit=True,
 
15
  )
16
 
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
18
+ model_base = AutoModelForCausalLM.from_pretrained(
19
  MODEL_ID,
20
  device_map="auto",
21
  attn_implementation="flash_attention_2",
22
  quantization_config=bnb_4bit_config,
23
  )
24
 
25
+ model = PeftModel.from_pretrained(model_base, ADAPTER_ID, device_map="auto")
26
+
27
  pipe = pipeline(
28
  "text-generation",
29
  model=model,