marcelbinz committed on
Commit
6c84060
·
verified ·
1 Parent(s): 44fb885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -3
app.py CHANGED
@@ -1,13 +1,30 @@
1
  import spaces
2
  import gradio as gr
3
  import torch
4
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  pipe = pipeline(
7
  "text-generation",
8
- model="marcelbinz/Llama-3.1-Minitaur-8B",
 
9
  device_map="auto",
10
- torch_dtype=torch.bfloat16
11
  )
12
 
13
  @spaces.GPU
 
1
  import spaces
2
  import gradio as gr
3
  import torch
4
+ from transformers import pipeline, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
5
+
6
+ MODEL_ID = "marcelbinz/Llama-3.1-Centaur-70B"
7
+
8
+ bnb_4bit_config = BitsAndBytesConfig(
9
+ load_in_4bit=True,
10
+ bnb_4bit_quant_type="nf4",
11
+ bnb_4bit_compute_dtype=torch.bfloat16,
12
+ bnb_4bit_use_double_quant=True,
13
+ )
14
+
15
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
16
+ model = AutoModelForCausalLM.from_pretrained(
17
+ MODEL_ID,
18
+ device_map="auto",
19
+ attn_implementation="flash_attention_2",
20
+ quantization_config=bnb_4bit_config,
21
+ )
22
 
23
  pipe = pipeline(
24
  "text-generation",
25
+ model=model,
26
+ tokenizer=tokenizer,
27
  device_map="auto",
 
28
  )
29
 
30
  @spaces.GPU