VisoLearn committed
Commit bb20016 · verified · 1 Parent(s): 9ce1f1e

Update app.py

Files changed (1):
  app.py +12 -7
app.py CHANGED
@@ -5,20 +5,25 @@ import torch
 from threading import Thread
 import bitsandbytes as bnb
 
-phi4_model_path = "Compumacy/OpenBioLLm-70B"
+phi4_model_path = "Daemontatox/Qwen3-14B-Griffon"
 
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 # Load model with 4-bit quantization
+from transformers import BitsAndBytesConfig
+
+# Configure 4-bit quantization
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+
 phi4_model = AutoModelForCausalLM.from_pretrained(
     phi4_model_path,
     device_map="auto",
-    load_in_4bit=True,  # Enable 4-bit quantization
-    quantization_config={
-        "bnb_4bit_compute_dtype": torch.float16,
-        "bnb_4bit_use_double_quant": True,
-        "bnb_4bit_quant_type": "nf4"
-    }
+    quantization_config=quantization_config
 )
 phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
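In short, the commit swaps the checkpoint from Compumacy/OpenBioLLm-70B to Daemontatox/Qwen3-14B-Griffon and moves the 4-bit settings out of raw from_pretrained kwargs into a transformers BitsAndBytesConfig, the supported way to pass bitsandbytes options. For context, a minimal runnable sketch of the post-commit loading path; the short generate smoke test at the end is an illustrative assumption, not part of app.py:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

phi4_model_path = "Daemontatox/Qwen3-14B-Griffon"

# 4-bit NF4 weights, double quantization, fp16 compute: the exact
# settings this commit encodes in BitsAndBytesConfig.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# device_map="auto" places the quantized weights across available devices.
phi4_model = AutoModelForCausalLM.from_pretrained(
    phi4_model_path,
    device_map="auto",
    quantization_config=quantization_config,
)
phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)

# Illustrative smoke test (not in the diff): one short completion.
inputs = phi4_tokenizer("Hello, world", return_tensors="pt").to(phi4_model.device)
output_ids = phi4_model.generate(**inputs, max_new_tokens=32)
print(phi4_tokenizer.decode(output_ids[0], skip_special_tokens=True))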