asadsandhu committed on
Commit
ffa32ca
·
1 Parent(s): 221212a
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -23,10 +23,18 @@ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
23
  model_id = "BioMistral/BioMistral-7B"
24
 
25
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
 
 
 
 
 
26
  generation_model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
 
 
28
  torch_dtype=torch.float32,
29
- device_map={"": "cpu"}
30
  )
31
 
32
  # ----------------------
 
23
  model_id = "BioMistral/BioMistral-7B"
24
 
25
  tokenizer = AutoTokenizer.from_pretrained(model_id)
26
+ bnb_config = BitsAndBytesConfig(
27
+ load_in_4bit=True,
28
+ bnb_4bit_quant_type="nf4",
29
+ bnb_4bit_use_double_quant=True,
30
+ bnb_4bit_compute_dtype=torch.float32,
31
+ )
32
+
33
  generation_model = AutoModelForCausalLM.from_pretrained(
34
  model_id,
35
+ device_map={"": "cpu"},
36
+ quantization_config=bnb_config,
37
  torch_dtype=torch.float32,
 
38
  )
39
 
40
  # ----------------------