asadsandhu committed on
Commit
ff20a1e
·
1 Parent(s): 03adce4

Model updated.

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -20,12 +20,11 @@ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
20
  # ----------------------
21
  # Load HuggingFace LLM (BioMistral-7B)
22
  # ----------------------
23
- model_id = "royalhaze/BioMistral-7B"
24
-
25
  bnb_config = BitsAndBytesConfig(
26
  load_in_4bit=True,
27
- bnb_4bit_use_double_quant=True,
28
  bnb_4bit_quant_type="nf4",
 
29
  bnb_4bit_compute_dtype=torch.float16,
30
  )
31
 
@@ -35,11 +34,12 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
35
 
36
  generation_model = AutoModelForCausalLM.from_pretrained(
37
  model_id,
38
- quantization_config=bnb_config if torch.cuda.is_available() else None,
39
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
40
  device_map="auto" if torch.cuda.is_available() else None
41
  ).to(device)
42
 
 
43
  # ----------------------
44
  # RAG Functions
45
  # ----------------------
 
20
  # ----------------------
21
  # Load HuggingFace LLM (BioMistral-7B)
22
  # ----------------------
23
+ model_id = "BioMistral/BioMistral-7B"
 
24
  bnb_config = BitsAndBytesConfig(
25
  load_in_4bit=True,
 
26
  bnb_4bit_quant_type="nf4",
27
+ bnb_4bit_use_double_quant=True,
28
  bnb_4bit_compute_dtype=torch.float16,
29
  )
30
 
 
34
 
35
  generation_model = AutoModelForCausalLM.from_pretrained(
36
  model_id,
37
+ quantization_config=bnb_config,
38
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
39
  device_map="auto" if torch.cuda.is_available() else None
40
  ).to(device)
41
 
42
+
43
  # ----------------------
44
  # RAG Functions
45
  # ----------------------