asadsandhu committed on
Commit
ff20a1e
·
1 Parent(s): 03adce4

Model updated.

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -20,12 +20,11 @@ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
20
  # ----------------------
21
  # Load HuggingFace LLM (BioMistral-7B)
22
  # ----------------------
23
- model_id = "royalhaze/BioMistral-7B"
24
-
25
  bnb_config = BitsAndBytesConfig(
26
  load_in_4bit=True,
27
- bnb_4bit_use_double_quant=True,
28
  bnb_4bit_quant_type="nf4",
 
29
  bnb_4bit_compute_dtype=torch.float16,
30
  )
31
 
@@ -35,11 +34,12 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
35
 
36
  generation_model = AutoModelForCausalLM.from_pretrained(
37
  model_id,
38
- quantization_config=bnb_config if torch.cuda.is_available() else None,
39
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
40
  device_map="auto" if torch.cuda.is_available() else None
41
  ).to(device)
42
 
 
43
  # ----------------------
44
  # RAG Functions
45
  # ----------------------
 
20
  # ----------------------
21
  # Load HuggingFace LLM (BioMistral-7B)
22
  # ----------------------
23
+ model_id = "BioMistral/BioMistral-7B"
 
24
  bnb_config = BitsAndBytesConfig(
25
  load_in_4bit=True,
 
26
  bnb_4bit_quant_type="nf4",
27
+ bnb_4bit_use_double_quant=True,
28
  bnb_4bit_compute_dtype=torch.float16,
29
  )
30
 
 
34
 
35
  generation_model = AutoModelForCausalLM.from_pretrained(
36
  model_id,
37
+ quantization_config=bnb_config,
38
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
39
  device_map="auto" if torch.cuda.is_available() else None
40
  ).to(device)
41
 
42
+
43
  # ----------------------
44
  # RAG Functions
45
  # ----------------------