asadsandhu committed
Commit 7ab76b7 · 1 Parent(s): 56dc0cd

Model changed.

Files changed (1)
  1. app.py +11 -13
app.py CHANGED
@@ -18,26 +18,24 @@ index = faiss.read_index("faiss_index.bin")
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 # ----------------------
-# Load HuggingFace LLM (BioMistral-7B)
+# Load HuggingFace LLM (Nous-Hermes)
 # ----------------------
 model_id = "BioMistral/BioMistral-7B"
+
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
     bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
     bnb_4bit_compute_dtype=torch.float16,
 )
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
 generation_model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    quantization_config=bnb_config,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto" if torch.cuda.is_available() else None
-).to(device)
+    torch_dtype=torch.float16,
+    device_map="auto",
+    quantization_config=bnb_config
+)
 
 # ----------------------
 # RAG Functions
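
Note: the old code chose a device by hand and ended with .to(device); the new code delegates placement to Accelerate through device_map="auto". This is required rather than cosmetic: transformers rejects .to() on bitsandbytes 4-bit models, so once load_in_4bit takes effect the call has to go. A quick placement check after loading (a sketch, not part of app.py; both attributes are standard transformers APIs):

    print(generation_model.device)                  # e.g. cuda:0 when a GPU is visible
    print(generation_model.get_memory_footprint())  # approximate bytes held by the 4-bit weights
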
@@ -54,7 +52,7 @@ def build_prompt(query, retrieved_docs):
     context_text = "\n".join([
         f"- {doc['text']}" for _, doc in retrieved_docs.iterrows()
     ])
-
+
     prompt = f"""[INST] <<SYS>>
 You are a medical assistant trained on clinical reasoning data. Given the following patient query and related clinical observations, generate a diagnostic explanation or suggestion based on the context.
 <</SYS>>
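
Note: build_prompt renders each retrieved row as a bulleted line and wraps the result in Mistral's [INST] <<SYS>> chat format. For reference, a toy run of the same join (a sketch; the sample rows are invented, only the "text" column name comes from the code):

    import pandas as pd

    # Hypothetical retrieval results, shaped like the app's FAISS hits.
    retrieved_docs = pd.DataFrame({"text": [
        "Patient reports chest pain radiating to the left arm.",
        "ECG shows ST elevation in the inferior leads.",
    ]})

    context_text = "\n".join(f"- {doc['text']}" for _, doc in retrieved_docs.iterrows())
    print(context_text)
    # - Patient reports chest pain radiating to the left arm.
    # - ECG shows ST elevation in the inferior leads.
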
@@ -71,7 +69,7 @@ You are a medical assistant trained on clinical reasoning data. Given the follow
     return prompt
 
 def generate_local_answer(prompt, max_new_tokens=512):
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
     output = generation_model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
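
Note: .cuda() pins the inputs to the GPU, which matches device_map="auto" on a CUDA host but raises on CPU-only machines, where the removed torch.cuda.is_available() guard would still have worked. A device-agnostic variant (a sketch of an alternative, not what this commit does):

    # Send inputs to whatever device Accelerate chose for the model's weights,
    # so the same line runs on GPU and CPU hosts alike.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(generation_model.device)
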
@@ -93,7 +91,7 @@ def rag_chat(query):
     answer = generate_local_answer(prompt)
     return answer
 
-# Optional: CSS for improved UX
+# Optional: basic CSS to enhance layout
 custom_css = """
 textarea, .input_textbox {
     font-size: 1.05rem !important;
@@ -127,4 +125,4 @@ Enter a natural-language query describing your patient's condition to receive an
 
 submit_btn.click(fn=rag_chat, inputs=query_input, outputs=output)
 
-demo.launch()
+demo.launch(share=True)
 
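Note: share=True asks Gradio to open a temporary public *.gradio.live tunnel, which only matters when the app runs outside Spaces; on Hugging Face Spaces the app is already served publicly and Gradio warns that share is unsupported there. A local-run sketch (the commented server_name line is a hypothetical alternative, not in app.py):

    demo.launch(
        share=True,               # temporary public gradio.live URL for local runs
        # server_name="0.0.0.0",  # alternative: expose on the local network without a tunnel
    )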