asadsandhu committed
Commit 244e074 · 1 Parent(s): 7ab76b7
Files changed (1)
  1. app.py +6 -4
app.py CHANGED
@@ -18,7 +18,7 @@ index = faiss.read_index("faiss_index.bin")
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 # ----------------------
-# Load HuggingFace LLM (Nous-Hermes)
+# Load HuggingFace LLM (BioMistral-7B)
 # ----------------------
 model_id = "BioMistral/BioMistral-7B"
 
@@ -52,7 +52,6 @@ def build_prompt(query, retrieved_docs):
     context_text = "\n".join([
         f"- {doc['text']}" for _, doc in retrieved_docs.iterrows()
     ])
-
     prompt = f"""[INST] <<SYS>>
 You are a medical assistant trained on clinical reasoning data. Given the following patient query and related clinical observations, generate a diagnostic explanation or suggestion based on the context.
 <</SYS>>
@@ -68,8 +67,10 @@ You are a medical assistant trained on clinical reasoning data. Given the follow
 """
     return prompt
 
+# ✅ FIXED generate_local_answer
 def generate_local_answer(prompt, max_new_tokens=512):
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     output = generation_model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
@@ -125,4 +126,5 @@ Enter a natural-language query describing your patient's condition to receive an
 
 submit_btn.click(fn=rag_chat, inputs=query_input, outputs=output)
 
-demo.launch(share=True)
+# ✅ Use `share=False` inside Hugging Face Spaces
+demo.launch(share=False)
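For context, here is a minimal self-contained sketch of the device-aware generation pattern this commit applies in generate_local_answer. Only the torch.cuda.is_available() / .to(device) logic, the model_id, and the max_new_tokens default come from the diff; the loading call, dtype choice, and sampling settings below are assumptions for illustration, not taken from app.py.

    # Sketch only: device-aware text generation, runnable on CPU or GPU.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "BioMistral/BioMistral-7B"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    generation_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # assumption: half precision on GPU, full precision on CPU
        torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
    ).to(device)

    def generate_local_answer(prompt, max_new_tokens=512):
        # Tokenize, then move the ids to whichever device the model lives on,
        # instead of calling .cuda() unconditionally.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        output = generation_model.generate(
            input_ids=input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,    # assumption: sampling settings are not shown in the diff
            temperature=0.7,
        )
        # Return only the newly generated tokens, not the echoed prompt.
        return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

The launch change follows the same deployment logic: a Space already serves the Gradio app publicly, so demo.launch(share=False) skips the share tunnel, while share=True is mainly useful when running the app locally.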