asadsandhu committed
Commit 244e074 · 1 Parent(s): 7ab76b7
Files changed (1)
  1. app.py +6 -4
app.py CHANGED
@@ -18,7 +18,7 @@ index = faiss.read_index("faiss_index.bin")
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 # ----------------------
-# Load HuggingFace LLM (Nous-Hermes)
+# Load HuggingFace LLM (BioMistral-7B)
 # ----------------------
 model_id = "BioMistral/BioMistral-7B"
 
@@ -52,7 +52,6 @@ def build_prompt(query, retrieved_docs):
     context_text = "\n".join([
         f"- {doc['text']}" for _, doc in retrieved_docs.iterrows()
     ])
-
     prompt = f"""[INST] <<SYS>>
 You are a medical assistant trained on clinical reasoning data. Given the following patient query and related clinical observations, generate a diagnostic explanation or suggestion based on the context.
 <</SYS>>
@@ -68,8 +67,10 @@ You are a medical assistant trained on clinical reasoning data. Given the follow
 """
     return prompt
 
+# ✅ FIXED generate_local_answer
 def generate_local_answer(prompt, max_new_tokens=512):
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     output = generation_model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
@@ -125,4 +126,5 @@ Enter a natural-language query describing your patient's condition to receive an
 
 submit_btn.click(fn=rag_chat, inputs=query_input, outputs=output)
 
-demo.launch(share=True)
+# ✅ Use `share=False` inside Hugging Face Spaces
+demo.launch(share=False)
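For context, here is a minimal self-contained sketch of the device-aware generation pattern this commit applies in generate_local_answer. Only the torch.cuda.is_available() / .to(device) logic, the model_id, and the max_new_tokens default come from the diff; the loading call, dtype choice, and sampling settings below are assumptions for illustration, not taken from app.py.

    # Sketch only: device-aware text generation, runnable on CPU or GPU.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "BioMistral/BioMistral-7B"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    generation_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # assumption: half precision on GPU, full precision on CPU
        torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
    ).to(device)

    def generate_local_answer(prompt, max_new_tokens=512):
        # Tokenize, then move the ids to whichever device the model lives on,
        # instead of calling .cuda() unconditionally.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        output = generation_model.generate(
            input_ids=input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,    # assumption: sampling settings are not shown in the diff
            temperature=0.7,
        )
        # Return only the newly generated tokens, not the echoed prompt.
        return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

The launch change follows the same deployment logic: a Space already serves the Gradio app publicly, so demo.launch(share=False) skips the share tunnel, while share=True is mainly useful when running the app locally.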