asadsandhu committed
Commit 03adce4 · 1 Parent(s): 0619f86

Model changes: lightweight model used.

Files changed (1)
  1. app.py +9 -9
app.py CHANGED
@@ -18,9 +18,9 @@ index = faiss.read_index("faiss_index.bin")
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 # ----------------------
-# Load HuggingFace LLM (Nous-Hermes)
+# Load HuggingFace LLM (BioMistral-7B)
 # ----------------------
-model_id = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"
+model_id = "royalhaze/BioMistral-7B"
 
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
@@ -29,14 +29,15 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.float16,
 )
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
 generation_model = AutoModelForCausalLM.from_pretrained(
     model_id,
+    quantization_config=bnb_config if torch.cuda.is_available() else None,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto" if torch.cuda.is_available() else None,
-    quantization_config=bnb_config if torch.cuda.is_available() else None
+    device_map="auto" if torch.cuda.is_available() else None
 ).to(device)
 
 # ----------------------
@@ -54,7 +55,7 @@ def build_prompt(query, retrieved_docs):
     context_text = "\n".join([
         f"- {doc['text']}" for _, doc in retrieved_docs.iterrows()
     ])
-
+
     prompt = f"""[INST] <<SYS>>
 You are a medical assistant trained on clinical reasoning data. Given the following patient query and related clinical observations, generate a diagnostic explanation or suggestion based on the context.
 <</SYS>>
@@ -71,7 +72,6 @@ You are a medical assistant trained on clinical reasoning data. Given the follow
     return prompt
 
 def generate_local_answer(prompt, max_new_tokens=512):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     output = generation_model.generate(
         input_ids=input_ids,
@@ -94,7 +94,7 @@ def rag_chat(query):
     answer = generate_local_answer(prompt)
     return answer
 
-# Optional: basic CSS to enhance layout
+# Optional: CSS for improved UX
 custom_css = """
 textarea, .input_textbox {
     font-size: 1.05rem !important;
@@ -128,4 +128,4 @@ Enter a natural-language query describing your patient's condition to receive an
 
 submit_btn.click(fn=rag_chat, inputs=query_input, outputs=output)
 
-demo.launch(share=True)
+demo.launch()
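
For reference, the updated loading path can be exercised outside the Gradio app with a short standalone sketch (a hypothetical smoke test, not part of app.py; it assumes transformers, accelerate, and bitsandbytes are installed, and the prompt string is a made-up placeholder). One caveat: recent transformers releases reject .to() on 4-bit bitsandbytes models, so this sketch only moves the model explicitly on the CPU path and lets device_map="auto" handle placement on GPU.

# Hypothetical smoke test for the new loading path (not part of app.py).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "royalhaze/BioMistral-7B"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 4-bit quantization is only applied when a GPU is present.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config if torch.cuda.is_available() else None,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else None,
)
if not torch.cuda.is_available():
    # .to() is not supported on 4-bit quantized models, so only move the
    # model explicitly when running in float32 on CPU.
    model = model.to(device)

prompt = "[INST] Patient presents with fever and productive cough. [/INST]"  # placeholder
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
output = model.generate(input_ids=input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Separately, dropping share=True from demo.launch() stops Gradio from requesting a public share tunnel, which a hosted Space does not need.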