Spaces:
Running
on
Zero
Running
on
Zero
Thanush
committed on
Commit
·
a0597d0
1
Parent(s):
c4447f4
Refactor prompt building in app.py to utilize full message sequence and enhance response generation context
Browse files
app.py
CHANGED
@@ -55,17 +55,15 @@ print("Meditron model loaded successfully!")
|
|
55 |
# Initialize LangChain memory
|
56 |
memory = ConversationBufferMemory(return_messages=True)
|
57 |
|
58 |
-
def build_llama2_prompt(system_prompt,
|
59 |
-
"""Format the conversation history and user input for Llama-2 chat models."""
|
60 |
prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
# Add the current user input
|
67 |
prompt += f"{user_input} [/INST] "
|
68 |
-
|
69 |
return prompt
|
70 |
|
71 |
def get_meditron_suggestions(patient_info):
|
@@ -88,28 +86,21 @@ def get_meditron_suggestions(patient_info):
|
|
88 |
|
89 |
@spaces.GPU
|
90 |
def generate_response(message, history):
|
91 |
-
"""Generate a response using both models."""
|
92 |
# Save the latest user message and last assistant response to memory
|
93 |
if history and len(history[-1]) == 2:
|
94 |
memory.save_context({"input": history[-1][0]}, {"output": history[-1][1]})
|
95 |
memory.save_context({"input": message}, {"output": ""})
|
96 |
|
97 |
-
#
|
98 |
-
|
99 |
-
user_msg = None
|
100 |
-
for msg in memory.chat_memory.messages:
|
101 |
-
if msg.type == "human":
|
102 |
-
user_msg = msg.content
|
103 |
-
elif msg.type == "ai" and user_msg is not None:
|
104 |
-
assistant_msg = msg.content
|
105 |
-
lc_history.append((user_msg, assistant_msg))
|
106 |
-
user_msg = None
|
107 |
|
108 |
-
# Build the prompt with
|
109 |
-
prompt = build_llama2_prompt(SYSTEM_PROMPT,
|
110 |
|
111 |
-
# Add summarization instruction after 4 turns
|
112 |
-
|
|
|
113 |
prompt = prompt.replace("[/INST] ", "[/INST] Now summarize what you've learned and suggest when professional care may be needed. ")
|
114 |
|
115 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
@@ -131,9 +122,9 @@ def generate_response(message, history):
|
|
131 |
llama_response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
|
132 |
|
133 |
# After 4 turns, add medicine suggestions from Meditron
|
134 |
-
if
|
135 |
-
# Collect full patient conversation
|
136 |
-
full_patient_info = "\n".join([
|
137 |
|
138 |
# Get medicine suggestions
|
139 |
medicine_suggestions = get_meditron_suggestions(full_patient_info)
|
|
|
55 |
# Initialize LangChain memory
|
56 |
memory = ConversationBufferMemory(return_messages=True)
|
57 |
|
58 |
+
def build_llama2_prompt(system_prompt, messages, user_input):
    """Format a conversation for Llama-2 chat models.

    Builds the standard Llama-2 chat template:
    ``<s>[INST] <<SYS>>...<</SYS>> user [/INST] assistant </s><s>[INST] ...``

    Parameters
    ----------
    system_prompt : str
        Text placed inside the ``<<SYS>>`` block.
    messages : sequence
        LangChain chat messages (each with ``.type`` in {"human", "ai"}
        and ``.content``), in conversation order.
    user_input : str
        The current user message, appended as the final ``[INST]`` turn.

    Returns
    -------
    str
        The fully formatted prompt string.
    """
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"

    # The caller saves the current turn into memory with an empty ""
    # assistant placeholder before building the prompt, so:
    #  - drop empty messages (they would inject a stray "</s><s>[INST] "),
    #  - drop a trailing human message identical to user_input (it would
    #    otherwise appear twice in the prompt).
    history = [m for m in messages if m.content]
    if history and history[-1].type == "human" and history[-1].content == user_input:
        history = history[:-1]

    for msg in history:
        if msg.type == "human":
            prompt += f"{msg.content} [/INST] "
        elif msg.type == "ai":
            # Close the previous turn and open a new instruction block.
            prompt += f"{msg.content} </s><s>[INST] "

    # Add the current user input as the final instruction turn.
    prompt += f"{user_input} [/INST] "
    return prompt
|
68 |
|
69 |
def get_meditron_suggestions(patient_info):
|
|
|
86 |
|
87 |
@spaces.GPU
|
88 |
def generate_response(message, history):
|
89 |
+
"""Generate a response using both models, with full context."""
|
90 |
# Save the latest user message and last assistant response to memory
|
91 |
if history and len(history[-1]) == 2:
|
92 |
memory.save_context({"input": history[-1][0]}, {"output": history[-1][1]})
|
93 |
memory.save_context({"input": message}, {"output": ""})
|
94 |
|
95 |
+
# Use the full message sequence from memory
|
96 |
+
messages = memory.chat_memory.messages
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
+
# Build the prompt with the full message sequence
|
99 |
+
prompt = build_llama2_prompt(SYSTEM_PROMPT, messages, message)
|
100 |
|
101 |
+
# Add summarization instruction after 4 turns (count human messages)
|
102 |
+
num_user_turns = sum(1 for m in messages if m.type == "human")
|
103 |
+
if num_user_turns >= 4:
|
104 |
prompt = prompt.replace("[/INST] ", "[/INST] Now summarize what you've learned and suggest when professional care may be needed. ")
|
105 |
|
106 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
|
|
122 |
llama_response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
|
123 |
|
124 |
# After 4 turns, add medicine suggestions from Meditron
|
125 |
+
if num_user_turns >= 4:
|
126 |
+
# Collect full patient conversation (all user messages)
|
127 |
+
full_patient_info = "\n".join([m.content for m in messages if m.type == "human"] + [message]) + "\n\nSummary: " + llama_response
|
128 |
|
129 |
# Get medicine suggestions
|
130 |
medicine_suggestions = get_meditron_suggestions(full_patient_info)
|