Commit c4447f4 · committed by Thanush
Parent(s): 6196bed
Refactor app.py to integrate LangChain memory for conversation tracking and update requirements.txt for LangChain dependency
Files changed:
- app.py (+33 -28)
- requirements.txt (+3 -0)
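For reference, here is a minimal, self-contained sketch of the ConversationBufferMemory round-trip that the refactored generate_response relies on. It assumes langchain>=0.1.0 as pinned below; the sample strings are illustrative only, and the replay loop mirrors the one added in app.py.

from langchain.memory import ConversationBufferMemory

# Buffer memory that stores turns as message objects rather than one string.
memory = ConversationBufferMemory(return_messages=True)

# Each completed exchange is saved as a human/ai message pair.
memory.save_context({"input": "I have a headache"}, {"output": "How long has it lasted?"})
memory.save_context({"input": "About two days"}, {"output": ""})

# Replay the buffer into (user, assistant) tuples, as the new code does
# before handing history to build_llama2_prompt.
pairs, pending_user = [], None
for msg in memory.chat_memory.messages:
    if msg.type == "human":
        pending_user = msg.content
    elif msg.type == "ai" and pending_user is not None:
        pairs.append((pending_user, msg.content))
        pending_user = None

print(pairs)  # [('I have a headache', 'How long has it lasted?'), ('About two days', '')]

Because the buffer is created at module scope (as in the diff below), it accumulates turns for the lifetime of the process rather than per chat session.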
app.py
CHANGED
@@ -2,12 +2,13 @@ import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from langchain.memory import ConversationBufferMemory
 
 # Model configuration
 LLAMA_MODEL = "meta-llama/Llama-2-7b-chat-hf"
 MEDITRON_MODEL = "epfl-llm/meditron-7b"
 
-SYSTEM_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
+SYSTEM_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's Name,age,health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
 Ask 1-2 follow-up questions at a time to gather more details about:
 - Detailed description of symptoms
 - Duration (when did it start?)
@@ -51,9 +52,8 @@ meditron_model = AutoModelForCausalLM.from_pretrained(
 )
 print("Meditron model loaded successfully!")
 
-#
-
-patient_data = {}
+# Initialize LangChain memory
+memory = ConversationBufferMemory(return_messages=True)
 
 def build_llama2_prompt(system_prompt, history, user_input):
     """Format the conversation history and user input for Llama-2 chat models."""
@@ -89,26 +89,31 @@ def get_meditron_suggestions(patient_info):
 @spaces.GPU
 def generate_response(message, history):
     """Generate a response using both models."""
+    # Save the latest user message and last assistant response to memory
+    if history and len(history[-1]) == 2:
+        memory.save_context({"input": history[-1][0]}, {"output": history[-1][1]})
+    memory.save_context({"input": message}, {"output": ""})
+
+    # Build conversation history from memory
+    lc_history = []
+    user_msg = None
+    for msg in memory.chat_memory.messages:
+        if msg.type == "human":
+            user_msg = msg.content
+        elif msg.type == "ai" and user_msg is not None:
+            assistant_msg = msg.content
+            lc_history.append((user_msg, assistant_msg))
+            user_msg = None
+
+    # Build the prompt with LangChain memory history
+    prompt = build_llama2_prompt(SYSTEM_PROMPT, lc_history, message)
+
     # Add summarization instruction after 4 turns
-    if
+    if len(lc_history) >= 4:
         prompt = prompt.replace("[/INST] ", "[/INST] Now summarize what you've learned and suggest when professional care may be needed. ")
-
+
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
+
     # Generate the Llama-2 response
     with torch.no_grad():
         outputs = model.generate(
@@ -120,19 +125,19 @@ def generate_response(message, history):
         do_sample=True,
         pad_token_id=tokenizer.eos_token_id
     )
-
+
     # Decode and extract Llama-2's response
     full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
    llama_response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
-
+
     # After 4 turns, add medicine suggestions from Meditron
-    if
+    if len(lc_history) >= 4:
         # Collect full patient conversation
-        full_patient_info = "\n".join(
-
+        full_patient_info = "\n".join([h[0] for h in lc_history] + [message]) + "\n\nSummary: " + llama_response
+
         # Get medicine suggestions
         medicine_suggestions = get_meditron_suggestions(full_patient_info)
-
+
         # Format final response
         final_response = (
             f"{llama_response}\n\n"
@@ -140,7 +145,7 @@ def generate_response(message, history):
             f"{medicine_suggestions}"
         )
         return final_response
-
+
     return llama_response
 
 # Create the Gradio interface
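The refactored generate_response still delegates prompt construction to build_llama2_prompt, whose body is unchanged and therefore not shown in this diff. For orientation only, a typical implementation for Llama-2 chat checkpoints looks roughly like the hypothetical sketch below; it is an assumption, not the repository's actual code, but it illustrates the [INST] ... [/INST] layout that the replace("[/INST] ", ...) and split('[/INST]') calls above depend on.

# Hypothetical sketch of build_llama2_prompt (not taken from this repository).
def build_llama2_prompt(system_prompt, history, user_input):
    """Format the system prompt, (user, assistant) history, and new user turn."""
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
    for user_msg, assistant_msg in history:
        # Close each earlier exchange with </s> and open a fresh [INST] block.
        prompt += f"{user_msg} [/INST] {assistant_msg} </s><s>[INST] "
    # Leave the newest user turn open, ending in "[/INST] " so generation
    # (and the summarization rewrite above) continues from that marker.
    prompt += f"{user_input} [/INST] "
    return prompt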
requirements.txt
CHANGED
@@ -21,3 +21,6 @@ aiofiles>=23.1.0
 
 # For better tensor operations
 numpy>=1.24.0
+
+# For LangChain memory
+langchain>=0.1.0
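A quick way to confirm the pinned dependency provides what app.py expects (a minimal check, assuming langchain>=0.1.0): with return_messages=True the stored history comes back as HumanMessage/AIMessage objects, which is what the new code iterates over via memory.chat_memory.messages; without the flag it would be a single formatted string.

from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages=True)
memory.save_context({"input": "hello"}, {"output": "Hi, how can I help?"})

# With return_messages=True, "history" is a list of message objects.
history = memory.load_memory_variables({})["history"]
print([(m.type, m.content) for m in history])  # [('human', 'hello'), ('ai', 'Hi, how can I help?')]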