Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,294 Bytes
b80af5b 71bcd31 9f6ac99 c4447f4 71bcd31 c4447f4 71bcd31 c4447f4 71bcd31 bdce857 71bcd31 aa89cd7 c4447f4 71bcd31 c4447f4 71bcd31 c4447f4 71bcd31 c4447f4 71bcd31 c4447f4 71bcd31 c4447f4 aa89cd7 c4447f4 aa89cd7 c4447f4 aa89cd7 c4447f4 aa89cd7 c4447f4 aa89cd7 b80af5b 71bcd31 6d5190c 71bcd31 aa89cd7 8b29c0d 71bcd31 8b29c0d 71bcd31 6d5190c b80af5b 71bcd31 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.memory import ConversationBufferMemory
# Model configuration
# Hugging Face Hub repository ids of the two checkpoints loaded further down
# in this file: the Llama-2 chat model and the Meditron model.
LLAMA_MODEL = "meta-llama/Llama-2-7b-chat-hf"
MEDITRON_MODEL = "epfl-llm/meditron-7b"
# System prompt handed to build_llama2_prompt() for the Llama-2 model. It asks
# the model to interview the user and, after 4-5 exchanges, to summarize.
# NOTE(review): this prompt forbids recommending specific treatments, while
# MEDITRON_PROMPT below explicitly requests a specific over-the-counter
# medicine with dosing -- confirm that the two policies are meant to coexist.
SYSTEM_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's Name,age,health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
Ask 1-2 follow-up questions at a time to gather more details about:
- Detailed description of symptoms
- Duration (when did it start?)
- Severity (scale of 1-10)
- Aggravating or alleviating factors
- Related symptoms
- Medical history
- Current medications and allergies
After collecting sufficient information (4-5 exchanges), summarize findings and suggest when they should seek professional care. Do NOT make specific diagnoses or recommend specific treatments.
Respond empathetically and clearly. Always be professional and thorough."""
# Prompt template for the Meditron model. The {patient_info} placeholder is
# filled with str.format() in get_meditron_suggestions(); the template ends
# with an open assistant section so the model continues from there.
# NOTE(review): the <|im_start|>/<|im_end|> markers look like ChatML -- confirm
# against the model card that this checkpoint was trained on that format.
MEDITRON_PROMPT = """<|im_start|>system
You are a specialized medical assistant focusing ONLY on suggesting over-the-counter medicines and home remedies based on patient information.
Based on the following patient information, provide ONLY:
1. One specific over-the-counter medicine with proper adult dosing instructions
2. One practical home remedy that might help
3. Clear guidance on when to seek professional medical care
Be concise, practical, and focus only on general symptom relief. Do not diagnose. Include a disclaimer that you are not a licensed medical professional.
<|im_end|>
<|im_start|>user
Patient information: {patient_info}
<|im_end|>
<|im_start|>assistant
"""
def _load_checkpoint(repo_id):
    """Load the tokenizer, then the causal LM, for one Hub repository id.

    The model is requested in float16 with ``device_map="auto"``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id)
    loaded_model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return loaded_tokenizer, loaded_model


# Both checkpoints are loaded once, at import time, and kept as module globals.
print("Loading Llama-2 model...")
tokenizer, model = _load_checkpoint(LLAMA_MODEL)
print("Llama-2 model loaded successfully!")

print("Loading Meditron model...")
meditron_tokenizer, meditron_model = _load_checkpoint(MEDITRON_MODEL)
print("Meditron model loaded successfully!")

# Module-level LangChain conversation buffer: a single instance for the whole
# process, created at import time.
memory = ConversationBufferMemory(return_messages=True)
def build_llama2_prompt(system_prompt, history, user_input):
    """Render a Llama-2 chat prompt from a system prompt, past turns and new input.

    Args:
        system_prompt: Text placed inside the ``<<SYS>>`` section of the first
            instruction.
        history: Iterable of ``(user_message, assistant_message)`` pairs for
            the turns that have already taken place, oldest first.
        user_input: The message the model should answer now.

    Returns:
        The prompt string, ending with an open ``[/INST] `` marker.
    """
    # Opening instruction carrying the system section.
    segments = [f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"]
    # Each finished exchange closes its sequence and opens the next instruction.
    segments.extend(
        f"{past_user} [/INST] {past_reply} </s><s>[INST] "
        for past_user, past_reply in history
    )
    # The pending user message, left open for the model to answer.
    segments.append(f"{user_input} [/INST] ")
    return "".join(segments)
def get_meditron_suggestions(patient_info):
    """Ask the Meditron model for medicine and home-remedy suggestions.

    Args:
        patient_info: Free-text patient description; it is substituted into
            the ``{patient_info}`` placeholder of ``MEDITRON_PROMPT``.

    Returns:
        The text generated after the prompt, decoded with special tokens
        removed. Sampling is enabled, so repeated calls may differ.
    """
    prompt = MEDITRON_PROMPT.format(patient_info=patient_info)
    inputs = meditron_tokenizer(prompt, return_tensors="pt").to(meditron_model.device)

    with torch.no_grad():
        outputs = meditron_model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # Set explicitly, as the Llama-2 generate() call in this file does,
            # instead of leaving the padding token unspecified.
            pad_token_id=meditron_tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs.input_ids.shape[1]
    suggestion = meditron_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    return suggestion
# Number of completed exchanges after which the assistant is asked to wrap up
# and Meditron suggestions are appended.
_SUMMARY_AFTER_EXCHANGES = 4

# Extra instruction added to the user's turn once the threshold is reached.
_SUMMARY_INSTRUCTION = (
    "Now summarize what you've learned and suggest when professional care "
    "may be needed."
)


def _history_to_pairs(history):
    """Normalize Gradio chat history into ``(user, assistant)`` text pairs.

    Accepts the pair-style history (``[[user, assistant], ...]``) as well as
    the dict-style one (``[{"role": ..., "content": ...}, ...]``). Missing
    texts become empty strings; entries of any other shape are ignored.
    """
    pairs = []
    pending_user = None
    for turn in history or []:
        if isinstance(turn, dict):
            content = turn.get("content")
            text = content if isinstance(content, str) else ""
            role = turn.get("role")
            if role == "user":
                pending_user = text
            elif role == "assistant" and pending_user is not None:
                pairs.append((pending_user, text))
                pending_user = None
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_text, assistant_text = turn
            pairs.append((user_text or "", assistant_text or ""))
    return pairs


@spaces.GPU
def generate_response(message, history):
    """Answer one chat message, adding Meditron suggestions late in the chat.

    The conversation context is taken from ``history`` -- the per-session
    history the chat UI passes in -- on every call. The module-level
    ``memory`` buffer is deliberately not used here: it is a single object
    shared by the whole process, so storing turns in it mixes the
    conversations of different sessions and re-adds earlier turns on each
    call.

    Args:
        message: The user's new message.
        history: Earlier turns of this session, either as
            ``[user, assistant]`` pairs or as role/content dicts.

    Returns:
        The Llama-2 reply. Once at least ``_SUMMARY_AFTER_EXCHANGES``
        exchanges have been completed, the reply is followed by a
        "MEDICATION AND HOME CARE SUGGESTIONS" section produced by
        ``get_meditron_suggestions``.
    """
    past_exchanges = _history_to_pairs(history)
    wrap_up = len(past_exchanges) >= _SUMMARY_AFTER_EXCHANGES

    # The wrap-up instruction must sit inside the final user turn. Inserting
    # it after "[/INST] " would rewrite every past turn and place the text in
    # the assistant's slot, so it would be echoed back as part of the reply.
    llama_input = f"{message}\n\n{_SUMMARY_INSTRUCTION}" if wrap_up else message
    prompt = build_llama2_prompt(SYSTEM_PROMPT, past_exchanges, llama_input)

    # build_llama2_prompt() already writes the <s> markers itself, so the
    # tokenizer is told not to add its own special tokens on top.
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full text on
    # "[/INST]" breaks when that marker appears in a message or in the reply.
    prompt_length = inputs.input_ids.shape[1]
    llama_response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

    if not wrap_up:
        return llama_response

    # Everything the patient said, oldest first, plus the model's summary.
    patient_statements = [user_text for user_text, _ in past_exchanges]
    patient_statements.append(message)
    full_patient_info = (
        "\n".join(patient_statements) + "\n\nSummary: " + llama_response
    )
    medicine_suggestions = get_meditron_suggestions(full_patient_info)

    return (
        f"{llama_response}\n\n"
        f"--- MEDICATION AND HOME CARE SUGGESTIONS ---\n\n"
        f"{medicine_suggestions}"
    )
# Create the Gradio interface: a chat UI whose replies come from
# generate_response(); the examples are offered as starter messages.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Medical Assistant with Medicine Suggestions",
    description="Tell me about your symptoms, and after gathering enough information, I'll suggest potential remedies.",
    examples=[
        "I have a cough and my throat hurts",
        "I've been having headaches for a week",
        "My stomach has been hurting since yesterday",
    ],
    theme="soft",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()