Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,40 @@ import gradio as gr
|
|
2 |
import spaces
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
# Define model options
|
6 |
MODELS = {
|
7 |
"TinyLlama-1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
@@ -17,18 +51,15 @@ def load_model(model_name):
|
|
17 |
if model_name not in loaded_models:
|
18 |
print(f"Loading {model_name}...")
|
19 |
model_path = MODELS[model_name]
|
20 |
-
|
21 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
22 |
model = AutoModelForCausalLM.from_pretrained(
|
23 |
model_path,
|
24 |
torch_dtype="auto",
|
25 |
device_map="auto" # Use GPU if available
|
26 |
)
|
27 |
-
|
28 |
loaded_models[model_name] = model
|
29 |
loaded_tokenizers[model_name] = tokenizer
|
30 |
print(f"{model_name} loaded successfully!")
|
31 |
-
|
32 |
return loaded_models[model_name], loaded_tokenizers[model_name]
|
33 |
|
34 |
# Pre-load the smaller model to start with
|
@@ -41,14 +72,18 @@ def generate_response(message, history, model_choice):
|
|
41 |
# Load the selected model if not already loaded
|
42 |
model, tokenizer = load_model(model_choice)
|
43 |
|
44 |
-
# Format the prompt based on the history
|
45 |
-
|
|
|
|
|
46 |
for human, assistant in history:
|
47 |
-
|
48 |
-
|
|
|
|
|
49 |
|
50 |
# Generate the response
|
51 |
-
inputs = tokenizer(
|
52 |
outputs = model.generate(
|
53 |
inputs["input_ids"],
|
54 |
max_new_tokens=512,
|
@@ -57,12 +92,13 @@ def generate_response(message, history, model_choice):
|
|
57 |
do_sample=True,
|
58 |
)
|
59 |
response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
|
|
|
60 |
return response.strip()
|
61 |
|
62 |
# Create the Gradio interface
|
63 |
with gr.Blocks() as demo:
|
64 |
-
gr.Markdown("#
|
65 |
-
gr.Markdown("
|
66 |
|
67 |
with gr.Row():
|
68 |
model_dropdown = gr.Dropdown(
|
|
|
2 |
import spaces
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
|
5 |
+
# System prompt: steers the model through a staged medical intake (greet, ask follow-up questions, summarize, recommend, then add a disclaimer)
|
6 |
+
SYSTEM_PROMPT = """
|
7 |
+
You are a knowledgeable medical assistant. Follow these steps in order:
|
8 |
+
|
9 |
+
1) INITIAL ASSESSMENT: First, warmly greet the user and ask about their primary concern.
|
10 |
+
|
11 |
+
2) ASK FOLLOW-UP QUESTIONS: For any health concern mentioned, systematically gather information by asking 1-2 specific follow-up questions at a time about:
|
12 |
+
- Detailed description of symptoms
|
13 |
+
- Duration (when did it start?)
|
14 |
+
- Severity (scale of 1-10)
|
15 |
+
- Aggravating or alleviating factors
|
16 |
+
- Related symptoms
|
17 |
+
- Medical history
|
18 |
+
- Current medications and allergies
|
19 |
+
- Family history of similar conditions
|
20 |
+
|
21 |
+
3) SUMMARIZE FINDINGS: Once you have gathered sufficient information (at least 4-5 exchanges with the user), organize what you've learned into clear categories:
|
22 |
+
- Symptoms
|
23 |
+
- Duration
|
24 |
+
- Severity
|
25 |
+
- Possible Causes
|
26 |
+
- Medications/Allergies
|
27 |
+
- Family History
|
28 |
+
|
29 |
+
4) PROVIDE RECOMMENDATIONS: Only after gathering comprehensive information, suggest:
|
30 |
+
- One specific OTC medicine with proper adult dosing
|
31 |
+
- One practical home remedy
|
32 |
+
- When they should seek professional medical care
|
33 |
+
|
34 |
+
5) END WITH DISCLAIMER: Always end with a clear medical disclaimer that you are not a licensed medical professional and your suggestions are not a substitute for professional medical advice.
|
35 |
+
|
36 |
+
IMPORTANT: Do not skip ahead to recommendations without gathering comprehensive information through multiple exchanges. Your primary goal is information gathering through thoughtful questions.
|
37 |
+
"""
|
38 |
+
|
39 |
# Define model options
|
40 |
MODELS = {
|
41 |
"TinyLlama-1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
|
|
51 |
if model_name not in loaded_models:
|
52 |
print(f"Loading {model_name}...")
|
53 |
model_path = MODELS[model_name]
|
|
|
54 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
55 |
model = AutoModelForCausalLM.from_pretrained(
|
56 |
model_path,
|
57 |
torch_dtype="auto",
|
58 |
device_map="auto" # Use GPU if available
|
59 |
)
|
|
|
60 |
loaded_models[model_name] = model
|
61 |
loaded_tokenizers[model_name] = tokenizer
|
62 |
print(f"{model_name} loaded successfully!")
|
|
|
63 |
return loaded_models[model_name], loaded_tokenizers[model_name]
|
64 |
|
65 |
# Pre-load the smaller model to start with
|
|
|
72 |
# Load the selected model if not already loaded
|
73 |
model, tokenizer = load_model(model_choice)
|
74 |
|
75 |
+
# Build a plain-text prompt: the system prompt first, followed by the prior turns as "User: ... / Assistant: ..." lines
|
76 |
+
formatted_prompt = SYSTEM_PROMPT + "\n\n"
|
77 |
+
|
78 |
+
# Add conversation history
|
79 |
for human, assistant in history:
|
80 |
+
formatted_prompt += f"User: {human}\nAssistant: {assistant}\n"
|
81 |
+
|
82 |
+
# Add the current message and end on an open "Assistant:" cue for the model to complete
|
83 |
+
formatted_prompt += f"User: {message}\nAssistant:"
|
84 |
|
85 |
# Generate the response
|
86 |
+
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
|
87 |
outputs = model.generate(
|
88 |
inputs["input_ids"],
|
89 |
max_new_tokens=512,
|
|
|
92 |
do_sample=True,
|
93 |
)
|
94 |
response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
|
95 |
+
|
96 |
return response.strip()
|
97 |
|
98 |
# Create the Gradio interface
|
99 |
with gr.Blocks() as demo:
|
100 |
+
gr.Markdown("# Medical Assistant Chatbot")
|
101 |
+
gr.Markdown("This chatbot uses LLM models to provide medical information and assistance. Please note that this is not a substitute for professional medical advice.")
|
102 |
|
103 |
with gr.Row():
|
104 |
model_dropdown = gr.Dropdown(
|