File size: 9,465 Bytes
fa7e3c5
afa4eb5
 
fa7e3c5
 
8140621
afa4eb5
 
 
 
fa7e3c5
afa4eb5
 
 
8140621
f6ac5ae
8140621
fa7e3c5
8140621
 
196c072
afa4eb5
 
 
 
 
580e705
196c072
afa4eb5
196c072
b5ca495
196c072
afa4eb5
196c072
afa4eb5
196c072
afa4eb5
f6ac5ae
6d70605
f6ac5ae
 
 
 
 
 
 
 
 
6d70605
fa7e3c5
 
afa4eb5
fa7e3c5
 
afa4eb5
 
fa7e3c5
1794ce2
afa4eb5
fa7e3c5
afa4eb5
 
 
fa7e3c5
 
8140621
afa4eb5
 
fa7e3c5
f6ac5ae
 
afa4eb5
f6ac5ae
 
afa4eb5
 
 
f6ac5ae
 
8140621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196c072
fa7e3c5
196c072
 
 
 
fa7e3c5
 
afa4eb5
 
 
 
 
 
 
 
 
 
 
 
196c072
afa4eb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa7e3c5
f6ac5ae
 
afa4eb5
fa7e3c5
 
 
afa4eb5
fa7e3c5
580e705
 
 
 
196c072
fa7e3c5
 
196c072
fa7e3c5
 
196c072
fa7e3c5
196c072
 
 
 
fa7e3c5
8140621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7336213
8140621
 
 
afa4eb5
8140621
 
 
afa4eb5
 
8140621
 
 
fa7e3c5
8140621
 
 
 
 
fa7e3c5
7336213
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
from threading import Thread
import time

# Hugging Face Hub repository the assistant is loaded from. The variables keep
# their historical ``phi4_`` prefix even though the checkpoint is II-Medical-8B.
phi4_model_path = "Intelligent-Internet/II-Medical-8B"

# Device that tokenized inputs are moved to before generation:
# the first CUDA device when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Model and tokenizer are loaded once, at import time, and shared by every request.
phi4_model = AutoModelForCausalLM.from_pretrained(
    phi4_model_path,
    device_map="auto",
    torch_dtype="auto",
)
phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)

# Chat-markup tags used by the hand-written fallback prompt and stripped from streamed text.
_CHAT_TAGS = ("<|im_start|>", "<|im_sep|>", "<|im_end|>")


def _encode_conversation(tokenizer, system_message, history, user_message):
    """Tokenize the system prompt, past turns and the new user message.

    When the tokenizer ships its own chat template it is used, because that
    template is the authoritative description of the format the checkpoint
    expects. Otherwise the original hand-written ``<|im_start|>role<|im_sep|>``
    layout is used as a fallback.

    Args:
        tokenizer: Hugging Face tokenizer of the loaded model.
        system_message: Text of the system turn.
        history: Iterable of ``(user_msg, bot_msg)`` pairs; empty halves are skipped.
        user_message: The new user turn the model should answer.

    Returns:
        The tokenizer output (with ``input_ids`` and ``attention_mask``) as
        PyTorch tensors, ending with an open assistant turn.
    """
    if getattr(tokenizer, "chat_template", None):
        messages = [{"role": "system", "content": system_message}]
        for user_msg, bot_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": user_message})
        return tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        )

    # Fallback: no template available, keep the original manual layout.
    start_tag, sep_tag, end_tag = _CHAT_TAGS
    parts = [f"{start_tag}system{sep_tag}{system_message}{end_tag}"]
    for user_msg, bot_msg in history:
        if user_msg:
            parts.append(f"{start_tag}user{sep_tag}{user_msg}{end_tag}")
        if bot_msg:
            parts.append(f"{start_tag}assistant{sep_tag}{bot_msg}{end_tag}")
    parts.append(f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}")
    return tokenizer("".join(parts), return_tensors="pt")


# This is our streaming generator function that yields partial results
@spaces.GPU(duration=60)
def generate_streaming_response(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history):
    """Stream the assistant's answer to ``user_message`` as growing chat histories.

    Args:
        user_message: Text of the new user turn. Blank or ``None`` produces no generation.
        max_tokens: Upper bound on newly generated tokens (cast to ``int``).
        temperature: Sampling temperature (cast to ``float``).
        top_k: Top-k sampling cutoff (cast to ``int``).
        top_p: Nucleus sampling cutoff (cast to ``float``).
        repetition_penalty: Repetition penalty (cast to ``float``).
        history: Previous turns as ``[user_msg, bot_msg]`` pairs; not mutated.

    Yields:
        ``(history, history)`` tuples where the last pair holds the new user
        message and the reply accumulated so far.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread is
            re-raised here once the stream has ended.
    """
    history = list(history) if history else []
    if not user_message or not user_message.strip():
        yield history, history
        return

    model = phi4_model
    tokenizer = phi4_tokenizer

    system_message = """You are a medical assistant AI designed to help diagnose symptoms, explain possible conditions, and recommend next steps. You must be cautious, thorough, and explain medical reasoning step-by-step. Structure your answer in two sections: 

<think> In this section, reason through the symptoms by considering patient history, differential diagnoses, relevant physiological mechanisms, and possible investigations. Explain your thought process step-by-step. </think> 

In the Solution section, summarize your working diagnosis, differential options, and suggest what to do next (e.g., tests, referral, lifestyle changes). Always clarify that this is not a replacement for a licensed medical professional.

Use LaTeX for any formulas or values (e.g., $\\text{BMI} = \\frac{\\text{weight (kg)}}{\\text{height (m)}^2}$). 

Now, analyze the following case:"""

    # NOTE(review): inputs go to the module-level `device` while the model is
    # placed with device_map="auto"; confirm both agree on multi-device hosts.
    inputs = _encode_conversation(tokenizer, system_message, history, user_message).to(device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": int(max_tokens),
        "do_sample": True,
        "temperature": float(temperature),
        "top_k": int(top_k),
        "top_p": float(top_p),
        "repetition_penalty": float(repetition_penalty),
        "streamer": streamer,
    }

    generation_errors = []

    def _run_generation():
        # A failure inside generate() would otherwise be lost in the worker
        # thread and leave the consumer loop blocked on the streamer queue.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # re-raised in the caller's thread below
            generation_errors.append(exc)
            streamer.end()

    # Start generation in a separate thread so text can be consumed as it arrives
    thread = Thread(target=_run_generation)
    thread.start()

    # Create a new history with the current user message
    new_history = history + [[user_message, ""]]

    # Collect the generated response
    assistant_response = ""
    for new_text in streamer:
        for tag in _CHAT_TAGS:
            new_text = new_text.replace(tag, "")
        assistant_response += new_text
        # Update the last message in history with the current response
        new_history[-1][1] = assistant_response.strip()
        yield new_history, new_history
        # Add a small sleep to control the streaming rate
        time.sleep(0.01)

    thread.join()
    if generation_errors:
        raise generation_errors[0]

    # Return the final state after streaming is completed
    yield new_history, new_history

# Blocking counterpart of the streaming generator, for callers that only want the final state
def process_input(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history):
    """Run a full generation and return only the last streamed update.

    Takes the same arguments as ``generate_streaming_response`` and forwards
    them unchanged.

    Returns:
        The final value yielded by ``generate_streaming_response``, or ``None``
        if it yielded nothing.
    """
    updates = generate_streaming_response(
        user_message,
        max_tokens,
        temperature,
        top_k,
        top_p,
        repetition_penalty,
        history,
    )
    exhausted = object()  # private sentinel so any yielded value is accepted
    latest = None
    while True:
        update = next(updates, exhausted)
        if update is exhausted:
            return latest
        latest = update

# Canned prompts keyed by name; each "Try these examples" button copies the
# prompt stored under its key into the input textbox.
example_messages = {
    "Headache case": "A 35-year-old female presents with a throbbing headache, nausea, and sensitivity to light. It started on one side of her head and worsens with activity. No prior trauma.",
    "Chest pain": "A 58-year-old male presents with chest tightness radiating to his left arm, shortness of breath, and sweating. Symptoms began while climbing stairs.",
    "Abdominal pain": "A 24-year-old complains of right lower quadrant abdominal pain, nausea, and mild fever. The pain started around the belly button and migrated.",
    "BMI calculation": "A patient weighs 85 kg and is 1.75 meters tall. Calculate the BMI and interpret whether it's underweight, normal, overweight, or obese."
}

# Extra stylesheet passed to gr.Blocks: enlarges KaTeX output inside
# .markdown-body and lets display-mode formulas scroll horizontally
# instead of overflowing.
css = """
.markdown-body .katex { 
    font-size: 1.2em; 
}
.markdown-body .katex-display { 
    margin: 1em 0; 
    overflow-x: auto;
    overflow-y: hidden;
}
"""

# Build the chat UI: transcript, sampling controls, input row and example buttons.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown("# Medical Diagnostic Assistant\nThis AI assistant helps analyze symptoms and provide preliminary diagnostic reasoning using LaTeX-rendered medical formulas where needed.")

    # NOTE(review): <script> tags injected as HTML content are normally not
    # executed by browsers, so this MathJax loader may never run — confirm.
    # Formula rendering is therefore also configured on the Chatbot below.
    gr.HTML("""
    <script>
    if (typeof window.MathJax === 'undefined') {
        const script = document.createElement('script');
        script.src = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML';
        script.async = true;
        document.head.appendChild(script);
        window.MathJax = {
            tex2jax: {
                inlineMath: [['$', '$']],
                displayMath: [['$$', '$$']],
                processEscapes: true
            },
            showProcessingMessages: false,
            messageStyle: 'none'
        };
    }
    function rerender() {
        if (window.MathJax && window.MathJax.Hub) {
            window.MathJax.Hub.Queue(['Typeset', window.MathJax.Hub]);
        }
    }
    setInterval(rerender, 1000);
    </script>
    """)

    chatbot = gr.Chatbot(
        label="Chat",
        render_markdown=True,
        show_copy_button=True,
        # Same delimiters the MathJax config above declares; "$$" is listed
        # first so display math is not parsed as two inline markers.
        latex_delimiters=[
            {"left": "$$", "right": "$$", "display": True},
            {"left": "$", "right": "$", "display": False},
        ],
    )
    # Server-side copy of the transcript as [user_msg, bot_msg] pairs.
    history = gr.State([])

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            max_tokens_slider = gr.Slider(64, 32768, step=1024, value=4096, label="Max Tokens")
            with gr.Accordion("Advanced Settings", open=False):
                temperature_slider = gr.Slider(0.1, 2.0, value=0.8, label="Temperature")
                top_k_slider = gr.Slider(1, 100, step=1, value=50, label="Top-k")
                top_p_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
                repetition_penalty_slider = gr.Slider(1.0, 2.0, value=1.0, label="Repetition Penalty")

        with gr.Column(scale=4):
            with gr.Row():
                user_input = gr.Textbox(label="Describe symptoms or ask a medical question", placeholder="Type your message here...", scale=3)
                submit_button = gr.Button("Send", variant="primary", scale=1)
                clear_button = gr.Button("Clear", scale=1)
            gr.Markdown("**Try these examples:**")
            with gr.Row():
                example1 = gr.Button("Headache case")
                example2 = gr.Button("Chest pain")
                example3 = gr.Button("Abdominal pain")
                example4 = gr.Button("BMI calculation")

    # Set up the streaming interface
    def on_submit(message, history, max_tokens, temperature, top_k, top_p, repetition_penalty):
        """Append the user's message to the transcript and clear the textbox.

        The sampling parameters are accepted to match the wired inputs but are
        not used here. A blank message leaves the transcript unchanged.

        Returns:
            ``(textbox_value, chatbot_value, history_state)``.
        """
        history = history or []
        if not message or not message.strip():
            # Nothing to send: do not add an empty turn to the transcript.
            return "", history, history
        modified_history = history + [[message, ""]]
        return "", modified_history, modified_history

    def on_stream(history, max_tokens, temperature, top_k, top_p, repetition_penalty):
        """Generate the reply for the last, still unanswered, user turn.

        Yields ``(chatbot_value, history_state)`` so the stored transcript
        always contains the assistant's reply; otherwise the next submit would
        rebuild the chat from a state that lacks every earlier answer.
        """
        history = history or []
        # No pending turn (empty transcript, or the last turn already has a
        # reply because the submit was blank): nothing to generate.
        if not history or history[-1][1]:
            yield history, history
            return

        # Get the last user message from history
        user_message = history[-1][0]

        # The generator re-appends the pending turn itself, so pass only the earlier ones
        prev_history = history[:-1]

        # Generate streaming responses
        for new_history, _ in generate_streaming_response(
            user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, prev_history
        ):
            yield new_history, new_history

    # Connect the submission event
    submit_button.click(
        fn=on_submit,
        inputs=[user_input, history, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider],
        outputs=[user_input, chatbot, history]
    ).then(
        fn=on_stream,
        inputs=[history, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider],
        outputs=[chatbot, history]
    )

    # Handle examples
    def set_example(example_text):
        """Return an update that puts ``example_text`` into the input textbox."""
        return gr.update(value=example_text)

    # Clear resets both the visible chat and the stored transcript.
    clear_button.click(fn=lambda: ([], []), inputs=None, outputs=[chatbot, history])
    example1.click(fn=lambda: set_example(example_messages["Headache case"]), inputs=None, outputs=user_input)
    example2.click(fn=lambda: set_example(example_messages["Chest pain"]), inputs=None, outputs=user_input)
    example3.click(fn=lambda: set_example(example_messages["Abdominal pain"]), inputs=None, outputs=user_input)
    example4.click(fn=lambda: set_example(example_messages["BMI calculation"]), inputs=None, outputs=user_input)

demo.launch(ssr_mode=False)