MedicallAssistant

Running on Zero

App Files Files Community

VisoLearn committed 18 days ago

Commit

8140621

verified ·

1 Parent(s): f6ac5ae

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -19

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
 from threading import Thread
 phi4_model_path = "Intelligent-Internet/II-Medical-8B"
@@ -11,10 +12,12 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
 phi4_model = AutoModelForCausalLM.from_pretrained(phi4_model_path, device_map="auto", torch_dtype="auto")
 phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
 @spaces.GPU(duration=60)
-def generate_response(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history):
     if not user_message.strip():
-        return history, history
     model = phi4_model
     tokenizer = phi4_tokenizer
@@ -60,6 +63,7 @@ Now, analyze the following case:"""
         "streamer": streamer,
     }
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
@@ -73,10 +77,21 @@ Now, analyze the following case:"""
         assistant_response += cleaned_token
         # Update the last message in history with the current response
         new_history[-1][1] = assistant_response.strip()
-    # Return the updated history
-    return new_history, new_history
 example_messages = {
     "Headache case": "A 35-year-old female presents with a throbbing headache, nausea, and sensitivity to light. It started on one side of her head and worsens with activity. No prior trauma.",
@@ -150,23 +165,47 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
                 example3 = gr.Button("Abdominal pain")
                 example4 = gr.Button("BMI calculation")
-    # Use click instead of stream
     submit_button.click(
-        fn=generate_response,
-        inputs=[user_input, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider,
-                repetition_penalty_slider, history],
-        outputs=[chatbot, history]
     ).then(
-        fn=lambda: gr.update(value=""),
-        inputs=None,
-        outputs=user_input
     )
-    clear_button.click(fn=lambda: ([], []), inputs=None, outputs=[chatbot, history])
-    example1.click(lambda: gr.update(value=example_messages["Headache case"]), None, user_input)
-    example2.click(lambda: gr.update(value=example_messages["Chest pain"]), None, user_input)
-    example3.click(lambda: gr.update(value=example_messages["Abdominal pain"]), None, user_input)
-    example4.click(lambda: gr.update(value=example_messages["BMI calculation"]), None, user_input)
 demo.launch(ssr_mode=False)

 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
 from threading import Thread
+import time
 phi4_model_path = "Intelligent-Internet/II-Medical-8B"
 phi4_model = AutoModelForCausalLM.from_pretrained(phi4_model_path, device_map="auto", torch_dtype="auto")
 phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
+# This is our streaming generator function that yields partial results
 @spaces.GPU(duration=60)
+def generate_streaming_response(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history):
     if not user_message.strip():
+        yield history, history
+        return
     model = phi4_model
     tokenizer = phi4_tokenizer
         "streamer": streamer,
     }
+    # Start generation in a separate thread
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
         assistant_response += cleaned_token
         # Update the last message in history with the current response
         new_history[-1][1] = assistant_response.strip()
+        yield new_history, new_history
+        # Add a small sleep to control the streaming rate
+        time.sleep(0.01)
+    # Return the final state after streaming is completed
+    yield new_history, new_history
+# This is our non-streaming wrapper function for buttons that don't support streaming
+def process_input(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history):
+    # Blocking counterpart of generate_streaming_response: forwards every argument
+    # unchanged, discards the intermediate yields and returns only the last one.
+    # If the generator yields nothing, the initial None below is returned.
+    # NOTE(review): no caller of process_input appears in the visible part of this diff.
+    generator = generate_streaming_response(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history)
+    # Get the final result by exhausting the generator
+    result = None
+    for result in generator:
+        pass
+    return result
 example_messages = {
     "Headache case": "A 35-year-old female presents with a throbbing headache, nausea, and sensitivity to light. It started on one side of her head and worsens with activity. No prior trauma.",
                 example3 = gr.Button("Abdominal pain")
                 example4 = gr.Button("BMI calculation")
+    # Set up the streaming interface
+    def on_submit(message, history, max_tokens, temperature, top_k, top_p, repetition_penalty):
+        # First step of the submit chain. Returns three values, wired to
+        # [user_input, chatbot, history]: an empty string for the textbox and the
+        # extended history for both the chatbot and the history state.
+        # The sampling parameters are accepted but not used in this body.
+        # Return the modified history that includes the new user message
+        # (list concatenation builds a new list; the incoming history is not mutated).
+        modified_history = history + [[message, ""]]
+        return "", modified_history, modified_history
+    def on_stream(history, max_tokens, temperature, top_k, top_p, repetition_penalty):
+        # Second step of the submit chain: a generator that re-runs the last user
+        # message through generate_streaming_response and yields each partial history.
+        # NOTE(review): this step is wired with outputs=chatbot only, so the history
+        # state is not written here and keeps the [message, ""] placeholder stored by
+        # on_submit -- confirm that later turns are meant to see an empty reply.
+        if not history:
+            # This function contains `yield`, so it is a generator: this statement
+            # ends it without yielding anything (the value is not emitted as an update).
+            return history
+        # Get the last user message from history
+        user_message = history[-1][0]
+        # Start a fresh history without the last entry
+        # (the entry being dropped is the [message, ""] placeholder added by on_submit)
+        prev_history = history[:-1]
+        # Generate streaming responses
+        # Each item is a pair; only its first element is forwarded.
+        for new_history, _ in generate_streaming_response(
+            user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, prev_history
+        ):
+            yield new_history
+    # Connect the submission event as a two-step chain: on_submit runs first
+    # (empties the textbox and appends the user message to chatbot and history),
+    # then on_stream streams the generated reply into the chatbot.
     submit_button.click(
+        fn=on_submit,
+        inputs=[user_input, history, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider],
+        outputs=[user_input, chatbot, history]
     ).then(
+        fn=on_stream,
+        inputs=[history, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider],
+        outputs=chatbot
     )
+    # Handle examples
+    def set_example(example_text):
+        # Build a gr.update carrying example_text as the new value; the example
+        # button handlers below route it to user_input.
+        return gr.update(value=example_text)
+    clear_button.click(fn=lambda: ([], []), inputs=None, outputs=[chatbot, history])
+    example1.click(fn=lambda: set_example(example_messages["Headache case"]), inputs=None, outputs=user_input)
+    example2.click(fn=lambda: set_example(example_messages["Chest pain"]), inputs=None, outputs=user_input)
+    example3.click(fn=lambda: set_example(example_messages["Abdominal pain"]), inputs=None, outputs=user_input)
+    example4.click(fn=lambda: set_example(example_messages["BMI calculation"]), inputs=None, outputs=user_input)
 demo.launch(ssr_mode=False)