import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

# Define the medical assistant system prompt
SYSTEM_PROMPT = """
You are a knowledgeable medical assistant. Follow these steps in order:

1) INITIAL ASSESSMENT: First, warmly greet the user and ask about their primary concern.

2) ASK FOLLOW-UP QUESTIONS: For any health concern mentioned, systematically gather information by asking 1-2 specific follow-up questions at a time about:
   - Detailed description of symptoms
   - Duration (when did it start?)
   - Severity (scale of 1-10)
   - Aggravating or alleviating factors
   - Related symptoms
   - Medical history
   - Current medications and allergies
   - Family history of similar conditions

3) SUMMARIZE FINDINGS: Once you have gathered sufficient information (at least 4-5 exchanges with the user), organize what you've learned into clear categories:
   - Symptoms
   - Duration
   - Severity
   - Possible Causes
   - Medications/Allergies
   - Family History

4) PROVIDE RECOMMENDATIONS: Only after gathering comprehensive information, suggest:
   - One specific OTC medicine with proper adult dosing
   - One practical home remedy
   - When they should seek professional medical care

5) END WITH DISCLAIMER: Always end with a clear medical disclaimer that you are not a licensed medical professional and your suggestions are not a substitute for professional medical advice.

IMPORTANT: Do not skip ahead to recommendations without gathering comprehensive information through multiple exchanges. Your primary goal is information gathering through thoughtful questions.
"""

# Define model options
MODELS = {
    "TinyLlama-1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Llama-2-7b": "meta-llama/Llama-2-7b-chat-hf",
}

# Global caches for loaded models and tokenizers
loaded_models = {}
loaded_tokenizers = {}


def load_model(model_name):
    """Load the model and tokenizer if not already loaded."""
    if model_name not in loaded_models:
        print(f"Loading {model_name}...")
        model_path = MODELS[model_name]
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype="auto",
            device_map="auto",  # Use GPU if available
        )
        loaded_models[model_name] = model
        loaded_tokenizers[model_name] = tokenizer
        print(f"{model_name} loaded successfully!")
    return loaded_models[model_name], loaded_tokenizers[model_name]


# Pre-load the smaller model to start with
print("Pre-loading TinyLlama model...")
load_model("TinyLlama-1.1B")


@spaces.GPU  # Required by ZeroGPU!
def generate_response(message, history, model_choice):
    """Generate a response from the selected model."""
    # Load the selected model if not already loaded
    model, tokenizer = load_model(model_choice)

    # Format the prompt from the system prompt and conversation history
    formatted_prompt = SYSTEM_PROMPT + "\n\n"
    for human, assistant in history:
        formatted_prompt += f"User: {human}\nAssistant: {assistant}\n"

    # Add the current message
    formatted_prompt += f"User: {message}\nAssistant:"

    # Generate the response
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # Decode only the newly generated tokens, not the prompt
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    return response.strip()


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Medical Assistant Chatbot")
    gr.Markdown(
        "This chatbot uses large language models to provide medical information and assistance. "
        "Please note that this is not a substitute for professional medical advice."
    )

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value="TinyLlama-1.1B",
            label="Select Model",
        )

    chatbot = gr.ChatInterface(
        fn=generate_response,
        additional_inputs=[model_dropdown],
    )

if __name__ == "__main__":
    demo.launch()