Spaces:

techindia2025
/

medbot_2

Runtime error

App Files Files Community

Thanush committed on May 23

Commit

031a3f5

1 Parent(s): 1bcbb86

Implement medical consultation app with LangChain memory management and model integration

Browse files

Files changed (9) hide show

app.py +2 -377
medbot/__init__.py +1 -0
medbot/config.py +2 -0
medbot/handlers.py +56 -0
medbot/interface.py +28 -0
medbot/memory.py +79 -0
medbot/model.py +46 -0
medbot/prompts.py +21 -0
medbot/utils.py +5 -0

app.py CHANGED Viewed

@@ -1,380 +1,5 @@
-import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import spaces
-from langchain.memory import ConversationBufferWindowMemory
-from langchain.schema import HumanMessage, AIMessage
-import json
-from datetime import datetime
-# Model configuration - Using correct Me-LLaMA model identifier
-ME_LLAMA_MODEL = "clinicalnlplab/me-llama-13b"
-# System prompts for different phases
-CONSULTATION_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
-Ask 1-2 follow-up questions at a time to gather more details about:
-- Detailed description of symptoms
-- Duration (when did it start?)
-- Severity (scale of 1-10)
-- Aggravating or alleviating factors
-- Related symptoms
-- Medical history
-- Current medications and allergies
-After collecting sufficient information (4-5 exchanges), summarize findings and suggest when they should seek professional care. Do NOT make specific diagnoses or recommend specific treatments.
-Respond empathetically and clearly. Always be professional and thorough."""
-MEDICINE_PROMPT = """You are a specialized medical assistant. Based on the patient information gathered, provide:
-1. One specific over-the-counter medicine with proper adult dosing instructions
-2. One practical home remedy that might help
-3. Clear guidance on when to seek professional medical care
-Be concise, practical, and focus only on general symptom relief. Do not diagnose. Include a disclaimer that you are not a licensed medical professional.
-Patient information: {patient_info}
-Previous conversation context: {memory_context}"""
-# Global variables
-me_llama_model = None
-me_llama_tokenizer = None
-conversation_turns = 0
-patient_data = []
-# LangChain Memory Configuration
-class MedicalMemoryManager:
-    def __init__(self, k=10):  # Keep last 10 conversation turns
-        self.conversation_memory = ConversationBufferWindowMemory(k=k, return_messages=True)
-        self.patient_context = {
-            "symptoms": [],
-            "medical_history": [],
-            "medications": [],
-            "allergies": [],
-            "lifestyle_factors": [],
-            "timeline": [],
-            "severity_scores": {},
-            "session_start": datetime.now().isoformat()
-        }
-    def add_interaction(self, human_input, ai_response):
-        """Add human-AI interaction to memory"""
-        self.conversation_memory.chat_memory.add_user_message(human_input)
-        self.conversation_memory.chat_memory.add_ai_message(ai_response)
-        # Extract and categorize medical information
-        self._extract_medical_info(human_input)
-    def _extract_medical_info(self, user_input):
-        """Extract medical information from user input and categorize it"""
-        user_lower = user_input.lower()
-        # Extract symptoms (simple keyword matching)
-        symptom_keywords = ["pain", "ache", "hurt", "sore", "cough", "fever", "nausea",
-                           "headache", "dizzy", "tired", "fatigue", "vomit", "swollen",
-                           "rash", "itch", "burn", "cramp", "bleed", "shortness of breath"]
-        for keyword in symptom_keywords:
-            if keyword in user_lower and keyword not in [s.lower() for s in self.patient_context["symptoms"]]:
-                self.patient_context["symptoms"].append(user_input)
-                break
-        # Extract timeline information
-        time_keywords = ["days", "weeks", "months", "hours", "yesterday", "today", "started", "began"]
-        if any(keyword in user_lower for keyword in time_keywords):
-            self.patient_context["timeline"].append(user_input)
-        # Extract severity (look for numbers 1-10)
-        import re
-        severity_match = re.search(r'\b([1-9]|10)\b.*(?:pain|severity|scale)', user_lower)
-        if severity_match:
-            self.patient_context["severity_scores"][datetime.now().isoformat()] = severity_match.group(1)
-        # Extract medications
-        med_keywords = ["taking", "medication", "medicine", "pills", "prescribed", "drug"]
-        if any(keyword in user_lower for keyword in med_keywords):
-            self.patient_context["medications"].append(user_input)
-        # Extract allergies
-        allergy_keywords = ["allergic", "allergy", "allergies", "reaction"]
-        if any(keyword in user_lower for keyword in allergy_keywords):
-            self.patient_context["allergies"].append(user_input)
-    def get_memory_context(self):
-        """Get formatted memory context for the model"""
-        messages = self.conversation_memory.chat_memory.messages
-        context = []
-        for msg in messages[-6:]:  # Last 6 messages (3 exchanges)
-            if isinstance(msg, HumanMessage):
-                context.append(f"Patient: {msg.content}")
-            elif isinstance(msg, AIMessage):
-                context.append(f"Doctor: {msg.content}")
-        return "\n".join(context)
-    def get_patient_summary(self):
-        """Get structured patient information summary"""
-        summary = {
-            "conversation_turns": len(self.conversation_memory.chat_memory.messages) // 2,
-            "session_duration": datetime.now().isoformat(),
-            "key_symptoms": self.patient_context["symptoms"][-3:],  # Last 3 symptoms mentioned
-            "timeline_info": self.patient_context["timeline"][-2:],  # Last 2 timeline mentions
-            "medications": self.patient_context["medications"],
-            "allergies": self.patient_context["allergies"],
-            "severity_scores": self.patient_context["severity_scores"]
-        }
-        return json.dumps(summary, indent=2)
-    def reset_session(self):
-        """Reset memory for new consultation"""
-        self.conversation_memory.clear()
-        self.patient_context = {
-            "symptoms": [],
-            "medical_history": [],
-            "medications": [],
-            "allergies": [],
-            "lifestyle_factors": [],
-            "timeline": [],
-            "severity_scores": {},
-            "session_start": datetime.now().isoformat()
-        }
-# Initialize memory manager
-memory_manager = MedicalMemoryManager()
-def build_me_llama_prompt(system_prompt, history, user_input):
-    """Format the conversation for Me-LLaMA chat model with memory context."""
-    # Get memory context from LangChain
-    memory_context = memory_manager.get_memory_context()
-    # Enhance system prompt with memory context
-    enhanced_system_prompt = f"{system_prompt}\n\nPrevious conversation context:\n{memory_context}"
-    # Use standard Llama-2 chat format since Me-LLaMA is based on Llama-2
-    prompt = f"<s>[INST] <<SYS>>\n{enhanced_system_prompt}\n<</SYS>>\n\n"
-    # Add only recent history to avoid token limit issues
-    recent_history = history[-3:] if len(history) > 3 else history
-    for user_msg, assistant_msg in recent_history:
-        prompt += f"{user_msg} [/INST] {assistant_msg} </s><s>[INST] "
-    # Add the current user input
-    prompt += f"{user_input} [/INST] "
-    return prompt
-@spaces.GPU
-def load_model_if_needed():
-    """Load Me-LLaMA model only when GPU is available."""
-    global me_llama_model, me_llama_tokenizer
-    if me_llama_model is None:
-        print("Loading Me-LLaMA 13B model...")
-        try:
-            me_llama_tokenizer = AutoTokenizer.from_pretrained(
-                ME_LLAMA_MODEL,
-                trust_remote_code=True
-            )
-            me_llama_model = AutoModelForCausalLM.from_pretrained(
-                ME_LLAMA_MODEL,
-                torch_dtype=torch.float16,
-                device_map="auto",
-                trust_remote_code=True
-            )
-            print("Me-LLaMA 13B model loaded successfully!")
-        except Exception as e:
-            print(f"Error loading model: {e}")
-            # Fallback to a working medical model
-            print("Falling back to Llama-2-7b-chat-hf...")
-            me_llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
-            me_llama_model = AutoModelForCausalLM.from_pretrained(
-                "meta-llama/Llama-2-7b-chat-hf",
-                torch_dtype=torch.float16,
-                device_map="auto"
-            )
-            print("Fallback model loaded successfully!")
-@spaces.GPU
-def generate_medicine_suggestions(patient_info, memory_context):
-    """Use Me-LLaMA to generate medicine and remedy suggestions with memory context."""
-    load_model_if_needed()
-    # Create a prompt with both patient info and memory context
-    prompt = f"<s>[INST] {MEDICINE_PROMPT.format(patient_info=patient_info, memory_context=memory_context)} [/INST] "
-    inputs = me_llama_tokenizer(prompt, return_tensors="pt")
-    # Move inputs to the same device as the model
-    if torch.cuda.is_available():
-        inputs = {k: v.to(me_llama_model.device) for k, v in inputs.items()}
-    with torch.no_grad():
-        outputs = me_llama_model.generate(
-            inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            max_new_tokens=300,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True,
-            pad_token_id=me_llama_tokenizer.eos_token_id
-        )
-    suggestion = me_llama_tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
-    return suggestion
-@spaces.GPU
-def generate_response(message, history):
-    """Generate response using Me-LLaMA with LangChain memory management."""
-    global conversation_turns, patient_data
-    try:
-        # Load model if needed
-        load_model_if_needed()
-        # Track conversation turns
-        conversation_turns += 1
-        # Store patient data (legacy support)
-        patient_data.append(message)
-        # Phase 1-3: Information gathering with memory
-        if conversation_turns < 4:
-            # Build consultation prompt with memory context
-            prompt = build_me_llama_prompt(CONSULTATION_PROMPT, history, message)
-            inputs = me_llama_tokenizer(prompt, return_tensors="pt")
-            # Move inputs to the same device as the model
-            if torch.cuda.is_available():
-                inputs = {k: v.to(me_llama_model.device) for k, v in inputs.items()}
-            # Generate consultation response
-            with torch.no_grad():
-                outputs = me_llama_model.generate(
-                    inputs["input_ids"],
-                    attention_mask=inputs["attention_mask"],
-                    max_new_tokens=400,
-                    temperature=0.7,
-                    top_p=0.9,
-                    do_sample=True,
-                    pad_token_id=me_llama_tokenizer.eos_token_id
-                )
-            # Decode response
-            full_response = me_llama_tokenizer.decode(outputs[0], skip_special_tokens=False)
-            response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
-            # Add interaction to memory
-            memory_manager.add_interaction(message, response)
-            return response
-        # Phase 4+: Summary and medicine suggestions with full memory context
-        else:
-            # Get comprehensive patient summary from memory
-            patient_summary = memory_manager.get_patient_summary()
-            memory_context = memory_manager.get_memory_context()
-            # First, get summary from consultation with memory context
-            summary_prompt = build_me_llama_prompt(
-                CONSULTATION_PROMPT + "\n\nNow provide a comprehensive summary based on all the information gathered. Include when professional care may be needed.",
-                history,
-                message
-            )
-            inputs = me_llama_tokenizer(summary_prompt, return_tensors="pt")
-            if torch.cuda.is_available():
-                inputs = {k: v.to(me_llama_model.device) for k, v in inputs.items()}
-            # Generate summary
-            with torch.no_grad():
-                outputs = me_llama_model.generate(
-                    inputs["input_ids"],
-                    attention_mask=inputs["attention_mask"],
-                    max_new_tokens=400,
-                    temperature=0.7,
-                    top_p=0.9,
-                    do_sample=True,
-                    pad_token_id=me_llama_tokenizer.eos_token_id
-                )
-            summary_response = me_llama_tokenizer.decode(outputs[0], skip_special_tokens=False)
-            summary = summary_response.split('[/INST]')[-1].split('</s>')[0].strip()
-            # Get medicine suggestions using memory context
-            full_patient_info = f"Patient Summary: {patient_summary}\n\nDetailed Summary: {summary}"
-            medicine_suggestions = generate_medicine_suggestions(full_patient_info, memory_context)
-            # Combine both responses
-            final_response = (
-                f"**COMPREHENSIVE MEDICAL SUMMARY:**\n{summary}\n\n"
-                f"**MEDICATION AND HOME CARE SUGGESTIONS:**\n{medicine_suggestions}\n\n"
-                f"**PATIENT CONTEXT SUMMARY:**\n{patient_summary}\n\n"
-                f"**DISCLAIMER:** This is AI-generated advice for informational purposes only. Please consult a licensed healthcare provider for proper medical diagnosis and treatment."
-            )
-            # Add final interaction to memory
-            memory_manager.add_interaction(message, final_response)
-            return final_response
-    except Exception as e:
-        error_msg = f"I apologize, but I'm experiencing technical difficulties. Please try again. Error: {str(e)}"
-        # Still try to add to memory even on error
-        try:
-            memory_manager.add_interaction(message, error_msg)
-        except:
-            pass
-        return error_msg
-def reset_consultation():
-    """Reset the consultation and memory for a new patient."""
-    global conversation_turns, patient_data, memory_manager
-    conversation_turns = 0
-    patient_data = []
-    memory_manager.reset_session()
-    return "New consultation started. Please tell me about your symptoms or health concerns."
-# Create the Gradio interface with memory reset option
-with gr.Blocks(theme="soft") as demo:
-    gr.Markdown("# 🏥 Complete Medical Assistant - Me-LLaMA 13B with Memory")
-    gr.Markdown("Comprehensive medical consultation powered by Me-LLaMA 13B with LangChain memory management. One model handles both consultation and medicine suggestions with full context awareness.")
-    with gr.Row():
-        with gr.Column(scale=4):
-            chatbot = gr.Chatbot(height=500)
-            msg = gr.Textbox(
-                placeholder="Tell me about your symptoms or health concerns...",
-                label="Your Message"
-            )
-        with gr.Column(scale=1):
-            reset_btn = gr.Button("🔄 Start New Consultation", variant="secondary")
-            gr.Markdown("**Memory Features:**\n- Tracks symptoms & timeline\n- Remembers medications & allergies\n- Maintains conversation context\n- Provides comprehensive summaries")
-    # Examples
-    gr.Examples(
-        examples=[
-            "I have a persistent cough and sore throat for 3 days",
-            "I've been having severe headaches and feel dizzy",
-            "My stomach hurts and I feel nauseous after eating"
-        ],
-        inputs=msg
-    )
-    # Event handlers
-    def respond(message, chat_history):
-        bot_message = generate_response(message, chat_history)
-        chat_history.append((message, bot_message))
-        return "", chat_history
-    def reset_chat():
-        reset_msg = reset_consultation()
-        return [(None, reset_msg)], ""
-    msg.submit(respond, [msg, chatbot], [msg, chatbot])
-    reset_btn.click(reset_chat, [], [chatbot, msg])
 if __name__ == "__main__":
     demo.launch()

+from medbot.interface import build_interface
 if __name__ == "__main__":
+    demo = build_interface()
     demo.launch()

medbot/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

medbot/config.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ ME_LLAMA_MODEL = "clinicalnlplab/me-llama-13b"
2	+ FALLBACK_MODEL = "meta-llama/Llama-2-7b-chat-hf"

medbot/handlers.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from .model import ModelManager
+from .memory import MedicalMemoryManager
+from .prompts import CONSULTATION_PROMPT, MEDICINE_PROMPT
+# Module-level instances shared by every call to respond() and reset_chat() below.
+model_manager = ModelManager()
+memory_manager = MedicalMemoryManager()
+# Count of handled user messages since the last reset_chat(); respond() increments it
+# and switches to the summary/medicine phase once it reaches 4.
+conversation_turns = 0
+def build_me_llama_prompt(system_prompt, history, user_input):
+    """Build the Llama-2 style chat prompt for the next model call.
+
+    Args:
+        system_prompt: Instruction text placed inside the <<SYS>> section.
+        history: Chat history as (user_message, assistant_message) pairs;
+            may be empty or None.
+        user_input: The message the model should answer now.
+
+    Returns:
+        The prompt string, ending in an open "[/INST] " for the model to
+        complete.
+    """
+    memory_context = memory_manager.get_memory_context()
+    enhanced_system_prompt = f"{system_prompt}\n\nPrevious conversation context:\n{memory_context}"
+    parts = [f"<s>[INST] <<SYS>>\n{enhanced_system_prompt}\n<</SYS>>\n\n"]
+    # reset_chat() seeds the chat with (None, greeting). A pair with a missing
+    # side is not a real exchange and would otherwise be rendered as the
+    # literal text "None" inside the prompt.
+    exchanges = [
+        (user_msg, assistant_msg)
+        for user_msg, assistant_msg in (history or [])
+        if user_msg is not None and assistant_msg is not None
+    ]
+    # Only the last three complete exchanges are replayed to keep the prompt short.
+    for user_msg, assistant_msg in exchanges[-3:]:
+        parts.append(f"{user_msg} [/INST] {assistant_msg} </s><s>[INST] ")
+    parts.append(f"{user_input} [/INST] ")
+    return "".join(parts)
+def respond(message, chat_history):
+    """Handle one submitted chat message and return the updated UI state.
+
+    For the first three counted turns the model is asked to continue the
+    consultation. From the fourth turn on it produces a summary, then a
+    separate medicine/home-care suggestion, and both are combined with the
+    stored patient context and a disclaimer.
+
+    Args:
+        message: Text the user submitted.
+        chat_history: List of (user_message, bot_message) pairs shown in the
+            chatbot; it is appended to in place. None is treated as empty.
+
+    Returns:
+        A tuple ("", chat_history); the empty string clears the input box.
+    """
+    global conversation_turns
+    if chat_history is None:
+        chat_history = []
+    # A blank submission is not a consultation turn; leave all state untouched.
+    if not message or not message.strip():
+        return "", chat_history
+    # Work on a candidate turn number so a failed generation does not advance
+    # the consultation phase.
+    turn = conversation_turns + 1
+    try:
+        if turn < 4:
+            prompt = build_me_llama_prompt(CONSULTATION_PROMPT, chat_history, message)
+            reply = model_manager.generate(prompt)
+        else:
+            reply = _build_final_response(message, chat_history)
+        memory_manager.add_interaction(message, reply)
+    except Exception as e:
+        # UI boundary: report the failure in the chat instead of letting the
+        # exception escape the event handler.
+        print(f"Error generating response: {e}")
+        error_msg = f"I apologize, but I'm experiencing technical difficulties. Please try again. Error: {str(e)}"
+        chat_history.append((message, error_msg))
+        return "", chat_history
+    conversation_turns = turn
+    chat_history.append((message, reply))
+    return "", chat_history
+def _build_final_response(message, chat_history):
+    """Generate the summary and medicine suggestions and format the final report text."""
+    patient_summary = memory_manager.get_patient_summary()
+    memory_context = memory_manager.get_memory_context()
+    summary_prompt = build_me_llama_prompt(
+        CONSULTATION_PROMPT + "\n\nNow provide a comprehensive summary based on all the information gathered. Include when professional care may be needed.",
+        chat_history,
+        message
+    )
+    summary = model_manager.generate(summary_prompt)
+    full_patient_info = f"Patient Summary: {patient_summary}\n\nDetailed Summary: {summary}"
+    med_prompt = f"<s>[INST] {MEDICINE_PROMPT.format(patient_info=full_patient_info, memory_context=memory_context)} [/INST] "
+    medicine_suggestions = model_manager.generate(med_prompt, max_new_tokens=300)
+    return (
+        f"**COMPREHENSIVE MEDICAL SUMMARY:**\n{summary}\n\n"
+        f"**MEDICATION AND HOME CARE SUGGESTIONS:**\n{medicine_suggestions}\n\n"
+        f"**PATIENT CONTEXT SUMMARY:**\n{patient_summary}\n\n"
+        f"**DISCLAIMER:** This is AI-generated advice for informational purposes only. Please consult a licensed healthcare provider for proper medical diagnosis and treatment."
+    )
+def reset_chat():
+    """Start a fresh consultation.
+
+    Zeroes the module-level turn counter and resets the shared memory
+    manager's session.
+
+    Returns:
+        A tuple of (chat history holding only the greeting, "" for the
+        message box).
+    """
+    global conversation_turns
+    conversation_turns = 0
+    memory_manager.reset_session()
+    greeting = "New consultation started. Please tell me about your symptoms or health concerns."
+    fresh_history = [(None, greeting)]
+    return fresh_history, ""

medbot/interface.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import gradio as gr
+from .handlers import respond, reset_chat
+def build_interface():
+    """Build the Gradio Blocks UI for the medical assistant and return it.
+
+    The layout is a wide column with the chatbot and message box, a narrow
+    column with the reset button and a feature list, and a set of example
+    messages. The returned object is not launched here.
+    """
+    with gr.Blocks(theme="soft") as demo:
+        gr.Markdown("# 🏥 Complete Medical Assistant - Me-LLaMA 13B with Memory")
+        gr.Markdown("Comprehensive medical consultation powered by Me-LLaMA 13B with LangChain memory management. One model handles both consultation and medicine suggestions with full context awareness.")
+        with gr.Row():
+            # Main column: conversation display and the user's input box.
+            with gr.Column(scale=4):
+                chatbot = gr.Chatbot(height=500)
+                msg = gr.Textbox(
+                    placeholder="Tell me about your symptoms or health concerns...",
+                    label="Your Message"
+                )
+            # Side column: reset control and static description text.
+            with gr.Column(scale=1):
+                reset_btn = gr.Button("🔄 Start New Consultation", variant="secondary")
+                gr.Markdown("**Memory Features:**\n- Tracks symptoms & timeline\n- Remembers medications & allergies\n- Maintains conversation context\n- Provides comprehensive summaries")
+        # Example messages are wired to the message box.
+        gr.Examples(
+            examples=[
+                "I have a persistent cough and sore throat for 3 days",
+                "I've been having severe headaches and feel dizzy",
+                "My stomach hurts and I feel nauseous after eating"
+            ],
+            inputs=msg
+        )
+        # respond() receives (message, history) and returns (new box text, new history).
+        msg.submit(respond, [msg, chatbot], [msg, chatbot])
+        # reset_chat() takes no inputs and returns (new history, new box text).
+        reset_btn.click(reset_chat, [], [chatbot, msg])
+    return demo

medbot/memory.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from langchain.memory import ConversationBufferWindowMemory
+from langchain.schema import HumanMessage, AIMessage
+from datetime import datetime
+import json
+import re
+class MedicalMemoryManager:
+    """Conversation memory plus keyword-derived patient context for one session.
+
+    Chat turns are stored in a LangChain ConversationBufferWindowMemory. Each
+    user message is additionally filed under every patient-context category
+    whose keywords it contains.
+    """
+    # Substrings, matched against the lower-cased message, that file a message under a category.
+    SYMPTOM_KEYWORDS = ("pain", "ache", "hurt", "sore", "cough", "fever", "nausea", "headache", "dizzy", "tired", "fatigue", "vomit", "swollen", "rash", "itch", "burn", "cramp", "bleed", "shortness of breath")
+    TIME_KEYWORDS = ("days", "weeks", "months", "hours", "yesterday", "today", "started", "began")
+    MEDICATION_KEYWORDS = ("taking", "medication", "medicine", "pills", "prescribed", "drug")
+    ALLERGY_KEYWORDS = ("allergic", "allergy", "allergies", "reaction")
+    # A number 1-10 followed, anywhere later in the message, by pain/severity/scale.
+    SEVERITY_PATTERN = re.compile(r'\b([1-9]|10)\b.*(?:pain|severity|scale)')
+    def __init__(self, k=10):
+        """Create an empty memory.
+
+        Args:
+            k: Window size passed to ConversationBufferWindowMemory.
+        """
+        self.conversation_memory = ConversationBufferWindowMemory(k=k, return_messages=True)
+        self.patient_context = self._new_patient_context()
+    @staticmethod
+    def _new_patient_context():
+        """Return a fresh, empty patient-context dict stamped with the current time."""
+        return {
+            "symptoms": [],
+            "medical_history": [],
+            "medications": [],
+            "allergies": [],
+            "lifestyle_factors": [],
+            "timeline": [],
+            "severity_scores": {},
+            "session_start": datetime.now().isoformat()
+        }
+    def _record(self, category, user_input):
+        """Append user_input to a context list unless the same text (ignoring case) is already stored."""
+        entries = self.patient_context[category]
+        lowered = user_input.lower()
+        if all(entry.lower() != lowered for entry in entries):
+            entries.append(user_input)
+    def add_interaction(self, human_input, ai_response):
+        """Store one user/assistant exchange and mine the user text for patient context."""
+        self.conversation_memory.chat_memory.add_user_message(human_input)
+        self.conversation_memory.chat_memory.add_ai_message(ai_response)
+        self._extract_medical_info(human_input)
+    def _extract_medical_info(self, user_input):
+        """File user_input under each context category whose keywords it mentions.
+
+        The whole message, not the matched keyword, is what gets stored. A
+        severity score is recorded, keyed by the current timestamp, when the
+        severity pattern matches.
+        """
+        user_lower = user_input.lower()
+        keyword_groups = (
+            ("symptoms", self.SYMPTOM_KEYWORDS),
+            ("timeline", self.TIME_KEYWORDS),
+            ("medications", self.MEDICATION_KEYWORDS),
+            ("allergies", self.ALLERGY_KEYWORDS),
+        )
+        for category, keywords in keyword_groups:
+            if any(keyword in user_lower for keyword in keywords):
+                # The stored values are whole messages, so de-duplication has to
+                # compare message text rather than the keyword that matched.
+                self._record(category, user_input)
+        severity_match = self.SEVERITY_PATTERN.search(user_lower)
+        if severity_match:
+            self.patient_context["severity_scores"][datetime.now().isoformat()] = severity_match.group(1)
+    def get_memory_context(self):
+        """Return the last six stored messages as "Patient: ..." / "Doctor: ..." lines."""
+        messages = self.conversation_memory.chat_memory.messages
+        context = []
+        for msg in messages[-6:]:
+            if isinstance(msg, HumanMessage):
+                context.append(f"Patient: {msg.content}")
+            elif isinstance(msg, AIMessage):
+                context.append(f"Doctor: {msg.content}")
+        return "\n".join(context)
+    def get_patient_summary(self):
+        """Return the collected patient context as an indented JSON string.
+
+        "session_duration" is the time elapsed since session_start, formatted
+        the way str(timedelta) formats it (for example "0:05:12.345678").
+        """
+        started = datetime.fromisoformat(self.patient_context["session_start"])
+        summary = {
+            "conversation_turns": len(self.conversation_memory.chat_memory.messages) // 2,
+            "session_duration": str(datetime.now() - started),
+            "key_symptoms": self.patient_context["symptoms"][-3:],
+            "timeline_info": self.patient_context["timeline"][-2:],
+            "medications": self.patient_context["medications"],
+            "allergies": self.patient_context["allergies"],
+            "severity_scores": self.patient_context["severity_scores"]
+        }
+        return json.dumps(summary, indent=2)
+    def reset_session(self):
+        """Clear the conversation memory and start a new, empty patient context."""
+        self.conversation_memory.clear()
+        self.patient_context = self._new_patient_context()

medbot/model.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from .config import ME_LLAMA_MODEL, FALLBACK_MODEL
+class ModelManager:
+    """Lazily loads the language model and tokenizer and generates text with them.
+
+    NOTE(review): the pre-refactor app.py wrapped model loading and generation
+    in @spaces.GPU; nothing in this class does. Confirm whether the hosting
+    environment still needs that decorator.
+    """
+    def __init__(self):
+        """Start with nothing loaded; load() fills both attributes on first use."""
+        self.model = None
+        self.tokenizer = None
+    @staticmethod
+    def _load_pair(model_name, **extra_kwargs):
+        """Load and return (tokenizer, model) for model_name; extra_kwargs go to both loaders."""
+        tokenizer = AutoTokenizer.from_pretrained(model_name, **extra_kwargs)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            **extra_kwargs
+        )
+        return tokenizer, model
+    def load(self):
+        """Load ME_LLAMA_MODEL, falling back to FALLBACK_MODEL if that fails.
+
+        Does nothing when a model and tokenizer are already loaded. The two
+        attributes are only assigned once a matching pair has loaded, so a
+        failure never leaves a tokenizer without its model.
+
+        Raises:
+            Exception: Whatever the fallback load raises, if it also fails.
+        """
+        if self.model is not None and self.tokenizer is not None:
+            return
+        try:
+            tokenizer, model = self._load_pair(ME_LLAMA_MODEL, trust_remote_code=True)
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            print("Falling back to Llama-2-7b-chat-hf...")
+            tokenizer, model = self._load_pair(FALLBACK_MODEL)
+        self.tokenizer, self.model = tokenizer, model
+    def generate(self, prompt, max_new_tokens=400, temperature=0.7, top_p=0.9):
+        """Sample a completion for prompt and return only the newly generated text.
+
+        Args:
+            prompt: Full prompt string.
+            max_new_tokens: Upper bound on generated tokens.
+            temperature: Sampling temperature.
+            top_p: Nucleus-sampling threshold.
+
+        Returns:
+            The decoded continuation with special tokens removed and
+            surrounding whitespace stripped.
+        """
+        self.load()
+        # Prompts built elsewhere already begin with the BOS text; letting the
+        # tokenizer add its own special tokens as well would duplicate it.
+        bos_token = getattr(self.tokenizer, "bos_token", None)
+        add_special_tokens = not (bos_token and prompt.startswith(bos_token))
+        inputs = self.tokenizer(prompt, return_tensors="pt", add_special_tokens=add_special_tokens)
+        if torch.cuda.is_available():
+            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+        with torch.no_grad():
+            outputs = self.model.generate(
+                inputs["input_ids"],
+                attention_mask=inputs["attention_mask"],
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                pad_token_id=self.tokenizer.eos_token_id
+            )
+        # Slice off the prompt tokens so only the continuation is decoded.
+        prompt_length = inputs["input_ids"].shape[1]
+        return self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

medbot/prompts.py ADDED Viewed

	@@ -0,0 +1,21 @@

+# System prompt for the information-gathering phase; handlers.respond() passes it to
+# build_me_llama_prompt() (with an extra summary instruction appended in the final phase).
+CONSULTATION_PROMPT = '''You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
+Ask 1-2 follow-up questions at a time to gather more details about:
+- Detailed description of symptoms
+- Duration (when did it start?)
+- Severity (scale of 1-10)
+- Aggravating or alleviating factors
+- Related symptoms
+- Medical history
+- Current medications and allergies
+After collecting sufficient information (4-5 exchanges), summarize findings and suggest when they should seek professional care. Do NOT make specific diagnoses or recommend specific treatments.
+Respond empathetically and clearly. Always be professional and thorough.'''
+# Template for the medicine/home-care suggestion call; {patient_info} and {memory_context}
+# are filled with str.format() in handlers.respond().
+# NOTE(review): this asks for a specific over-the-counter medicine while CONSULTATION_PROMPT
+# forbids recommending specific treatments -- confirm the two are meant to differ.
+MEDICINE_PROMPT = '''You are a specialized medical assistant. Based on the patient information gathered, provide:
+1. One specific over-the-counter medicine with proper adult dosing instructions
+2. One practical home remedy that might help
+3. Clear guidance on when to seek professional medical care
+Be concise, practical, and focus only on general symptom relief. Do not diagnose. Include a disclaimer that you are not a licensed medical professional.
+Patient information: {patient_info}
+Previous conversation context: {memory_context}'''

medbot/utils.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# Utility functions for medbot
+def extract_symptoms(text):
+    """Return the symptoms found in text.
+
+    Placeholder: no extraction logic exists yet, so the result is always an
+    empty list regardless of text.
+    """
+    symptoms = []
+    return symptoms