import io
import os
import re
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import requests
import streamlit as st
from dotenv import load_dotenv
from fuzzywuzzy import fuzz  # Make sure fuzzywuzzy is installed (pip install fuzzywuzzy python-levenshtein)
from groq import Groq
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# --- Configuration and Initial Setup (MUST BE FIRST) ---
# Issued before anything else touches Streamlit: the import fallbacks below
# report problems through st.error()/st.warning(), and those calls must not
# run ahead of the page configuration.
st.set_page_config(page_title="MediBot - Health Assistant", page_icon="🏥", layout="wide")

# Import database functions directly here.
# These functions should NOT import anything from app.py or user_management.py
try:
    from database import get_user_history, register_user, user_exists, check_user_credentials, save_user_history
except ImportError:
    st.error("Could not import database functions. Please ensure database.py exists and contains required functions.")

    # Non-persistent stand-ins so the app can still start without database.py.
    def get_user_history(user_id):
        st.warning("Database function 'get_user_history' not implemented. History will not be persistent.")
        return []

    def register_user(user_id, full_name, dob, email, password):
        st.warning("Database function 'register_user' not implemented. User registration will not be persistent.")
        return True  # Simulate success

    def user_exists(user_id):
        st.warning("Database function 'user_exists' not implemented. User existence check will not work.")
        return False  # Simulate user not existing

    def check_user_credentials(user_id, password):
        st.warning("Database function 'check_user_credentials' not implemented. Login will not work.")
        return False  # Simulate login failure

    def save_user_history(user_id, symptoms, predicted_diseases):
        st.warning("Database function 'save_user_history' not implemented. History saving will not be persistent.")

# Import user management functions from user_management.py
# These functions receive st.session_state as an argument
from user_management import render_user_management_sidebar, save_history_to_db_if_logged_in

# Import utility functions
try:
    from utils import extract_keyword
except ImportError:
    st.error("Could not import utility functions. Please ensure utils.py exists and contains required functions.")

    def extract_keyword(text, keywords):
        """Fallback used when utils.py is missing: first keyword contained in *text*, else "Unknown"."""
        for kw in keywords:
            if kw.lower() in text.lower():
                return kw
        return "Unknown"

# Load environment variables from .env.local for local development.
# On Hugging Face Spaces, st.secrets will be used.
load_dotenv(dotenv_path=".env.local")


def _read_secret(secret_name, env_name):
    """Return a credential from st.secrets, falling back to the environment.

    Accessing st.secrets raises when no secrets file is present (the usual
    situation on a developer machine), so that lookup is isolated here;
    otherwise the environment-variable fallback would never be reached.

    Returns None (or an empty value) when neither source provides the key.
    """
    try:
        value = st.secrets.get(secret_name)
    except Exception:
        # NOTE(review): the exception type for a missing secrets file differs
        # between Streamlit releases, hence the broad catch at this boundary.
        value = None
    return value or os.getenv(env_name)


# Initialize Groq client
groq_client = None
GROQ_AVAILABLE = False
try:
    GROQ_API_KEY = _read_secret("GROQ_API_KEY", "GROQ_API_KEY")
    if GROQ_API_KEY:
        groq_client = Groq(api_key=GROQ_API_KEY)
        GROQ_AVAILABLE = True
    else:
        st.error("Groq API Key not found. Groq chatbot will not be available.")
except Exception as e:
    st.error(f"Error initializing Groq client: {e}. Groq chatbot will not be available.")

# Initialize Hugging Face Inference API client details for DATEXIS/CORe-clinical-diagnosis-prediction
HF_MODEL_AVAILABLE = False
HF_API_TOKEN = None
try:
    # 'med_model' is the secret name; MED_MODEL is the matching environment variable.
    HF_API_TOKEN = _read_secret("med_model", "MED_MODEL")
    if HF_API_TOKEN:
        HF_MODEL_AVAILABLE = True
    else:
        st.warning("Hugging Face 'med_model' API Key not found. Clinical diagnosis assessment will not be available.")
except Exception as e:
    st.warning(f"Error retrieving Hugging Face API key: {e}. Clinical diagnosis assessment will not be available.")

# Initialize session state variables (only keys that are still missing).
# The dict literal is rebuilt on every script run, so the list defaults are
# fresh objects and never shared between sessions.
for _state_key, _state_default in {
    "chat_history": [],
    "feedback": [],
    "show_welcome": True,
    "chat_input_value": "",      # To clear the text area after submission
    "last_chat_response": "",    # To persist the last chat response
    "feedback_input": "",        # For clearing feedback text area
    "user_id": None,             # Needed for saving history
    "logged_in_user": None,      # Holds the user_id after successful login
}.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# --- HARDCODED DATA LOADING FROM CSVs ---
@st.cache_data  # Cache this function to avoid reloading data on every rerun
def load_csv_data():
    """Load the four reference CSV files and build the lookup structures.

    Returns:
        A 5-tuple ``(disease_symptoms_map, disease_description_map,
        disease_precaution_map, all_unique_symptoms, symptom_severity_map)``:

        * disease -> list of symptom names (underscores replaced by spaces)
        * disease -> description text
        * disease -> list of non-empty precaution strings
        * sorted list of every symptom that appears in ``dataset.csv``
        * lower-cased symptom name -> severity weight

    On a missing file or any other loading error the problem is reported with
    ``st.error`` and the app is halted with ``st.stop()``.
    """
    try:
        # These paths assume the CSVs sit next to app.py. NaN cells become ''
        # so every cell can be treated as a string below.
        dataset_df = pd.read_csv('dataset.csv').fillna('')
        description_df = pd.read_csv('symptom_Description.csv').fillna('')
        precaution_df = pd.read_csv('symptom_precaution.csv').fillna('')
        severity_df = pd.read_csv('Symptom-severity.csv').fillna('')

        # Disease -> symptoms. A disease may occupy several rows, each listing
        # a different subset of its symptoms; merge them instead of letting the
        # last row overwrite the earlier ones.
        disease_symptoms_map = {}
        for _, row in dataset_df.iterrows():
            merged = disease_symptoms_map.setdefault(row['Disease'], [])
            for cell in row.values[1:]:  # every column after 'Disease'
                if not cell.strip():
                    continue
                symptom = cell.strip().replace('_', ' ')
                if symptom not in merged:
                    merged.append(symptom)

        # Disease -> description
        disease_description_map = dict(zip(description_df['Disease'], description_df['Description']))

        # Disease -> precautions (empty cells dropped)
        disease_precaution_map = {
            row['Disease']: [p.strip() for p in row.values[1:] if p.strip()]
            for _, row in precaution_df.iterrows()
        }

        # Symptom -> severity weight, keyed the same way symptoms are matched
        # later (lower case, spaces instead of underscores).
        symptom_severity_map = {
            symptom.strip().replace('_', ' ').lower(): weight
            for symptom, weight in zip(severity_df['Symptom'], severity_df['weight'])
        }

        # Every distinct symptom, for the multiselect widget
        all_unique_symptoms = sorted(
            {symptom for symptoms_list in disease_symptoms_map.values() for symptom in symptoms_list}
        )

        return disease_symptoms_map, disease_description_map, disease_precaution_map, all_unique_symptoms, symptom_severity_map
    except FileNotFoundError as e:
        st.error(f"Error: Required CSV file not found. Make sure 'dataset.csv', 'symptom_Description.csv', 'symptom_precaution.csv', and 'Symptom-severity.csv' are in the correct directory. Details: {e}")
        st.stop()  # Stop the app if crucial files are missing
    except Exception as e:
        st.error(f"Error loading CSV data: {e}")
        st.stop()  # Stop the app if data loading fails
# --- Groq API Response Function (updated system prompt) ---
def get_groq_response(user_query, severity_label="Undetermined Severity", model="llama-3.3-70b-versatile"):
    """Ask the Groq chat model a health question and return its answer text.

    Args:
        user_query: The user's question or symptom description.
        severity_label: Severity wording inserted into the system prompt; the
            default is used when no prior symptom analysis supplied one.
        model: Groq model identifier.

    Returns:
        The model's reply, or a fixed fallback sentence when the client is not
        configured or the API call fails (the failure is also shown via
        ``st.error``).
    """
    client_ready = GROQ_AVAILABLE and groq_client is not None
    if not client_ready:
        return "AI assistant is not available."

    # Role description plus the severity hint coming from the symptom checker.
    role_section = (
        "You are an experienced physician specialized in diagnosis and clinical decision-making. "
        "Your primary role is to analyze presented symptoms or health queries and provide a differential diagnosis along with evidence-based recommendations for next steps. "
        f"Based on initial analysis, the perceived severity of the user's condition is: {severity_label}. "
        "Adjust your tone and recommendations accordingly, emphasizing urgency if severity is 'High Severity'.\n\n"
    )
    # The five things every answer has to contain.
    task_section = (
        "When a user describes symptoms or asks a health question, you should:\n\n"
        "1. **Prioritized Differential Diagnosis**: Provide a prioritized list of possible diagnoses based on the information given, indicating their relative likelihood or confidence (e.g., 'most likely', 'possible', 'less likely').\n"
        "2. **Reasoning**: Briefly explain the clinical reasoning for each diagnosis, referencing common clinical features, pathophysiology, or typical presentations.\n"
        "3. **Recommended Investigations**: Suggest appropriate next diagnostic tests or investigations to confirm or rule out these conditions.\n\n"
        "4. **Initial Management/Treatment**: Propose evidence-based initial management or general treatment options (e.g., symptomatic relief, lifestyle changes, over-the-counter suggestions).\n"
        "5. **Red Flags/Urgency**: Clearly highlight any red flags or warning signs that would require immediate emergency care or urgent specialist referral.\n\n"
    )
    # Tone requirements and the mandatory closing advice.
    closing_section = (
        "Always maintain a professional, empathetic, and medically accurate tone. Present information clearly, using bullet points or numbered lists where appropriate. "
        "Crucially, always conclude your response by strongly advising the user to consult a qualified healthcare professional for a definitive diagnosis and personalized treatment plan, "
        "as this AI is for informational purposes only and not a substitute for professional medical advice."
    )
    system_prompt = role_section + task_section + closing_section

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query},
    ]
    try:
        completion = groq_client.chat.completions.create(
            messages=conversation,
            model=model,
            temperature=0.7,
            max_tokens=800,  # room for a detailed, multi-section answer
            top_p=1,
            stop=None,
            stream=False,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return "Sorry, I am unable to process that request at the moment due to an AI service error."
# --- Clinical Diagnosis Model Integration (DATEXIS/CORe-clinical-diagnosis-prediction) ---

# A predicted label is discarded when, lower-cased and trimmed, it is exactly
# one of these generic / non-diagnostic terms. Longer labels that merely
# contain one of the words are kept.
_GENERIC_DIAGNOSIS_LABELS = frozenset({
    'unspecified', 'acute', 'chronic', 'use', 'status', 'other', 'not elsewhere classified',
    'no diagnosis', 'history of', 'finding', 'problem', 'syndrome', 'disease', 'disorder',
    'condition', 'code', 'category', 'episode', 'complication', 'sequelae', 'factor',
    'manifestation', 'procedure', 'examination', 'observation', 'symptoms', 'sign',
    'unconfirmed', 'type', 'group', 'normal', 'unknown', 'level', 'positive', 'negative',
    'patient', 'value', 'test', 'result', 'diagnosis', 'kidney', 'stage', 'without',
    'essential', 'with', 'due to', 'related to', 'of', 'organ', 'function', 'system', 'body',
    'region', 'clinical', 'consideration', 'presence', 'absence', 'mild', 'moderate', 'severe',
    'manifesting', 'affecting', 'affect', 'area', 'part', 'general', 'specific',
    'diagnosis of', 'finding of', 'problem of', 'type of', 'group of',
    'unlikely', 'possible', 'likely', 'primary', 'secondary',
    'and', 'or', 'by', 'for', 'in', 'on',
    'symptom', 'pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue',
    'diarrhea', 'sore throat', 'hemorrhage',
    # Roman numerals
    'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x',
    # Single letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't',
    'u', 'w', 'y', 'z',
    'longterm', 'shortterm', 'controlled', 'uncontrolled', 'recurrent', 'intermittent',
    'persistent', 'follow-up', 'screening', 'encounter', 'admission', 'discharge',
    'acute on chronic', 'other specified', 'not otherwise specified',
})

# Substrings that raise the overall severity. Anything matching neither tuple
# stays at the default "Low Severity".
_HIGH_SEVERITY_KEYWORDS = (
    'heart attack', 'stroke', 'failure', 'hemorrhage', 'cancer', 'acute respiratory', 'sepsis',
    'cardiac arrest', 'severe', 'malignant', 'emergency', 'rupture', 'infarction', 'coma',
    'shock', 'decompensation', 'crisis', 'perforation', 'ischemia', 'embolism', 'aneurysm',
    'critical',
)
_MODERATE_SEVERITY_KEYWORDS = (
    'hypertension', 'diabetes', 'pneumonia', 'infection', 'chronic', 'inflammation', 'moderate',
    'insufficiency', 'fracture', 'ulcer', 'hepatitis', 'renal', 'vascular', 'disease',
    'disorder', 'syndrome', 'acute', 'bronchitis', 'appendicitis', 'gallstones', 'pancreatitis',
    'lupus', 'arthritis',
)

# Digits and dots only, e.g. "250.00".
_NUMERIC_CODE_RE = re.compile(r'[\d\.]+')


def _is_generic_diagnosis_label(label):
    """Return True when *label* is too generic to show as a diagnosis."""
    lowered = label.lower().strip()
    # Rule 1: the whole label is a known generic term.
    if lowered in _GENERIC_DIAGNOSIS_LABELS:
        return True
    # Rule 2: one- or two-character numeric/alphanumeric fragments ('ii', 'a1').
    if len(lowered) <= 2 and (lowered.replace('.', '').isdigit() or lowered.isalnum()):
        return True
    # Rule 3: short purely numeric codes. Heuristic only; codes that contain
    # letters are not matched by this pattern.
    return bool(_NUMERIC_CODE_RE.fullmatch(lowered)) and len(lowered) < 7


def _infer_overall_severity(diagnoses):
    """Map diagnosis labels to "High Severity", "Moderate Severity" or "Low Severity"."""
    overall = "Low Severity"
    for label in diagnoses:
        lowered = label.lower()
        if any(keyword in lowered for keyword in _HIGH_SEVERITY_KEYWORDS):
            return "High Severity"  # a single high-severity hit decides the result
        if any(keyword in lowered for keyword in _MODERATE_SEVERITY_KEYWORDS):
            overall = "Moderate Severity"  # keep scanning in case a later label is high
    return overall


def get_diagnosis_and_severity_from_model(symptoms_text, threshold=0.5, timeout=30):
    """Query the CORe clinical-diagnosis model and derive a severity label.

    Args:
        symptoms_text: Free-text symptom description sent as the model input.
        threshold: Minimum prediction score for a label to be considered.
        timeout: Seconds to wait for the Inference API before giving up, so a
            stalled request cannot block the app indefinitely.

    Returns:
        ``(severity_text, diagnoses)`` where ``diagnoses`` holds at most five
        de-duplicated, non-generic labels in the order the API returned them.
        When the API key is missing, the request fails, or the response cannot
        be processed, ``severity_text`` is a "Severity Assessment Unavailable
        (...)" message and ``diagnoses`` is empty.
    """
    if not HF_MODEL_AVAILABLE or not HF_API_TOKEN:
        st.warning("Hugging Face API key not available. Cannot assess severity.")
        return "Severity Assessment Unavailable (API Key Missing)", []

    API_URL = "https://api-inference.huggingface.co/models/DATEXIS/CORe-clinical-diagnosis-prediction"
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {"inputs": symptoms_text}

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()

        # Only a list of prediction sets (list of lists of {'label', 'score'})
        # is understood; any other shape yields no predictions.
        raw_predicted_diagnoses = []
        if result and isinstance(result, list) and isinstance(result[0], list):
            raw_predicted_diagnoses = [
                item['label']
                for prediction_set in result
                for item in prediction_set
                if item['score'] >= threshold
            ]

        specific_labels = [
            label for label in raw_predicted_diagnoses if not _is_generic_diagnosis_label(label)
        ]
        # dict.fromkeys removes duplicates while keeping first-seen order.
        filtered_diagnoses = list(dict.fromkeys(specific_labels))[:5]

        if not filtered_diagnoses:
            return "Overall Symptom Severity (AI Assessment): Undetermined Severity", []

        overall_severity = _infer_overall_severity(filtered_diagnoses)
        return f"Overall Symptom Severity (AI Assessment): {overall_severity}", filtered_diagnoses
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses and land here as well.
        st.error(f"Network or API Error during clinical diagnosis model call: {e}. Check API key and internet connection.")
        return "Severity Assessment Unavailable (Network Error)", []
    except Exception as e:
        st.error(f"Error processing clinical diagnosis model response: {e}. Unexpected data format or mapping issue.")
        return "Severity Assessment Unavailable (Processing Error)", []
# --- Functions for PDF generation (retained and adjusted for DB history) ---
def _wrap_text_for_pdf(text, max_width, measure, chunk_size=50):
    """Break *text* into lines whose measured width stays below *max_width*.

    Args:
        text: Text to wrap; embedded newlines start a new output line.
        max_width: Width limit, in the units returned by *measure*.
        measure: Callable returning the rendered width of a string.
        chunk_size: Character count used to cut a single word that is wider
            than *max_width* on its own.

    Returns:
        List of lines ready to be drawn one below the other.
    """
    lines = []
    for paragraph in str(text).splitlines() or [""]:
        current_words = []
        for word in paragraph.split(' '):
            if measure(' '.join(current_words + [word])) < max_width:
                current_words.append(word)
                continue
            if current_words:
                lines.append(' '.join(current_words))
            if measure(word) >= max_width:
                # A single over-long token (e.g. a URL) is cut into fixed-size pieces.
                lines.extend(word[i:i + chunk_size] for i in range(0, len(word), chunk_size))
                current_words = []
            else:
                current_words = [word]
        if current_words:
            lines.append(' '.join(current_words))
    return lines


def generate_pdf_report(history_data):
    """Render the user's history as a PDF and return it as an in-memory buffer.

    Args:
        history_data: Iterable of history rows shaped like
            ``(history_id, symptoms, predicted_diseases, query_timestamp)``.

    Returns:
        ``io.BytesIO`` positioned at the start of the finished PDF.
    """
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    width, height = letter
    left_margin, bottom_margin, line_height = 50, 50, 12
    max_text_width = width - 100

    def measure(text):
        return c.stringWidth(text, "Helvetica", 10)

    def start_continuation_page():
        """Open a new page with the continuation header; return the first text y."""
        c.showPage()
        c.setFont("Helvetica-Bold", 16)
        c.drawString(50, height - 50, "MediBot Health Report (Continued)")
        c.setFont("Helvetica", 10)
        return height - 100

    c.setFont("Helvetica-Bold", 16)
    c.drawString(50, height - 50, "MediBot Health Report")
    c.setFont("Helvetica", 10)
    c.drawString(50, height - 70, f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    y = height - 100
    for entry in history_data:
        timestamp, symptoms_text, predicted_diseases = entry[3], entry[1], entry[2]

        # Do not start an entry in the last stretch of a page.
        if y < 100:
            y = start_continuation_page()

        # NOTE(review): timestamps are expected to be datetime objects; anything
        # without strftime (e.g. a string from the DB driver) is printed as-is.
        if hasattr(timestamp, "strftime"):
            stamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
        else:
            stamp = str(timestamp)

        c.setFont("Helvetica-Bold", 10)
        c.drawString(left_margin, y, f"Timestamp: {stamp}")
        y -= 15
        c.setFont("Helvetica", 10)

        for paragraph in (
            f"Symptoms/Question: {symptoms_text}",
            f"Insight/Response: {predicted_diseases}",
        ):
            for line in _wrap_text_for_pdf(paragraph, max_text_width, measure):
                # Long answers can span pages, so the page break is checked per line.
                if y < bottom_margin:
                    y = start_continuation_page()
                c.drawString(left_margin, y, line)
                y -= line_height
        y -= 20  # gap between entries

    c.save()
    buffer.seek(0)
    return buffer
# --- Symptom Checker Logic (Now using hardcoded data and severity) ---
def get_disease_info_from_csv(selected_symptoms: list, disease_symptoms_map, disease_description_map, disease_precaution_map, symptom_severity_map) -> list[tuple[str, str, list[str]]]:
    """Rank diseases by the severity-weighted overlap with the chosen symptoms.

    Each selected symptom that a disease lists adds its severity weight
    (1 when the symptom has no entry in ``symptom_severity_map``) to that
    disease's score. Diseases with a positive score are ordered from highest
    to lowest and the best five are returned.

    Returns:
        List of ``(disease_name, description, precautions)`` tuples, with
        placeholder text where a description or precaution list is missing.
    """
    wanted = [choice.strip().lower() for choice in selected_symptoms]

    scored = []  # (disease, score) pairs, in map order
    for disease, listed in disease_symptoms_map.items():
        listed_lower = [name.lower() for name in listed]
        score = sum(
            symptom_severity_map.get(symptom, 1)
            for symptom in wanted
            if symptom in listed_lower
        )
        if score > 0:
            scored.append((disease, score))

    # Highest score first; equal scores keep their map order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return [
        (
            name,
            disease_description_map.get(name, "No description available."),
            disease_precaution_map.get(name, ["No precautions available."]),
        )
        for name, _score in scored[:5]
    ]
# --- Function to parse insight text for analytics ---
def parse_insight_text_for_conditions(insight_text: str) -> list[str]:
    """Extract condition names from a stored insight string.

    Expected format:
        "Severity. Dataset Conditions: Cond1, Cond2; AI Suggestions: AICond1, AIC2"
    or just
        "Severity. No specific insights found."

    Args:
        insight_text: Insight text as saved to the history. Empty or missing
            text is tolerated and yields no conditions.

    Returns:
        Unique condition names in order of first appearance (dataset
        conditions before AI suggestions), so callers that show only the first
        few names get a stable result.
    """
    if not insight_text:
        return []

    conditions = []

    # The dataset part ends at the ';' separator; the AI part runs to end of line.
    dataset_match = re.search(r"Dataset Conditions:\s*([^;]+)", insight_text)
    ai_match = re.search(r"AI Suggestions:\s*(.+)", insight_text)

    if dataset_match:
        dataset_str = dataset_match.group(1).strip()
        if dataset_str and dataset_str.lower() != "no specific insights found":
            conditions.extend(c.strip() for c in dataset_str.split(',') if c.strip())

    if ai_match:
        ai_str = ai_match.group(1).strip()
        if ai_str and ai_str.lower() not in ("no specific insights found", "ai assistant is not available."):
            conditions.extend(c.strip() for c in ai_str.split(',') if c.strip())

    # dict.fromkeys drops duplicates but, unlike set(), keeps insertion order.
    return list(dict.fromkeys(conditions))

MediBot - Your Personal Health Assistant 🏥

", unsafe_allow_html=True) st.markdown("""

Explore possible causes and precautions for your symptoms and get answers to health-related questions using advanced AI. Your health journey, simplified and supported.

""", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) if st.button("Get Started", key="welcome_button"): st.session_state.show_welcome = False st.rerun() # Force rerun to ensure app reloads to main page st.markdown("
", unsafe_allow_html=True) st.markdown("""

Important Disclaimer: This app provides preliminary health information based on symptoms and AI analysis. It is not a substitute for professional medical advice, diagnosis, or treatment. Always consult a qualified healthcare provider for a definitive diagnosis and personalized treatment plan, as this AI is for informational purposes only and not a substitute for professional medical advice.

""", unsafe_allow_html=True) else: # Displaying disclaimer at the top of the main app view st.markdown("
" "

" "Disclaimer: This app provides preliminary health information based on symptoms and AI analysis. " "It is not a substitute for professional medical advice, diagnosis, or treatment. Always consult a qualified healthcare provider." "

" "
", unsafe_allow_html=True) # Sidebar for User Management with st.sidebar: st.header("User Management") # Call the refactored user management function, passing st.session_state render_user_management_sidebar(st.session_state) st.markdown("---") st.header("Navigation") # Add a button to clear chat history if st.button("Clear Chat History", help="Clears all messages from the current session."): st.session_state.chat_history = [] st.session_state.last_chat_response = "" st.session_state.chat_input_value = "" st.success("Chat history cleared!") # PDF Download Button - available if user is logged in and has history if st.session_state.get("user_id"): user_history = get_user_history(st.session_state["user_id"]) # Directly call get_user_history if user_history: pdf_buffer = generate_pdf_report(user_history) st.download_button( label="Download Health Report (PDF)", data=pdf_buffer, file_name="medibot_health_report.pdf", mime="application/pdf", help="Download your chat and symptom checker history as a PDF report." ) else: st.info("No history to download yet. Interact with MediBot to generate a report.") else: st.info("Log in to download your health report.") st.markdown("

MediBot - Your Health Assistant 🩺

", unsafe_allow_html=True) # Top-level tabs: Home, History, Feedback, About, Insights # Re-ordered tabs slightly to put Insights closer to History main_tab_home, main_tab_history, main_tab_insights, main_tab_feedback, main_tab_about = st.tabs(["Home", "History", "Insights", "Feedback", "About"]) with main_tab_home: st.markdown("

How can I help you today?

", unsafe_allow_html=True) # Nested tabs for Symptom Checker and Chat with MediBot tab_symptom_checker, tab_chatbot = st.tabs(["Symptom Checker", "Chat with MediBot"]) with tab_symptom_checker: st.markdown("

Select from common symptoms to get insights:

", unsafe_allow_html=True) selected_symptoms = st.multiselect( "Select your symptoms:", options=hardcoded_symptoms, # Use symptoms derived from CSV default=[], key="symptom_select", help="Choose one or more symptoms you are experiencing." ) if st.button("Get Health Insight", key="diagnose_button"): if not selected_symptoms: st.error("Please select at least one symptom.") else: with st.spinner("Analyzing symptoms..."): # Get overall severity and predicted diagnoses from the Hugging Face model combined_symptoms_text = ", ".join(selected_symptoms) severity_assessment_text, predicted_diagnoses_from_model = get_diagnosis_and_severity_from_model(combined_symptoms_text) # Get relevant disease info from your CSV datasets diseases_from_csv = get_disease_info_from_csv(selected_symptoms, disease_symptoms_map, disease_description_map, disease_precaution_map, symptom_severity_map) st.markdown(f"**{severity_assessment_text}**", unsafe_allow_html=True) st.markdown("

Possible Conditions and Insights:

", unsafe_allow_html=True) # Unified Heading # --- Process and display CSV-based diseases first --- if diseases_from_csv: st.markdown("

From our Medical Knowledge Base:

", unsafe_allow_html=True) for disease_name, description, precautions_list in diseases_from_csv: with st.expander(disease_name): st.write(f"**Description**: {description}") st.write("**Precautions**:") if precautions_list: for precaution in precautions_list: if precaution: # Only display non-empty precautions st.write(f"- {precaution}") else: st.write("- No specific precautions listed.") st.markdown("---") # Separator for each disease in expander else: st.info("No common conditions found for these symptoms in our dataset. Please try adding more specific symptoms or switch to 'Chat with MediBot' for a general query.") # --- Integrate relevant AI Model diagnoses not covered by CSV --- additional_ai_diagnoses = [] # Create a set of normalized disease names from CSV for quick lookup and fuzzy matching csv_disease_names_normalized = {d[0].lower() for d in diseases_from_csv} for model_diag in predicted_diagnoses_from_model: model_diag_normalized = model_diag.lower() is_covered_by_csv = False # Check for strong fuzzy match with CSV diseases for csv_diag_name in csv_disease_names_normalized: # Use token_set_ratio for better matching of multi-word terms, less sensitive to word order if fuzz.token_set_ratio(model_diag_normalized, csv_diag_name) > 85: # High threshold for a strong match is_covered_by_csv = True break if not is_covered_by_csv: additional_ai_diagnoses.append(model_diag) if additional_ai_diagnoses: st.markdown("

Additional AI-Suggested Clinical Considerations:

", unsafe_allow_html=True) st.write("The AI model suggests the following clinical terms based on your symptoms:") for diag in additional_ai_diagnoses: st.write(f"- **{diag}**: This is a medical term that the AI found relevant. Please consult a healthcare professional for more details.") st.info("These are general medical terms and require professional interpretation. They may not have specific descriptions or precautions available in our dataset.") elif not diseases_from_csv: # Only if no CSV diseases AND no unique AI diagnoses st.info("The AI model could not confidently predict specific or unique diagnoses from the provided symptoms. Try different symptoms or consult a doctor.") st.write("---") # Final Separator # --- Save to history (adjusting content to reflect unified response) --- history_content_parts = [] if diseases_from_csv: history_content_parts.append("Dataset Conditions: " + ", ".join([d[0] for d in diseases_from_csv])) if additional_ai_diagnoses: history_content_parts.append("AI Suggestions: " + ", ".join(additional_ai_diagnoses)) # Combine all insight for history predicted_diseases_str_for_history = f"{severity_assessment_text.replace('Overall Symptom Severity (AI Assessment): ', '')}. " + "; ".join(history_content_parts) if not history_content_parts: predicted_diseases_str_for_history = f"{severity_assessment_text.replace('Overall Symptom Severity (AI Assessment): ', '')}. No specific insights found." if st.session_state.get("user_id"): # Call the refactored save history function save_history_to_db_if_logged_in(st.session_state.user_id, ", ".join(selected_symptoms), predicted_diseases_str_for_history) st.success("Analysis saved to your history.") else: st.info("Log in to save this analysis to your history.") with tab_chatbot: # Ask a Health Question (Chatbot) st.markdown("

Describe your symptoms or ask a general health question:

", unsafe_allow_html=True) # Use st.form without clear_on_submit=True to keep text in the area with st.form("chat_form"): # Removed clear_on_submit=True user_question = st.text_area( "Input your issue:", value=st.session_state.chat_input_value, # Use session state for value key="chat_input_widget", # Unique widget key placeholder="e.g., I am having a severe fever and body aches. Or, what are the causes of high blood pressure?", height=120 # Slightly taller text area ) chat_submit_button = st.form_submit_button("Ask MediBot") if chat_submit_button: if user_question: # For the chatbot, if it's a direct question, severity is undetermined # unless passed from a preceding symptom check. # For simplicity, if not from symptom checker, pass "Undetermined Severity". severity_for_groq_prompt = "Undetermined Severity" with st.spinner("MediBot is thinking..."): groq_answer = get_groq_response(user_question, severity_label=severity_for_groq_prompt) st.markdown("**MediBot's Answer:**") st.write(groq_answer) # Save Q&A to chat history (if desired, not necessarily DB) st.session_state.chat_history.append({"user": user_question, "bot": groq_answer}) # Save to database history if logged in if st.session_state.get("user_id"): # Prepend "Question: " to the input for history tracking save_history_to_db_if_logged_in(st.session_state.user_id, f"Question: {user_question}", groq_answer) st.success("Your question and answer have been saved to history.") else: st.info("Log in to save this Q&A to your history.") else: st.warning("Please type your question to get an answer.") with main_tab_history: st.header("Your Health History") user_id = st.session_state.get("user_id") if user_id: st.info("This section shows your saved symptom analyses and health questions.") history_data = get_user_history(user_id) # Fetch history from database if history_data: st.subheader("Past Interactions:") # Display history in reverse chronological order using expanders for entry in reversed(history_data): timestamp, 
symptoms_text, insight_text = entry[3], entry[1], entry[2] # Determine summary title based on the nature of the input summary_title_prefix = "" expander_icon = "" # Initialize icon here if symptoms_text.startswith("Question: "): keyword = extract_keyword(symptoms_text, hardcoded_symptoms) summary_title_prefix = f"Question: {keyword}" if keyword != "Unknown" else "General Question" expander_icon = "💬" else: # Symptom checker entry (🩺) summary_conditions = parse_insight_text_for_conditions(insight_text) if summary_conditions: summary_title_prefix = ", ".join(summary_conditions[:3]) if len(summary_conditions) > 3: summary_title_prefix += "..." else: # MODIFIED LOGIC: If no conditions identified, try to use input symptoms input_symptoms_match = re.search(r"Symptoms/Question: (.*)", symptoms_text) # Adjusted regex for "Symptoms/Question:" if input_symptoms_match: extracted_input_symptoms = input_symptoms_match.group(1).strip() # Clean up potential extra spaces/commas, take first few if too many clean_symptoms = [s.strip() for s in extracted_input_symptoms.split(',') if s.strip()] if clean_symptoms: summary_title_prefix = ", ".join(clean_symptoms[:3]) if len(clean_symptoms) > 3: summary_title_prefix += "..." 
else: summary_title_prefix = "No conditions identified" # Fallback if no symptoms extracted else: summary_title_prefix = "No conditions identified" # Fallback if input format is unexpected expander_icon = "🩺" # MODIFIED LINE: Use Markdown heading (e.g., ###) for larger font and bolding expander_label = f"### **{timestamp.strftime('%Y-%m-%d %H:%M')}** - {expander_icon} *{summary_title_prefix}*" with st.expander(expander_label): st.write(f"**Your Input:** {symptoms_text}") st.write(f"**MediBot's Insight:** {insight_text}") # PDF Download button pdf_buffer = generate_pdf_report(history_data) st.download_button( label="Download History as PDF", data=pdf_buffer, file_name="MediBot_Health_Report.pdf", mime="application/pdf" ) else: st.info("No history found for your account. Start interacting with MediBot!") else: st.warning("Please log in to view your health history.") with main_tab_insights: st.header("Your Health Insights") user_id = st.session_state.get("user_id") if user_id: st.info("This section provides insights into the conditions identified in your past interactions.") history_data = get_user_history(user_id) if history_data: # Filter data for the last 30 days thirty_days_ago = datetime.now() - timedelta(days=30) recent_history = [ entry for entry in history_data if entry[3] and entry[3] > thirty_days_ago # entry[3] is query_timestamp ] if recent_history: all_conditions = [] for entry in recent_history: # entry[2] is predicted_diseases string conditions_from_entry = parse_insight_text_for_conditions(entry[2]) all_conditions.extend(conditions_from_entry) if all_conditions: # Count occurrences of each condition condition_counts = pd.Series(all_conditions).value_counts().reset_index() condition_counts.columns = ['Condition', 'Count'] st.subheader("Most Frequent Conditions in Last 30 Days:") # Display as a bar chart st.bar_chart(condition_counts, x='Condition', y='Count', use_container_width=True) st.write("This chart shows how many times each condition (from both 
dataset and AI suggestions) appeared in your analyses over the past 30 days.") else: st.info("No specific conditions were identified in your recent history (last 30 days).") else: st.info("You have no health interactions in the last 30 days to generate insights.") else: st.info("No history found for your account. Start interacting with MediBot to see insights!") else: st.warning("Please log in to view your health insights.") with main_tab_feedback: st.header("Share Your Feedback") st.write("We appreciate your feedback to improve MediBot.") with st.form("feedback_form", clear_on_submit=True): feedback_text = st.text_area("Your feedback:", height=150, key="feedback_text_area") rating = st.slider("Rate your experience (1-5 stars):", 1, 5, 3, key="feedback_rating") feedback_submit_button = st.form_submit_button("Submit Feedback") if feedback_submit_button: if feedback_text: # In a real app, you would save this to a database st.session_state.feedback.append({"text": feedback_text, "rating": rating, "timestamp": datetime.now()}) st.success("Thank you for your feedback! It has been submitted.") st.session_state.feedback_input = "" # Clear after submission else: st.warning("Please enter your feedback before submitting.") with main_tab_about: st.header("About MediBot") st.write(""" MediBot is a health assistant designed to provide general information based on your symptoms and answer health-related questions. It utilizes various AI models and medical datasets to offer insights. **Technology Stack:** - **Streamlit**: For the interactive web application interface. - **Groq API**: Powers the conversational AI for health questions using high-performance language models (e.g., Llama-3), enabling rapid and relevant responses. - **Hugging Face Inference API**: Integrates with specialized medical text classification models, specifically `DATEXIS/CORe-clinical-diagnosis-prediction`, to predict possible clinical diagnoses (often mapping to ICD-9 codes) from symptom inputs. 
- **Pandas**: For efficient data handling and processing of local CSV medical datasets (symptoms, descriptions, precautions). - **ReportLab**: For generating downloadable PDF reports of your personalized health history. - **Database**: PostgreSQL is used for efficient handling of user's data, interaction history and diagnosis record keeping. **Datasets Used:** - Symptom-to-Disease mapping, Descriptions, and Precautions sourced from: "Disease Symptom Prediction" dataset available on Kaggle. [https://www.kaggle.com/datasets/itachi9604/disease-symptom-description-dataset](https://www.kaggle.com/datasets/itachi9604/disease-symptom-description-dataset) *Credit to Itachi9604 for compiling this valuable dataset.* **Important Disclaimer**: This application is for informational purposes only and should not be used as a substitute for professional medical advice, diagnosis, or treatment. Always consult with a qualified healthcare provider for any health concerns or before making any decisions related to your health. """) st.markdown("[Learn more about Streamlit](https://streamlit.io/)") st.markdown("[Learn more about Groq](https://groq.com/)") st.markdown("[Learn more about Hugging Face](https://huggingface.co/)") st.markdown("---") st.markdown("For professional medical advice, always consult a qualified healthcare provider.")