import io
import os
import re
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import requests
import streamlit as st
from dotenv import load_dotenv
from fuzzywuzzy import fuzz  # Make sure fuzzywuzzy is installed (pip install fuzzywuzzy python-levenshtein)
from groq import Groq
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Import-time problems are collected here and only rendered after
# st.set_page_config() has run: Streamlit requires set_page_config to be the
# first Streamlit command, so calling st.error() inside the import fallbacks
# below would violate that ordering.
_startup_errors: list[str] = []

# Import database functions directly here.
# These functions should NOT import anything from app.py or user_management.py
try:
    from database import (
        check_user_credentials,
        get_user_history,
        register_user,
        save_user_history,
        user_exists,
    )
except ImportError:
    _startup_errors.append(
        "Could not import database functions. Please ensure database.py exists and contains required functions."
    )

    # Dummy functions so the app can still run without a real database.py.
    def get_user_history(user_id):
        """Fallback stub: warn and return an empty history."""
        st.warning("Database function 'get_user_history' not implemented. History will not be persistent.")
        return []

    def register_user(user_id, full_name, dob, email, password):
        """Fallback stub: warn and simulate a successful registration."""
        st.warning("Database function 'register_user' not implemented. User registration will not be persistent.")
        return True  # Simulate success

    def user_exists(user_id):
        """Fallback stub: warn and report that the user does not exist."""
        st.warning("Database function 'user_exists' not implemented. User existence check will not work.")
        return False  # Simulate user not existing

    def check_user_credentials(user_id, password):
        """Fallback stub: warn and simulate a failed login."""
        st.warning("Database function 'check_user_credentials' not implemented. Login will not work.")
        return False  # Simulate login failure

    def save_user_history(user_id, symptoms, predicted_diseases):
        """Fallback stub: warn and discard the history entry."""
        st.warning("Database function 'save_user_history' not implemented. History saving will not be persistent.")

# Import user management functions from user_management.py
# These functions will now receive st.session_state as an argument
from user_management import render_user_management_sidebar, save_history_to_db_if_logged_in

# Import utility functions
try:
    from utils import extract_keyword  # Ensure utils.py has the extract_keyword function
except ImportError:
    _startup_errors.append(
        "Could not import utility functions. Please ensure utils.py exists and contains required functions."
    )

    def extract_keyword(text, keywords):
        """Fallback stub: return the first keyword contained in ``text`` (case-insensitive)."""
        for kw in keywords:
            if kw.lower() in text.lower():
                return kw
        return "Unknown"

# Load environment variables from .env.local for local development.
# On Hugging Face Spaces, st.secrets will be used.
load_dotenv(dotenv_path=".env.local")

# --- Configuration and Initial Setup (MUST BE FIRST) ---
st.set_page_config(page_title="MediBot - Health Assistant", page_icon="🏥", layout="wide")

# Now that the page is configured it is safe to surface import problems.
for _message in _startup_errors:
    st.error(_message)


def _read_setting(secret_name, env_name):
    """Return a credential from ``st.secrets``, falling back to the environment.

    Accessing ``st.secrets`` raises when no secrets file is present (the
    usual situation in local development). That failure must not prevent
    the environment-variable fallback, so it is contained here.

    Args:
        secret_name: Key to look up in ``st.secrets``.
        env_name: Environment variable consulted when the secret is
            missing, empty or unreadable.

    Returns:
        The credential string, or ``None`` when neither source provides one.
    """
    try:
        value = st.secrets.get(secret_name)
    except Exception:  # exact exception type differs between Streamlit versions
        value = None
    return value or os.getenv(env_name)


# Initialize Groq client
groq_client = None
GROQ_AVAILABLE = False
GROQ_API_KEY = _read_setting("GROQ_API_KEY", "GROQ_API_KEY")
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
        GROQ_AVAILABLE = True
    except Exception as e:
        st.error(f"Error initializing Groq client: {e}. Groq chatbot will not be available.")
else:
    st.error("Groq API Key not found. Groq chatbot will not be available.")

# Initialize Hugging Face Inference API client details for DATEXIS/CORe-clinical-diagnosis-prediction
# 'med_model' is the secret name; MED_MODEL is the matching environment variable.
HF_API_TOKEN = _read_setting("med_model", "MED_MODEL")
HF_MODEL_AVAILABLE = bool(HF_API_TOKEN)
if not HF_MODEL_AVAILABLE:
    st.warning("Hugging Face 'med_model' API Key not found. Clinical diagnosis assessment will not be available.")

# Initialize session state variables
for _key, _default in (
    ("chat_history", []),
    ("feedback", []),
    ("show_welcome", True),
    ("chat_input_value", ""),  # To clear the text area after submission
    ("last_chat_response", ""),  # To persist the last chat response
    ("feedback_input", ""),  # For clearing feedback text area
    ("user_id", None),  # Needed for saving history
    ("logged_in_user", None),  # Holds the user_id after successful login
):
    if _key not in st.session_state:
        st.session_state[_key] = _default


# --- HARDCODED DATA LOADING FROM CSVs ---
@st.cache_data  # Cache this function to avoid reloading data on every rerun
def load_csv_data():
    """Load the four CSV knowledge files and build the lookup tables.

    The CSVs are read from the current working directory. If a file is
    missing or cannot be processed, an error is shown and the app is stopped
    via ``st.stop()``.

    Returns:
        A 5-tuple ``(disease_symptoms_map, disease_description_map,
        disease_precaution_map, all_unique_symptoms, symptom_severity_map)``:

        * disease -> list of symptoms (underscores replaced by spaces),
          merged across every row that mentions the disease
        * disease -> description
        * disease -> list of precautions
        * sorted list of every distinct symptom
        * lower-cased symptom -> numeric severity weight
    """
    try:
        # These paths assume the CSVs are directly in the same directory as app.py
        dataset_df = pd.read_csv('dataset.csv').fillna('')  # Fill NaN with empty string
        description_df = pd.read_csv('symptom_Description.csv').fillna('')
        precaution_df = pd.read_csv('symptom_precaution.csv').fillna('')
        severity_df = pd.read_csv('Symptom-severity.csv').fillna('')  # Load symptom severity

        # Dataset mapping diseases to their symptoms. A disease may appear on
        # several rows, so symptoms are accumulated (first-seen order, no
        # duplicates) instead of letting the last row overwrite earlier ones.
        disease_symptoms_map = {}
        for _, row in dataset_df.iterrows():
            disease = str(row['Disease']).strip()
            known_symptoms = disease_symptoms_map.setdefault(disease, [])
            for cell in row.values[1:]:  # every column after 'Disease'
                symptom = str(cell).strip().replace('_', ' ')
                if symptom and symptom not in known_symptoms:
                    known_symptoms.append(symptom)

        # Disease descriptions map
        disease_description_map = {
            str(row['Disease']).strip(): row['Description']
            for _, row in description_df.iterrows()
        }

        # Disease precautions map
        disease_precaution_map = {
            str(row['Disease']).strip(): [str(p).strip() for p in row.values[1:] if str(p).strip()]
            for _, row in precaution_df.iterrows()
        }

        # Symptom severity map. Names are normalised (lowercase, spaces instead
        # of underscores). Rows whose weight is blank or non-numeric are
        # skipped so that score arithmetic never meets a string.
        symptom_severity_map = {}
        for _, row in severity_df.iterrows():
            weight = pd.to_numeric(row['weight'], errors='coerce')
            if pd.isna(weight):
                continue
            symptom_key = str(row['Symptom']).strip().replace('_', ' ').lower()
            symptom_severity_map[symptom_key] = weight

        # Extract all unique symptoms for the multiselect
        all_unique_symptoms = sorted(
            {symptom for symptoms_list in disease_symptoms_map.values() for symptom in symptoms_list}
        )

        return disease_symptoms_map, disease_description_map, disease_precaution_map, all_unique_symptoms, symptom_severity_map

    except FileNotFoundError as e:
        st.error(f"Error: Required CSV file not found. Make sure 'dataset.csv', 'symptom_Description.csv', 'symptom_precaution.csv', and 'Symptom-severity.csv' are in the correct directory. Details: {e}")
        st.stop()  # Stop the app if crucial files are missing
    except Exception as e:
        st.error(f"Error loading CSV data: {e}")
        st.stop()  # Stop the app if data loading fails


disease_symptoms_map, disease_description_map, disease_precaution_map, hardcoded_symptoms, symptom_severity_map = load_csv_data()

# --- Custom CSS for extensive UI improvements ---
st.markdown(""" """, unsafe_allow_html=True)


# --- Groq API Response Function (updated system prompt) ---
def get_groq_response(user_query, severity_label="Undetermined Severity", model="llama-3.3-70b-versatile"):
    """Ask the Groq chat model a health question.

    The system prompt is augmented with ``severity_label``. That label comes
    from the symptom checker; for a direct question it stays at its default
    unless the caller passes the result of a prior analysis.

    Args:
        user_query: The user's question or symptom description.
        severity_label: Perceived severity to embed in the system prompt.
        model: Groq model identifier.

    Returns:
        The model's reply text, or a fixed fallback message when the client
        is unavailable or the API call fails (the failure is also shown via
        ``st.error``).
    """
    if not GROQ_AVAILABLE or groq_client is None:
        return "AI assistant is not available."

    system_prompt = (
        "You are an experienced physician specialized in diagnosis and clinical decision-making. "
        "Your primary role is to analyze presented symptoms or health queries and provide a differential diagnosis along with evidence-based recommendations for next steps. "
        f"Based on initial analysis, the perceived severity of the user's condition is: {severity_label}. "
        "Adjust your tone and recommendations accordingly, emphasizing urgency if severity is 'High Severity'.\n\n"
        "When a user describes symptoms or asks a health question, you should:\n\n"
        "1. **Prioritized Differential Diagnosis**: Provide a prioritized list of possible diagnoses based on the information given, indicating their relative likelihood or confidence (e.g., 'most likely', 'possible', 'less likely').\n"
        "2. **Reasoning**: Briefly explain the clinical reasoning for each diagnosis, referencing common clinical features, pathophysiology, or typical presentations.\n"
        "3. **Recommended Investigations**: Suggest appropriate next diagnostic tests or investigations to confirm or rule out these conditions.\n\n"
        "4. **Initial Management/Treatment**: Propose evidence-based initial management or general treatment options (e.g., symptomatic relief, lifestyle changes, over-the-counter suggestions).\n"
        "5. **Red Flags/Urgency**: Clearly highlight any red flags or warning signs that would require immediate emergency care or urgent specialist referral.\n\n"
        "Always maintain a professional, empathetic, and medically accurate tone. Present information clearly, using bullet points or numbered lists where appropriate. "
        "Crucially, always conclude your response by strongly advising the user to consult a qualified healthcare professional for a definitive diagnosis and personalized treatment plan, "
        "as this AI is for informational purposes only and not a substitute for professional medical advice."
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ],
            model=model,
            temperature=0.7,
            max_tokens=800,  # Increased max_tokens to allow for more detailed responses
            top_p=1,
            stop=None,
            stream=False,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return "Sorry, I am unable to process that request at the moment due to an AI service error."


# --- Clinical Diagnosis Model Integration (DATEXIS/CORe-clinical-diagnosis-prediction) ---
_HF_DIAGNOSIS_API_URL = "https://api-inference.huggingface.co/models/DATEXIS/CORe-clinical-diagnosis-prediction"
_HF_REQUEST_TIMEOUT_SECONDS = 30  # never let a stalled inference call hang the app
_HF_SCORE_THRESHOLD = 0.5
_MAX_DIAGNOSES = 5

# Labels that are discarded when a prediction consists of exactly one of these
# terms. This is an exact-label match, not a whole-word search: a label such as
# "acute bronchitis" is kept even though "acute" is listed.
_GENERIC_DIAGNOSIS_LABELS = frozenset({
    'unspecified', 'acute', 'chronic', 'use', 'status', 'other', 'not elsewhere classified',
    'no diagnosis', 'history of', 'finding', 'problem', 'syndrome', 'disease', 'disorder',
    'condition', 'code', 'category', 'episode', 'complication', 'sequelae', 'factor',
    'manifestation', 'procedure', 'examination', 'observation', 'symptoms', 'sign',
    'unconfirmed', 'type', 'group', 'normal', 'unknown', 'level', 'positive', 'negative',
    'patient', 'value', 'test', 'result', 'diagnosis', 'kidney', 'stage', 'without',
    'essential', 'with', 'due to', 'related to', 'of', 'organ', 'function', 'system', 'body',
    'region', 'clinical', 'consideration', 'presence', 'absence', 'mild', 'moderate', 'severe',
    'manifesting', 'affecting', 'affect', 'area', 'part', 'general', 'specific',
    'diagnosis of', 'finding of', 'problem of', 'type of', 'group of',
    'unlikely', 'possible', 'likely', 'primary', 'secondary',
    'and', 'or', 'by', 'for', 'in', 'on',
    'symptom', 'pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue',
    'diarrhea', 'sore throat', 'hemorrhage',
    # Additional common non-diagnostic terms from ICD or general medical language
    'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x',  # Roman numerals
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't',
    'u', 'w', 'y', 'z',  # Single letters
    'longterm', 'shortterm', 'controlled', 'uncontrolled', 'recurrent', 'intermittent',
    'persistent', 'follow-up', 'screening', 'encounter', 'admission', 'discharge',
    'acute on chronic', 'other specified', 'not otherwise specified',
})

# Substrings that raise the inferred severity. "Low Severity" is the default
# when nothing matches; its list is informational and is not consulted.
_SEVERITY_KEYWORDS = {
    "High Severity": ['heart attack', 'stroke', 'failure', 'hemorrhage', 'cancer', 'acute respiratory', 'sepsis', 'cardiac arrest', 'severe', 'malignant', 'emergency', 'rupture', 'infarction', 'coma', 'shock', 'decompensation', 'crisis', 'perforation', 'ischemia', 'embolism', 'aneurysm', 'critical'],
    "Moderate Severity": ['hypertension', 'diabetes', 'pneumonia', 'infection', 'chronic', 'inflammation', 'moderate', 'insufficiency', 'fracture', 'ulcer', 'hepatitis', 'renal', 'vascular', 'disease', 'disorder', 'syndrome', 'acute', 'bronchitis', 'appendicitis', 'gallstones', 'pancreatitis', 'lupus', 'arthritis'],
    "Low Severity": ['headache', 'pain', 'mild', 'allergy', 'fever', 'cough', 'common cold', 'dermatitis', 'arthritis', 'influenza', 'viral', 'sprain', 'strain', 'gastritis', 'sore throat', 'conjunctivitis', 'sinusitis', 'bruise', 'rash', 'minor'],
}


def _is_generic_diagnosis(label):
    """Return True when a predicted label is too generic to show as a diagnosis."""
    lowered = label.lower().strip()
    # Rule 1: the whole label is a known generic term.
    if lowered in _GENERIC_DIAGNOSIS_LABELS:
        return True
    # Rule 2: very short numeric or alphanumeric strings (e.g. 'ii', 'a1').
    if len(lowered) <= 2 and (lowered.replace('.', '').isdigit() or lowered.isalnum()):
        return True
    # Rule 3: short, purely numeric codes (e.g. '250.00'). Heuristic only;
    # codes containing letters (such as 'E11.9') are not matched by this rule.
    if len(lowered) < 7 and re.fullmatch(r'[\d\.]+', lowered):
        return True
    return False


def _infer_severity(diagnoses):
    """Map diagnosis labels to 'High', 'Moderate' or 'Low Severity' by keyword."""
    lowered_labels = [label.lower() for label in diagnoses]
    for level in ("High Severity", "Moderate Severity"):
        keywords = _SEVERITY_KEYWORDS[level]
        if any(keyword in label for label in lowered_labels for keyword in keywords):
            return level
    return "Low Severity"


def get_diagnosis_and_severity_from_model(symptoms_text):
    """Predict diagnoses with the Hugging Face model and infer a severity.

    Predictions scoring at least 0.5 are kept, generic labels are filtered
    out, duplicates are removed and at most five labels are returned.
    Severity is derived from keywords found in the remaining labels.

    Args:
        symptoms_text: Free-text description of the symptoms.

    Returns:
        A ``(severity_message, diagnoses)`` tuple. ``diagnoses`` is an empty
        list when the API key is missing, the request fails, the response
        cannot be processed, or nothing specific survives filtering.
    """
    if not HF_MODEL_AVAILABLE or not HF_API_TOKEN:
        st.warning("Hugging Face API key not available. Cannot assess severity.")
        return "Severity Assessment Unavailable (API Key Missing)", []

    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {"inputs": symptoms_text}

    try:
        response = requests.post(
            _HF_DIAGNOSIS_API_URL,
            headers=headers,
            json=payload,
            timeout=_HF_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        result = response.json()

        # Expected shape: a list of prediction sets, each a list of
        # {'label': ..., 'score': ...} dicts. Anything else yields no labels.
        raw_predicted_diagnoses = []
        if result and isinstance(result, list) and isinstance(result[0], list):
            raw_predicted_diagnoses = [
                item['label']
                for prediction_set in result
                for item in prediction_set
                if item['score'] >= _HF_SCORE_THRESHOLD
            ]

        specific = [label for label in raw_predicted_diagnoses if not _is_generic_diagnosis(label)]
        # dict.fromkeys removes duplicates while keeping the model's order.
        filtered_diagnoses = list(dict.fromkeys(specific))[:_MAX_DIAGNOSES]

        if not filtered_diagnoses:
            return "Overall Symptom Severity (AI Assessment): Undetermined Severity", []

        overall_severity = _infer_severity(filtered_diagnoses)
        return f"Overall Symptom Severity (AI Assessment): {overall_severity}", filtered_diagnoses

    except requests.exceptions.RequestException as e:
        st.error(f"Network or API Error during clinical diagnosis model call: {e}. Check API key and internet connection.")
        return "Severity Assessment Unavailable (Network Error)", []
    except Exception as e:
        st.error(f"Error processing clinical diagnosis model response: {e}. Unexpected data format or mapping issue.")
        return "Severity Assessment Unavailable (Processing Error)", []


# --- Functions for PDF generation (retained and adjusted for DB history) ---
def _wrap_pdf_text(text, max_width, measure, chunk_size=50):
    """Split ``text`` into lines whose measured width stays below ``max_width``.

    Args:
        text: Text to wrap; embedded newlines start a new line.
        max_width: Maximum allowed width, in the units ``measure`` returns.
        measure: Callable returning the rendered width of a string.
        chunk_size: Character count used to break a single word that is
            wider than ``max_width`` on its own.

    Returns:
        The wrapped lines, in order.
    """
    lines = []
    for paragraph in str(text).splitlines():
        current_words = []
        for word in paragraph.split(' '):
            candidate = ' '.join(current_words + [word])
            if measure(candidate) < max_width:
                current_words.append(word)
                continue
            if current_words:  # flush only real content, never a blank line
                lines.append(' '.join(current_words))
            if measure(word) >= max_width:
                # A single word is too wide: break it into fixed-size chunks.
                lines.extend(word[i:i + chunk_size] for i in range(0, len(word), chunk_size))
                current_words = []
            else:
                current_words = [word]
        if current_words:
            lines.append(' '.join(current_words))
    return lines


def generate_pdf_report(history_data):
    """Render the user's history as a PDF.

    Args:
        history_data: Iterable of rows shaped
            ``(history_id, symptoms, predicted_diseases, query_timestamp)``.
            The timestamp may be a datetime-like object or a value that is
            simply converted with ``str()``.

    Returns:
        An ``io.BytesIO`` positioned at the start of the generated PDF.
    """
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    width, height = letter
    left_margin = 50
    bottom_margin = 50
    line_height = 12
    max_text_width = width - 100

    def measure(text):
        return c.stringWidth(text, "Helvetica", 10)

    def start_continuation_page():
        """Begin a new page with the continuation header; return the start y."""
        c.showPage()
        c.setFont("Helvetica-Bold", 16)
        c.drawString(left_margin, height - 50, "MediBot Health Report (Continued)")
        c.setFont("Helvetica", 10)
        return height - 100

    c.setFont("Helvetica-Bold", 16)
    c.drawString(left_margin, height - 50, "MediBot Health Report")
    c.setFont("Helvetica", 10)
    c.drawString(left_margin, height - 70, f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    y = height - 100
    for entry in history_data:
        timestamp, symptoms_text, predicted_diseases = entry[3], entry[1], entry[2]

        if y < 100:  # too little room left to start another entry
            y = start_continuation_page()

        if hasattr(timestamp, "strftime"):
            timestamp_text = timestamp.strftime('%Y-%m-%d %H:%M:%S')
        else:
            timestamp_text = str(timestamp)

        c.setFont("Helvetica-Bold", 10)
        c.drawString(left_margin, y, f"Timestamp: {timestamp_text}")
        y -= 15
        c.setFont("Helvetica", 10)

        for paragraph in (f"Symptoms/Question: {symptoms_text}", f"Insight/Response: {predicted_diseases}"):
            for line in _wrap_pdf_text(paragraph, max_text_width, measure):
                if y < bottom_margin:  # long entries continue on a new page
                    y = start_continuation_page()
                c.drawString(left_margin, y, line)
                y -= line_height

        y -= 20  # gap between entries

    c.save()
    buffer.seek(0)
    return buffer


# --- Symptom Checker Logic (Now using hardcoded data and severity) ---
def get_disease_info_from_csv(selected_symptoms: list, disease_symptoms_map, disease_description_map, disease_precaution_map, symptom_severity_map) -> list[tuple[str, str, list[str]]]:
    """Rank diseases by the severity-weighted overlap with the chosen symptoms.

    Each selected symptom that a disease lists adds that symptom's severity
    weight (1 when the symptom has no weight) to the disease's score.

    Args:
        selected_symptoms: Symptoms picked by the user.
        disease_symptoms_map: Disease -> list of symptoms.
        disease_description_map: Disease -> description.
        disease_precaution_map: Disease -> list of precautions.
        symptom_severity_map: Lower-cased symptom -> weight.

    Returns:
        Up to five ``(disease_name, description, precautions)`` tuples,
        highest score first. Diseases with no matching symptom are omitted.
    """
    # Normalize selected symptoms for consistent matching
    normalized_selected_symptoms = [s.strip().lower() for s in selected_symptoms]

    matching_diseases_with_scores = {}  # disease -> weighted score
    for disease, symptoms_list in disease_symptoms_map.items():
        disease_symptoms = {s.lower() for s in symptoms_list}  # set: O(1) membership
        weighted_score = sum(
            symptom_severity_map.get(symptom, 1)
            for symptom in normalized_selected_symptoms
            if symptom in disease_symptoms
        )
        if weighted_score > 0:
            matching_diseases_with_scores[disease] = weighted_score

    # Sort diseases by the weighted score (highest first)
    ranked = sorted(matching_diseases_with_scores.items(), key=lambda item: item[1], reverse=True)

    # Limit to top 5 most matching diseases (or fewer if less than 5 matches)
    return [
        (
            disease_name,
            disease_description_map.get(disease_name, "No description available."),
            disease_precaution_map.get(disease_name, ["No precautions available."]),
        )
        for disease_name, _ in ranked[:5]
    ]


# --- Function to parse insight text for analytics ---
def parse_insight_text_for_conditions(insight_text: str) -> list[str]:
    """Extract condition names from a stored insight string.

    Expected format:
    "Severity. Dataset Conditions: Cond1, Cond2; AI Suggestions: AICond1, AIC2"
    or just "Severity. No specific insights found."

    Args:
        insight_text: The combined insight text saved in the history.

    Returns:
        The distinct condition names in first-seen order (dataset conditions
        before AI suggestions). Empty when no section is present or a section
        only holds a "nothing found" placeholder.
    """
    placeholders = {"no specific insights found", "ai assistant is not available"}
    sections = (
        re.search(r"Dataset Conditions:\s*([^;]+)", insight_text),
        re.search(r"AI Suggestions:\s*(.+)", insight_text),
    )

    conditions = []
    for match in sections:
        if not match:
            continue
        section_text = match.group(1).strip()
        # Compare without the trailing period so both spellings are recognised.
        if not section_text or section_text.lower().rstrip('.') in placeholders:
            continue
        conditions.extend(part.strip() for part in section_text.split(',') if part.strip())

    # Remove duplicates while keeping a stable, deterministic order.
    return list(dict.fromkeys(conditions))


# NOTE(review): the line below is the start of the main application logic. Its
# welcome-page markup continues beyond this section and is incomplete here, so
# the text is kept exactly as it was.
# --- Main Application Logic --- # Ensure feedback_input is initialized if not already (redundant if done at start, but harmless) if "feedback_input" not in st.session_state: st.session_state.feedback_input = "" if st.session_state.show_welcome: # A more visually appealing welcome page st.markdown("
Explore possible causes and precautions for your symptoms and get answers to health-related questions using advanced AI. Your health journey, simplified and supported.
Important Disclaimer: This app provides preliminary health information based on symptoms and AI analysis. It is not a substitute for professional medical advice, diagnosis, or treatment. Always consult a qualified healthcare provider for a definitive diagnosis and personalized treatment plan, as this AI is for informational purposes only and not a substitute for professional medical advice.
" "Disclaimer: This app provides preliminary health information based on symptoms and AI analysis. " "It is not a substitute for professional medical advice, diagnosis, or treatment. Always consult a qualified healthcare provider." "
" "