# File: logic/nlp_report.py
"""LLM-backed patient summary generation plus a data-quality report helper.

The GGUF model is downloaded/loaded lazily on first use (see ``_get_llm``) so
that importing this module is cheap and needs no network access.
"""

import logging

import pandas as pd
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

logger = logging.getLogger(__name__)

MODEL_REPO = "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
MODEL_FILENAME = "deepcogito_cogito-v1-preview-llama-8B-Q8_0.gguf"

# Lazily-initialized shared model handle; populated by _get_llm() on first call.
llm = None

# String values (stripped, case-insensitive) treated as truthy by normalize_bool.
_TRUTHY_STRINGS = {"yes", "true", "1"}

# (column, expected truthiness, narrative fragment): the fragment is included in
# the social-context string when normalize_bool(row[column]) equals `expected`.
_SOCIAL_FLAGS = (
    ("Lives_Alone", True, "lives alone"),
    ("Housing_Instability", True, "experiencing housing instability"),
    # NOTE(review): a truthy Transportation_Access maps to "limited access" —
    # the column name suggests the opposite polarity; confirm with the data dictionary.
    ("Transportation_Access", True, "has limited transportation access"),
    ("Food_Insecurity", True, "faces food insecurity"),
    ("Primary_Care_Established", False, "does not have a primary care provider"),
    ("FollowUp_Scheduled", False, "has no follow-up scheduled"),
)


def _get_llm():
    """Return the shared Llama instance, downloading/loading the model on first use."""
    global llm
    if llm is None:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILENAME,
            cache_dir="models",
        )
        llm = Llama(
            model_path=model_path,
            n_ctx=32048,  # NOTE(review): unusual size — likely meant 32768; confirm
            chat_format="chatml",
            verbose=False,
        )
    return llm


def normalize_bool(value):
    """Coerce a spreadsheet-style cell value to bool.

    Strings are stripped, lower-cased and tested against {"yes", "true", "1"};
    any other string is False. Non-strings fall back to ``bool(value)``.
    """
    if isinstance(value, str):
        return value.strip().lower() in _TRUTHY_STRINGS
    return bool(value)


def generate_patient_summary(patient_row, tone="executive", override_prompt=None):
    """Generate an LLM-written clinical summary for one patient record.

    Args:
        patient_row: dict-like record (e.g. a pandas Series) read via ``.get``;
            uses 'patient_id', 'risk_score', 'projected_gain', 'care_gaps' and
            the social-risk columns listed in ``_SOCIAL_FLAGS``.
        tone: free-text tone hint embedded in the prompt.
        override_prompt: if given, sent verbatim instead of the built prompt.

    Returns:
        The model's summary text, or a bracketed error string on failure —
        this function never raises; errors are logged and reported inline.
    """
    try:
        # `or []` also tolerates an explicit None stored under 'care_gaps'.
        gaps = patient_row.get('care_gaps') or []
        gain = patient_row.get('projected_gain', 0.0)
        score = patient_row.get('risk_score', 0.0)

        social_flags = [
            fragment
            for column, expected, fragment in _SOCIAL_FLAGS
            if normalize_bool(patient_row.get(column)) == expected
        ]
        social_context = (
            "; ".join(social_flags)
            if social_flags
            else "no major social risk factors identified"
        )

        base_prompt = override_prompt or f"""
You are a healthcare analyst creating summaries for value-based care programs.
Tone: {tone}
Patient ID: {patient_row.get('patient_id')}
Risk Score: {score}
Projected Revenue Gain: ${gain}
Care Gaps Identified: {', '.join(map(str, gaps))}
Social Risk Factors: {social_context}
Write a concise and insightful clinical summary, including key gaps and social considerations. 
Offer targeted recommendations.
"""
        response = _get_llm().create_chat_completion(
            messages=[{"role": "user", "content": base_prompt}]
        )
        if response.get("choices"):
            return response["choices"][0]["message"]["content"]
        return "[Error] No summary returned."
    except Exception as e:
        # Boundary handler: callers expect a string back, never an exception.
        logger.exception("LLM error: %s", e)
        return "[Error generating summary]"


# Additional: Quality validation helper for UI feedback
def summarize_data_quality(df):
    """Build a one-column DataFrame ("Issues") describing data-quality problems.

    Flags missing required columns, null values in required columns, and any
    column with more than 30% missing values.
    """
    issues = []
    required_cols = ['patient_id', 'age', 'gender', 'hcc_codes']
    for col in required_cols:
        if col not in df.columns:
            issues.append(f"Missing column: {col}")
        elif df[col].isnull().any():
            issues.append(f"Null values in column: {col}")

    # Multiply before rounding so the >30 threshold sees true percentages.
    # (The original rounded the fraction to 2 dp first, which collapsed
    # values like 30.4% down to exactly 30% and under-reported them.)
    percent_missing = (df.isnull().mean() * 100).round(2)
    for col, pct in percent_missing[percent_missing > 30].items():
        issues.append(f"Over 30% missing in: {col} ({pct:.0f}%)")

    return pd.DataFrame({"Issues": issues})