File size: 3,400 Bytes
d685a3e
 
dbff29a
d685a3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a09e831
d685a3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ba6365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File: logic/nlp_report.py

import pandas as pd
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import logging

# Hugging Face Hub repository id and GGUF file name of the model to load.
MODEL_REPO = "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
MODEL_FILENAME = "deepcogito_cogito-v1-preview-llama-8B-Q8_0.gguf"

# Resolve the local path of the model file through hf_hub_download, using the
# relative "models" directory as cache_dir.
# NOTE: this runs at import time, so importing this module triggers the call.
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILENAME,
    cache_dir="models"
)

# Module-level llama.cpp model instance; generate_patient_summary() calls
# llm.create_chat_completion() on it.
# NOTE(review): n_ctx=32048 is an unusual context size -- 32768 (2**15) may
# have been intended; confirm before changing.
# NOTE(review): confirm that the "chatml" chat format matches the prompt
# template this model expects.
llm = Llama(
    model_path=model_path,
    n_ctx=32048,
    chat_format="chatml",
    verbose=False
)

def normalize_bool(value):
    """Interpret a loosely-typed yes/no value as a strict ``bool``.

    Args:
        value: Any value. Strings are compared case-insensitively after
            trimming surrounding whitespace.

    Returns:
        True for the strings "yes", "true" and "1" and for any other truthy
        non-string value. False for every other string, for ``None`` and for
        floating-point NaN.
    """
    if isinstance(value, str):
        return value.strip().lower() in ("yes", "true", "1")
    # NaN is the only float that is not equal to itself. Without this guard
    # bool(float("nan")) evaluates to True, so a missing numeric cell would be
    # reported as "yes".
    if isinstance(value, float) and value != value:
        return False
    return bool(value)

# (column, value that signals a risk, text added to the prompt) -- evaluated
# in this order when building the social-risk description.
# NOTE(review): "Transportation_Access" is flagged when the value is TRUE,
# unlike the other "positive" columns (Primary_Care_Established,
# FollowUp_Scheduled), which are flagged when FALSE. Confirm the column's
# meaning; the existing behaviour is kept unchanged here.
_SOCIAL_RISK_RULES = (
    ("Lives_Alone", True, "lives alone"),
    ("Housing_Instability", True, "experiencing housing instability"),
    ("Transportation_Access", True, "has limited transportation access"),
    ("Food_Insecurity", True, "faces food insecurity"),
    ("Primary_Care_Established", False, "does not have a primary care provider"),
    ("FollowUp_Scheduled", False, "has no follow-up scheduled"),
)


def _format_care_gaps(gaps):
    """Render a ``care_gaps`` value as comma-separated text for the prompt."""
    if isinstance(gaps, str):
        # A plain string is already display text; joining it would split it
        # into single characters ("A1C" -> "A, 1, C").
        return gaps
    try:
        # str() lets non-string entries (numbers, codes) be joined as well.
        return ', '.join(str(gap) for gap in gaps)
    except TypeError:
        # None, NaN and other non-iterable values: nothing to list.
        return ''


def generate_patient_summary(patient_row, tone="executive", override_prompt=None):
    """Ask the module-level ``llm`` for a narrative summary of one patient.

    Args:
        patient_row: Object exposing ``.get(key[, default])`` (presumably a
            dict or pandas Series -- verify against callers). Keys read:
            ``patient_id``, ``risk_score``, ``projected_gain``, ``care_gaps``
            and the social columns listed in ``_SOCIAL_RISK_RULES``.
        tone: Free-text tone instruction inserted into the default prompt.
        override_prompt: If truthy, sent to the model verbatim instead of the
            default prompt built from ``patient_row``.

    Returns:
        The content of the first completion choice, or
        ``"[Error] No summary returned."`` when the response has no choices,
        or ``"[Error generating summary]"`` when any exception occurs (the
        exception is logged, never raised).
    """
    try:
        score = patient_row.get('risk_score', 0.0)
        gain = patient_row.get('projected_gain', 0.0)
        gaps_text = _format_care_gaps(patient_row.get('care_gaps', []))

        social_flags = [
            text
            for column, risky_when, text in _SOCIAL_RISK_RULES
            if normalize_bool(patient_row.get(column)) == risky_when
        ]
        social_context = "; ".join(social_flags) if social_flags else "no major social risk factors identified"

        base_prompt = override_prompt or f"""
You are a healthcare analyst creating summaries for value-based care programs.
Tone: {tone}

Patient ID: {patient_row.get('patient_id')}
Risk Score: {score}
Projected Revenue Gain: ${gain}
Care Gaps Identified: {gaps_text}
Social Risk Factors: {social_context}

Write a concise and insightful clinical summary, including key gaps and social considerations. Offer targeted recommendations.
"""

        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": base_prompt}]
        )

        choices = response.get("choices")
        if not choices:
            return "[Error] No summary returned."
        return choices[0]["message"]["content"]
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a string back.
        # logging.exception records the traceback alongside the message.
        logging.exception("LLM error: %s", e)
        return "[Error generating summary]"

# Additional: Quality validation helper for UI feedback

def summarize_data_quality(df, required_cols=None, missing_threshold=30.0):
    """List data-quality problems found in ``df``.

    Two checks are performed:

    1. Every required column must exist and contain no null values.
    2. Any column (required or not) whose share of null values is strictly
       greater than ``missing_threshold`` percent is reported.

    Args:
        df: The pandas DataFrame to inspect.
        required_cols: Column names that must be present and fully populated.
            Defaults to ``['patient_id', 'age', 'gender', 'hcc_codes']``.
        missing_threshold: Percentage of nulls above which a column is
            reported. Defaults to 30.

    Returns:
        A DataFrame with a single ``"Issues"`` column, one row per problem
        (empty when no problem was found).
    """
    if required_cols is None:
        required_cols = ['patient_id', 'age', 'gender', 'hcc_codes']

    issues = []
    for col in required_cols:
        if col not in df.columns:
            issues.append(f"Missing column: {col}")
        elif df[col].isnull().any():
            issues.append(f"Null values in column: {col}")

    # Compare the unrounded percentage: rounding the fraction to two decimals
    # first would turn e.g. 30.4% into exactly 30% and hide it from the
    # "strictly greater than" test. Rounding happens only when formatting.
    percent_missing = df.isnull().mean() * 100
    high_missing = percent_missing[percent_missing > missing_threshold]
    for col, pct in high_missing.items():
        issues.append(f"Over {missing_threshold:g}% missing in: {col} ({pct:.0f}%)")

    return pd.DataFrame({"Issues": issues})