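# Spaces: Sleeping
# (The line above appears to be page-status residue captured with the file; it is not part of the module.)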
# File: logic/nlp_report.py
import pandas as pd | |
from huggingface_hub import hf_hub_download | |
from llama_cpp import Llama | |
import logging | |
# Hugging Face repository and GGUF file that back the summary generator.
MODEL_REPO = "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
MODEL_FILENAME = "deepcogito_cogito-v1-preview-llama-8B-Q8_0.gguf"

# Resolve the model file at import time, using the local "models" directory
# as the download cache.
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME, cache_dir="models")

# Module-level model instance used by generate_patient_summary().
# NOTE(review): n_ctx=32048 is an unusual context size -- confirm it is
# intentional and not a typo for 32768.
# NOTE(review): chat_format is "chatml" -- confirm this matches the prompt
# template the chosen model expects.
llm = Llama(model_path=model_path, n_ctx=32048, chat_format="chatml", verbose=False)
def normalize_bool(value):
    """Coerce a loosely-typed flag value to a strict ``bool``.

    Strings are trimmed and lower-cased; only "yes", "true" and "1" count
    as True. Missing values (``None`` and float NaN) are treated as False.
    Every other value falls back to its normal truthiness.

    Args:
        value: The raw flag value (string, number, bool, None, NaN, ...).

    Returns:
        True or False.
    """
    if value is None:
        return False
    if isinstance(value, str):
        return value.strip().lower() in ("yes", "true", "1")
    # NaN is the only float that is not equal to itself. Without this guard
    # bool(float("nan")) is True, so a missing cell would read as "yes".
    if isinstance(value, float) and value != value:
        return False
    try:
        return bool(value)
    except TypeError:
        # Some missing-value markers refuse truthiness checks (bool(pd.NA)
        # raises TypeError); treat them as "not set".
        return False
def _collect_social_flags(patient_row):
    """Return the social-risk phrases that apply to ``patient_row``, in a fixed order."""
    # (column, phrase, normalized flag value that triggers the phrase)
    indicators = (
        ("Lives_Alone", "lives alone", True),
        ("Housing_Instability", "experiencing housing instability", True),
        # NOTE(review): a truthy "Transportation_Access" is reported as
        # *limited* access -- confirm the polarity of this column.
        ("Transportation_Access", "has limited transportation access", True),
        ("Food_Insecurity", "faces food insecurity", True),
        ("Primary_Care_Established", "does not have a primary care provider", False),
        ("FollowUp_Scheduled", "has no follow-up scheduled", False),
    )
    return [
        phrase
        for column, phrase, trigger in indicators
        if normalize_bool(patient_row.get(column)) == trigger
    ]


def _format_care_gaps(gaps):
    """Render care gaps as a comma-separated string, tolerating non-list input."""
    # Missing value (None or float NaN): nothing to list.
    if gaps is None or (isinstance(gaps, float) and gaps != gaps):
        return ""
    # A plain string would otherwise be joined character by character.
    if isinstance(gaps, str):
        return gaps
    # str() each item so non-string entries do not break the join.
    return ", ".join(str(gap) for gap in gaps)


def _build_summary_prompt(patient_row, tone):
    """Build the default LLM prompt for one patient row."""
    gain = patient_row.get('projected_gain', 0.0)
    score = patient_row.get('risk_score', 0.0)
    gaps_text = _format_care_gaps(patient_row.get('care_gaps', []))

    social_flags = _collect_social_flags(patient_row)
    social_context = "; ".join(social_flags) if social_flags else "no major social risk factors identified"

    # The prompt lines are intentionally flush-left: leading whitespace would
    # become part of the text sent to the model.
    return f"""
You are a healthcare analyst creating summaries for value-based care programs.
Tone: {tone}
Patient ID: {patient_row.get('patient_id')}
Risk Score: {score}
Projected Revenue Gain: ${gain}
Care Gaps Identified: {gaps_text}
Social Risk Factors: {social_context}
Write a concise and insightful clinical summary, including key gaps and social considerations. Offer targeted recommendations.
"""


def generate_patient_summary(patient_row, tone="executive", override_prompt=None):
    """Generate a narrative summary for one patient using the module-level LLM.

    Args:
        patient_row: Mapping-like object exposing ``.get(key, default)``.
            The keys read are 'patient_id', 'care_gaps', 'projected_gain',
            'risk_score' and the social-risk columns listed in
            ``_collect_social_flags``.
        tone: Tone label interpolated into the default prompt.
        override_prompt: If truthy, sent to the model verbatim instead of
            the default prompt; the row is not inspected in that case.

    Returns:
        The model's message content on success, "[Error] No summary returned."
        when the response has no choices, or "[Error generating summary]" when
        any exception occurs (the exception is logged, never raised).
    """
    # Deliberately broad boundary: callers always get a string back.
    try:
        prompt = override_prompt or _build_summary_prompt(patient_row, tone)
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}]
        )
        choices = response.get("choices")
        if choices:
            return choices[0]["message"]["content"]
        return "[Error] No summary returned."
    except Exception as e:
        # logging.exception keeps the original message and adds the traceback.
        logging.exception("LLM error: %s", e)
        return "[Error generating summary]"
# Additional: Quality validation helper for UI feedback
def summarize_data_quality(df, required_cols=None, missing_threshold=30.0):
    """Summarize data-quality problems in ``df`` as a one-column DataFrame.

    Two checks are run, in this order:
      1. Each required column must exist and contain no nulls.
      2. Any column (required or not) whose share of nulls exceeds
         ``missing_threshold`` percent is reported.

    Args:
        df: The pandas DataFrame to inspect.
        required_cols: Columns that must be present and fully populated.
            Defaults to ['patient_id', 'age', 'gender', 'hcc_codes'].
        missing_threshold: Percentage of nulls above which a column is
            reported. Defaults to 30.

    Returns:
        A DataFrame with a single "Issues" column, one row per issue found
        (zero rows when no issue is found).
    """
    if required_cols is None:
        required_cols = ['patient_id', 'age', 'gender', 'hcc_codes']

    issues = []
    for col in required_cols:
        if col not in df.columns:
            issues.append(f"Missing column: {col}")
        elif df[col].isnull().any():
            issues.append(f"Null values in column: {col}")

    # Compare the unrounded percentage against the threshold. Rounding first
    # would turn e.g. 30.4% into 30.0 and hide a column that is over the limit.
    percent_missing = df.isnull().mean() * 100
    high_missing = percent_missing[percent_missing > missing_threshold]
    issues.extend(
        f"Over {missing_threshold:g}% missing in: {col} ({pct:.0f}%)"
        for col, pct in high_missing.items()
    )

    return pd.DataFrame({"Issues": issues})