Spaces:

Multiple123
/

Meige

Sleeping

App Files Files Community

Multiple123 committed on Aug 14

Commit

f2b3a5b

verified ·

1 Parent(s): 7aa125b

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -81

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py (robust, server-safe)
 import gradio as gr
 import pandas as pd
 import joblib
@@ -11,21 +11,37 @@ import warnings
 warnings.filterwarnings("ignore")
 # ====== 模型与背景数据 ======
-MODEL_PATH = "models/SVM_pipeline.pkl"
 BG_PATH    = "data/bg.csv"
-# 模型最终需要的10个特征（顺序必须与训练一致）
-feature_names = ["HGB", "HDL_C", "DBIL", "AST_ALT", "UA", "GFR", "PNI", "HALP", "AAPR", "conuts"]
 # 加载模型与背景
 pipeline = joblib.load(MODEL_PATH)
 bg_df = pd.read_csv(BG_PATH)
-bg_array = bg_df[feature_names].to_numpy(dtype=np.float64)
-# 预测函数（供 KernelExplainer 调用）
 def _predict_proba_nd(x_nd: np.ndarray) -> np.ndarray:
-    df = pd.DataFrame(x_nd, columns=feature_names)
-    return pipeline.predict_proba(df)
 # 只初始化一次 explainer（性能更稳）
 explainer = shap.KernelExplainer(_predict_proba_nd, bg_array)
@@ -34,57 +50,73 @@ def _render_force_plot(base_val: float, shap_1d: np.ndarray, feat_1d: np.ndarray
     """返回 matplotlib Figure（旧接口，服务器端稳定）"""
     plt.close('all')
     shap.force_plot(
-        base_val, np.asarray(shap_1d).reshape(-1), np.asarray(feat_1d).reshape(-1),
-        feature_names=list(fnames), matplotlib=True, show=False
     )
     fig = plt.gcf()
     fig.set_size_inches(8, 4)
     plt.tight_layout()
     return fig
-def predict_and_explain(
-    HGB, HDL_C, DBIL, AST_ALT, UA, GFR,
-    ALB, LYM, PLT, ALP, CHOL,
-    nsamples=200
-):
     status = []
     try:
-        # ---- 1) 衍生指标（由原始输入计算）----
-        try:
-            HGB  = float(HGB);   HDL_C = float(HDL_C); DBIL = float(DBIL); AST_ALT = float(AST_ALT)
-            UA   = float(UA);    GFR   = float(GFR)
-            ALB  = float(ALB);   LYM   = float(LYM);   PLT   = float(PLT)
-            ALP  = float(ALP);   CHOL  = float(CHOL)
-        except Exception:
-            return None, None, "Error: some inputs are not numeric."
-        # 防极端值（避免除0）
-        if PLT <= 0 or ALP <= 0:
-            return None, None, "Error: PLT and ALP must be > 0."
         PNI  = ALB + 5.0 * LYM
-        HALP = HGB * ALB * LYM / PLT
         AAPR = ALB / ALP
-        conuts = (
-            (0 if ALB >= 35 else 2 if ALB >= 30 else 4 if ALB >= 25 else 6) +
-            (0 if LYM >= 1.6 else 1 if LYM >= 1.2 else 2 if LYM >= 0.8 else 3) +
-            (0 if CHOL >= 4.65 else 1 if CHOL >= 3.10 else 2 if CHOL >= 2.59 else 3)
-        )
-        x_row = np.array([[HGB, HDL_C, DBIL, AST_ALT, UA, GFR, PNI, HALP, AAPR, conuts]], dtype=np.float64)
-        status.append(f"Derived: PNI={PNI:.3f}, HALP={HALP:.3f}, AAPR={AAPR:.3f}, CONUTS={conuts}")
-        # ---- 2) 概率 ----
-        prob = float(pipeline.predict_proba(pd.DataFrame(x_row, columns=feature_names))[0, 1])
-        status.append(f"Pred prob computed: {prob:.3f}")
-        # ---- 3) SHAP 计算 ----
         ns = int(nsamples) if nsamples is not None else 200
         shap_out = explainer.shap_values(x_row, nsamples=ns)
-        # 统一提取“正类”一维向量
         if isinstance(shap_out, list):
-            sv = np.asarray(shap_out[1], dtype=np.float64)
-            if sv.ndim == 2:
                 sv = sv[0, :]
         else:
             sv = np.asarray(shap_out, dtype=np.float64)
@@ -94,77 +126,75 @@ def predict_and_explain(
                 sv = sv[0, :]
             else:
                 sv = sv.reshape(-1)
-        status.append(f"SHAP 1D shape: {sv.shape}; features: {x_row.shape[1:]}")
-        # base value 取正类
         ev = explainer.expected_value
         if isinstance(ev, (list, np.ndarray)):
             ev = np.asarray(ev).reshape(-1)
-            base_val = float(ev[1] if len(ev) > 1 else ev[0])
         else:
             base_val = float(ev)
-        # ---- 4) 绘图：优先力图；失败则条形图兜底 ----
         try:
-            fig = _render_force_plot(base_val, sv, x_row[0, :], feature_names)
             status.append("Rendered force plot (matplotlib).")
-            return round(prob, 3), fig, "\n".join(status)
         except Exception as e_force:
             status.append(f"Force-plot failed: {repr(e_force)}; fallback=bar")
-        order = np.argsort(np.abs(sv))[::-1]
-        topk = order[:min(10, sv.shape[0])]
-        plt.close('all')
-        fig = plt.figure(figsize=(8, 5), dpi=160)
-        plt.barh(np.array(feature_names)[topk], sv[topk])
-        plt.xlabel("SHAP value")
-        plt.title("Top features (single-sample contribution)")
-        plt.gca().invert_yaxis()
-        plt.tight_layout()
-        status.append("Rendered bar fallback.")
-        return round(prob, 3), fig, "\n".join(status)
     except Exception as e:
         return None, None, f"Fatal error: {repr(e)}"
-# ====== 示例：一组“原始指标”可复现你之前的 PNI/HALP/AAPR/CONUTS ======
-# 对应：PNI=44, HALP≈60.8, AAPR≈0.486, CONUTS=4
-example_values = [167, 1.76, 8.6, 0.97, 310, 75, 33, 2.2, 164, 68, 2.8, 200]
-# 顺序：HGB, HDL_C, DBIL, AST_ALT, UA, GFR, ALB, LYM, PLT, ALP, CHOL, nsamples
 # ====== Gradio 界面 ======
 with gr.Blocks() as demo:
     gr.Markdown(
         "### Meige Risk Prediction (SVM) with SHAP Explanation\n"
-        "Enter **original clinical indicators**; the app will derive PNI/HALP/AAPR/CONUTS internally.\n\n"
-        "**Units**: HGB (g/L), HDL‑C (mmol/L), DBIL (μmol/L), AST/ALT (ratio), UA (μmol/L), "
-        "GFR (mL/min/1.73 m²), ALB (g/L), LYM (×10⁹/L), PLT (×10⁹/L), ALP (U/L), CHOL (mmol/L)."
     )
     with gr.Row():
         with gr.Column(scale=1):
             inputs = [
-                gr.Number(label="HGB (g/L)"),
-                gr.Number(label="HDL-C (mmol/L)"),
-                gr.Number(label="DBIL (μmol/L)"),
-                gr.Number(label="AST/ALT"),
-                gr.Number(label="UA (μmol/L)"),
-                gr.Number(label="GFR (mL/min/1.73 m²)"),
                 gr.Number(label="ALB (g/L)"),
                 gr.Number(label="LYM (×10⁹/L)"),
-                gr.Number(label="PLT (×10⁹/L)"),
                 gr.Number(label="ALP (U/L)"),
-                gr.Number(label="CHOL (mmol/L)")
             ]
-            ns_slider = gr.Slider(100, 400, value=200, step=50, label="SHAP nsamples")
             btn_fill = gr.Button("Fill Example")
             btn_predict = gr.Button("Predict")
         with gr.Column(scale=1):
-            out_prob = gr.Number(label="Predicted Probability")
             out_plot = gr.Plot(label="SHAP Force Plot (fallback: bar)")
-            out_log  = gr.Textbox(label="Status", lines=6)
     def _fill_example():
         return tuple(example_values)

+# app.py (7-feature aligned, server-safe)
 import gradio as gr
 import pandas as pd
 import joblib
 warnings.filterwarnings("ignore")
 # ====== 模型与背景数据 ======
+MODEL_PATH = "models/svm_pipeline.joblib"
 BG_PATH    = "data/bg.csv"
+# 模型最终需要的 7 个特征（顺序必须与训练一致）
+FEATURES = ["ALB", "TP", "TBA", "AST_ALT", "CREA", "PNI", "AAPR"]
 # 加载模型与背景
 pipeline = joblib.load(MODEL_PATH)
 bg_df = pd.read_csv(BG_PATH)
+missing_bg = [c for c in FEATURES if c not in bg_df.columns]
+if missing_bg:
+    raise ValueError(f"背景集缺少列: {missing_bg}")
+bg_array = bg_df[FEATURES].to_numpy(dtype=np.float64)
+# 预测函数（供 KernelExplainer 调用）——返回正类概率/分数
 def _predict_proba_nd(x_nd: np.ndarray) -> np.ndarray:
+    df = pd.DataFrame(x_nd, columns=FEATURES)
+    # 若模型有 predict_proba：取正类概率；否则退回 decision_function / predict
+    if hasattr(pipeline, "predict_proba"):
+        proba = pipeline.predict_proba(df)
+        # 确定正类索引（假定正类标签为 1；若不是，请在此处修改）
+        classes_ = getattr(pipeline, "classes_", None)
+        pos_idx = int(np.where(classes_ == 1)[0][0]) if classes_ is not None else 1
+        return proba[:, pos_idx]
+    elif hasattr(pipeline, "decision_function"):
+        score = pipeline.decision_function(df)
+        return score if isinstance(score, np.ndarray) else np.asarray(score)
+    else:
+        pred = pipeline.predict(df)
+        return pred if isinstance(pred, np.ndarray) else np.asarray(pred)
 # 只初始化一次 explainer（性能更稳）
 explainer = shap.KernelExplainer(_predict_proba_nd, bg_array)
     """返回 matplotlib Figure（旧接口，服务器端稳定）"""
     plt.close('all')
     shap.force_plot(
+        base_val,
+        np.asarray(shap_1d).reshape(-1),
+        np.asarray(feat_1d).reshape(-1),
+        feature_names=list(fnames),
+        matplotlib=True, show=False
     )
     fig = plt.gcf()
     fig.set_size_inches(8, 4)
     plt.tight_layout()
     return fig
+def _coerce_float(x):
+    return float(x) if x is not None and x != "" else np.nan
+def predict_and_explain(ALB, TP, TBA, AST_ALT, CREA, LYM, ALP, nsamples=200):
     status = []
     try:
+        # ---- 1) 取数并校验 ----
+        ALB     = _coerce_float(ALB)
+        TP      = _coerce_float(TP)
+        TBA     = _coerce_float(TBA)
+        AST_ALT = _coerce_float(AST_ALT)
+        CREA    = _coerce_float(CREA)
+        LYM     = _coerce_float(LYM)
+        ALP     = _coerce_float(ALP)
+        vals = [ALB, TP, TBA, AST_ALT, CREA, LYM, ALP]
+        if any(np.isnan(v) for v in vals):
+            return None, None, "Error: 所有输入必须为数值且不可缺失。"
+        if ALP <= 0:
+            return None, None, "Error: ALP 必须 > 0（用于计算 AAPR=ALB/ALP）。"
+        # ---- 2) 衍生指标 ----
         PNI  = ALB + 5.0 * LYM
         AAPR = ALB / ALP
+        status.append(f"Derived: PNI={PNI:.3f}, AAPR={AAPR:.3f}")
+        # ---- 3) 组装最终 7 特征并预测 ----
+        x_row = np.array([[ALB, TP, TBA, AST_ALT, CREA, PNI, AAPR]], dtype=np.float64)
+        if hasattr(pipeline, "predict_proba"):
+            classes_ = getattr(pipeline, "classes_", None)
+            pos_idx = int(np.where(classes_ == 1)[0][0]) if classes_ is not None else 1
+            prob = float(pipeline.predict_proba(pd.DataFrame(x_row, columns=FEATURES))[0, pos_idx])
+            status.append(f"Pred prob: {prob:.3f}")
+        else:
+            # 若无概率，给出分数
+            score = float(
+                pipeline.decision_function(pd.DataFrame(x_row, columns=FEATURES))[0]
+            ) if hasattr(pipeline, "decision_function") else float(
+                pipeline.predict(pd.DataFrame(x_row, columns=FEATURES))[0]
+            )
+            prob = score
+            status.append(f"Pred score: {score:.3f}")
+        # ---- 4) SHAP 计算 ----
         ns = int(nsamples) if nsamples is not None else 200
         shap_out = explainer.shap_values(x_row, nsamples=ns)
+        # 统一提取“一维贡献向量”
         if isinstance(shap_out, list):
+            # 二分类：list 长度=2，取正类
+            classes_ = getattr(pipeline, "classes_", None)
+            pos_idx = int(np.where(classes_ == 1)[0][0]) if classes_ is not None else 1
+            sv = np.asarray(shap_out[pos_idx], dtype=np.float64)
+            if sv.ndim == 2:  # (1, n_features)
                 sv = sv[0, :]
         else:
             sv = np.asarray(shap_out, dtype=np.float64)
                 sv = sv[0, :]
             else:
                 sv = sv.reshape(-1)
+        status.append(f"SHAP vector shape: {sv.shape}")
+        # base value
         ev = explainer.expected_value
         if isinstance(ev, (list, np.ndarray)):
             ev = np.asarray(ev).reshape(-1)
+            classes_ = getattr(pipeline, "classes_", None)
+            pos_idx = int(np.where(classes_ == 1)[0][0]) if classes_ is not None else 1
+            base_val = float(ev[pos_idx if len(ev) > pos_idx else 0])
         else:
             base_val = float(ev)
+        # ---- 5) 绘图（优先 force，失败退条形图）----
         try:
+            fig = _render_force_plot(base_val, sv, x_row[0, :], FEATURES)
             status.append("Rendered force plot (matplotlib).")
+            return round(float(prob), 3), fig, "\n".join(status)
         except Exception as e_force:
             status.append(f"Force-plot failed: {repr(e_force)}; fallback=bar")
+            order = np.argsort(np.abs(sv))[::-1]
+            topk = order[:min(7, sv.shape[0])]
+            plt.close('all')
+            fig = plt.figure(figsize=(8, 5), dpi=160)
+            plt.barh(np.array(FEATURES)[topk], sv[topk])
+            plt.xlabel("SHAP value")
+            plt.title("Top features (single-sample contribution)")
+            plt.gca().invert_yaxis()
+            plt.tight_layout()
+            status.append("Rendered bar fallback.")
+            return round(float(prob), 3), fig, "\n".join(status)
     except Exception as e:
         return None, None, f"Fatal error: {repr(e)}"
+# ====== 示例输入（仅 7 项 + nsamples）======
+example_values = [38.0, 68.0, 6.5, 1.0, 75.0, 1.2, 80.0, 200]
+# 顺序：ALB, TP, TBA, AST_ALT, CREA, LYM, ALP, nsamples
+# 注：上例将派生 PNI=ALB+5*LYM=44、AAPR=ALB/ALP=0.475，与训练对齐
 # ====== Gradio 界面 ======
 with gr.Blocks() as demo:
     gr.Markdown(
         "### Meige Risk Prediction (SVM) with SHAP Explanation\n"
+        "输入 **ALB, TP, TBA, AST/ALT, CREA, LYM, ALP**；应用会内部计算 **PNI=ALB+5×LYM** 与 **AAPR=ALB/ALP**，"
+        "并以这 7 个最终特征喂给模型和 SHAP。\n\n"
+        "**Units**: ALB(g/L), TP(g/L), TBA(μmol/L), AST/ALT(ratio), CREA(μmol/L), "
+        "LYM(×10⁹/L), ALP(U/L)."
     )
     with gr.Row():
         with gr.Column(scale=1):
             inputs = [
                 gr.Number(label="ALB (g/L)"),
+                gr.Number(label="TP (g/L)"),
+                gr.Number(label="TBA (μmol/L)"),
+                gr.Number(label="AST/ALT"),
+                gr.Number(label="CREA (μmol/L)"),
                 gr.Number(label="LYM (×10⁹/L)"),
                 gr.Number(label="ALP (U/L)"),
             ]
+            ns_slider = gr.Slider(100, 500, value=200, step=50, label="SHAP nsamples")
             btn_fill = gr.Button("Fill Example")
             btn_predict = gr.Button("Predict")
         with gr.Column(scale=1):
+            out_prob = gr.Number(label="Predicted Probability / Score")
             out_plot = gr.Plot(label="SHAP Force Plot (fallback: bar)")
+            out_log  = gr.Textbox(label="Status", lines=8)
     def _fill_example():
         return tuple(example_values)