Spaces:

Allanatrix
/

NexaEvals

Running

App Files Files Community

Allanatrix committed 25 days ago

Commit

16d37fe

verified ·

1 Parent(s): 891a83a

Create app.py

Browse files

Files changed (1) hide show

app.py +178 -0

app.py ADDED Viewed

	@@ -0,0 +1,178 @@

+import gradio as gr
+import pandas as pd
+import matplotlib.pyplot as plt
+# ─── 1. BENCHMARK DATA ──────────────────────────────────────────────────────────
+# Nested dict: Domain → { Model Name → {metric_name: value, …, "SOTA_<metric>": value } }
+benchmark_data = {  # top-level keys are the domain names; they also populate the Radio choices in the UI
+    "Protein Folding": {
+        "Nexa Bio1 (Secondary)": {
+            "Accuracy (%)": 71,
+            "Q3 (%)": 65,
+            "Q8 (%)": 55,
+            "TM-score": 0.60,
+            "SOTA_Accuracy (%)": 85,  # keys starting with "SOTA" are drawn by plot_comparison as the reference bars
+            "SOTA_TM-score": 0.75
+        },
+        "Nexa Bio2 (Tertiary)": {
+            "Confidence (%)": 90,
+            "GDT_TS": 0.82,
+            "Entropy Threshold (%)": 80,
+            "SOTA_Confidence (%)": 92,
+            "SOTA_GDT_TS": 0.85
+        },
+    },
+    "Astrophysics": {
+        "Nexa Astro": {
+            "Accuracy (%)": 97,
+            "Macro-F1 (%)": 96,
+            "ROC-AUC": 0.98,
+            "SOTA_Accuracy (%)": 96,
+            "SOTA_ROC-AUC": 0.97
+        },
+    },
+    "Materials Science": {
+        "Nexa MatSci": {
+            "MAE (eV)": 0.02,
+            "RMSE (eV)": 0.03,
+            "Bandgap Accuracy (%)": 98,
+            "SOTA_MAE (eV)": 0.03,
+            "SOTA_Bandgap Accuracy (%)": 95
+        },
+    },
+    "Quantum State Tomography": {
+        "Nexa QST": {
+            "Fidelity": 0.80,
+            "Purity": 1.00,
+            "Trace Distance": 0.15,
+            "SOTA_Fidelity": 0.83,
+            "SOTA_Trace Distance": 0.12
+        },
+    },
+    "Computational Fluid Dynamics": {
+        "Nexa CFD": {
+            "Relative L2 Error": 0.015,
+            "Energy Conservation Loss": 0.005,
+            "PSNR": 30,
+            "SSIM": 0.88,
+            "SOTA_Relative L2 Error": 0.020,
+            "SOTA_SSIM": 0.85
+        },
+    },
+    "High-Energy Physics": {
+        "Nexa HEP": {
+            "ROC-AUC": 0.92,
+            "Event Accuracy (%)": 90,
+            "Jet Tagging (%)": 88,
+            "SOTA_ROC-AUC": 0.93,
+            "SOTA_Event Accuracy (%)": 89
+        },
+    },
+    "LLM Hypothesis & Methodology": {
+        "Nexa MOE": {
+            "Coherence (1–10)": 9.1,
+            "Novelty (1–10)": 8.6,
+            "Utility (1–10)": 8.8,
+            "Expert-Rated SOTA (1–10)": 9.0  # NOTE(review): no "SOTA" prefix, so plot_comparison plots this as a model metric — confirm intended
+        },
+    },
+}
+# ─── 2. SECTION DESCRIPTIONS ───────────────────────────────────────────────────
+section_descriptions = {  # Markdown text per domain; show_eval indexes this with the same key it uses for benchmark_data
+    "Protein Folding": """**Protein Folding**
+Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.
+Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence.""",
+    "Astrophysics": """**Astrophysics**
+Stellar classification and redshift estimation.
+Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines.""",
+    "Materials Science": """**Materials Science**
+Property prediction for novel materials (e.g., bandgap, formation energy).
+Metrics: MAE/RMSE, bandgap‐prediction accuracy vs. CGCNN, ALIGNN.""",
+    "Quantum State Tomography": """**Quantum State Tomography**
+Reconstruct quantum states from measurement data.
+Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet.""",
+    "Computational Fluid Dynamics": """**CFD**
+Flow field prediction (Navier–Stokes).
+Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO.""",
+    "High-Energy Physics": """**High-Energy Physics**
+Particle classification and signal/background separation.
+Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency.""",
+    "LLM Hypothesis & Methodology": """**LLM-Based Scientific Reasoning**
+Hypothesis and methodology generation.
+Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; compared to top academic LLM baselines."""
+}
+# ─── 3. PLOTTING FUNCTION ────────────────────────────────────────────────────────
+def plot_comparison(category):
+    data = benchmark_data[category]
+    fig, ax = plt.subplots(figsize=(7, 4))
+    bar_width = 0.4
+    indices = list(range(len(data)))
+    labels = list(data.keys())
+    # collect metrics that aren’t SOTA
+    for i, model in enumerate(labels):
+        metrics = data[model]
+        # extract non-SOTA metrics
+        non_sota = {k: v for k, v in metrics.items() if not k.startswith("SOTA")}
+        sota = {k.replace("SOTA_", ""): v for k, v in metrics.items() if k.startswith("SOTA")}
+        # bar positions
+        pos = i * 2
+        ax.bar([pos + j*bar_width for j in range(len(non_sota))],
+               list(non_sota.values()),
+               width=bar_width, label=f"{model} Metrics")
+        if sota:
+            ax.bar([pos + bar_width*len(non_sota) + j*bar_width for j in range(len(sota))],
+                   list(sota.values()),
+                   width=bar_width, alpha=0.7, label=f"{model} SOTA")
+    # formatting
+    ax.set_xticks([i * (2) + bar_width*(len(non_sota)/2) for i in indices])
+    ax.set_xticklabels(labels, rotation=45, ha='right')
+    ax.set_ylabel("Value / Score")
+    ax.set_title(f"{category} — Nexa vs. SOTA")
+    ax.legend(loc="upper right")
+    plt.tight_layout()
+    return fig
+# ─── 4. CALLBACK TO RENDER SECTION ─────────────────────────────────────────────
+def show_eval(category):
+    desc = section_descriptions[category]
+    df = pd.DataFrame(benchmark_data[category]).T
+    fig = plot_comparison(category)
+    return desc, df, fig
+# ─── 5. BUILD GRADIO APP ───────────────────────────────────────────────────────
+with gr.Blocks(css="""
+    body { background-color: #f7f9fc; font-family: Arial, sans-serif; }
+    .gradio-container { max-width: 900px; margin: auto; }
+    h1, h2, h3 { color: #333; }
+""") as app:
+    gr.Markdown("# 🔬 Nexa Evals Dashboard")
+    gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            category = gr.Radio(
+                choices=list(benchmark_data.keys()),
+                value="Protein Folding",
+                label="Select Domain / Model Group"
+            )
+        with gr.Column(scale=3):
+            description = gr.Markdown("")
+            table = gr.Dataframe(headers=["Metric", "Value"], interactive=False)
+            plot = gr.Plot()
+    category.change(
+        fn=show_eval,
+        inputs=category,
+        outputs=[description, table, plot]
+    )
+    # initialize
+    description.value, table.value, _ = show_eval("Protein Folding")
+# Launch (on Hugging Face the config flags will be auto-managed)
+app.launch()