Spaces:
Sleeping
Sleeping
File size: 7,090 Bytes
16d37fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
# ─── 1. BENCHMARK DATA ─────────────────────────────────────────────────────────
# Nested dict: Domain → { Model Name → {metric_name: value, …, "SOTA_<metric>": value } }
# Benchmark results keyed by domain, then by model name.
# Within a model's dict, keys starting with "SOTA_" hold the reference
# (state-of-the-art) value for the metric named after the prefix; every other
# key is one of the model's own metrics.
benchmark_data = {
    "Protein Folding": {
        "Nexa Bio1 (Secondary)": {
            "Accuracy (%)": 71,
            "Q3 (%)": 65,
            "Q8 (%)": 55,
            "TM-score": 0.60,
            "SOTA_Accuracy (%)": 85,
            "SOTA_TM-score": 0.75,
        },
        "Nexa Bio2 (Tertiary)": {
            "Confidence (%)": 90,
            "GDT_TS": 0.82,
            "Entropy Threshold (%)": 80,
            "SOTA_Confidence (%)": 92,
            "SOTA_GDT_TS": 0.85,
        },
    },
    "Astrophysics": {
        "Nexa Astro": {
            "Accuracy (%)": 97,
            "Macro-F1 (%)": 96,
            "ROC-AUC": 0.98,
            "SOTA_Accuracy (%)": 96,
            "SOTA_ROC-AUC": 0.97,
        },
    },
    "Materials Science": {
        "Nexa MatSci": {
            "MAE (eV)": 0.02,
            "RMSE (eV)": 0.03,
            "Bandgap Accuracy (%)": 98,
            "SOTA_MAE (eV)": 0.03,
            "SOTA_Bandgap Accuracy (%)": 95,
        },
    },
    "Quantum State Tomography": {
        "Nexa QST": {
            "Fidelity": 0.80,
            "Purity": 1.00,
            "Trace Distance": 0.15,
            "SOTA_Fidelity": 0.83,
            "SOTA_Trace Distance": 0.12,
        },
    },
    "Computational Fluid Dynamics": {
        "Nexa CFD": {
            "Relative L2 Error": 0.015,
            "Energy Conservation Loss": 0.005,
            "PSNR": 30,
            "SSIM": 0.88,
            "SOTA_Relative L2 Error": 0.020,
            "SOTA_SSIM": 0.85,
        },
    },
    "High-Energy Physics": {
        "Nexa HEP": {
            "ROC-AUC": 0.92,
            "Event Accuracy (%)": 90,
            "Jet Tagging (%)": 88,
            "SOTA_ROC-AUC": 0.93,
            "SOTA_Event Accuracy (%)": 89,
        },
    },
    "LLM Hypothesis & Methodology": {
        "Nexa MOE": {
            # The rubric range is written with an en dash ("1–10"); the
            # previous keys contained a mis-decoded character in its place.
            # The SOTA entry here has no "SOTA_" prefix, so it is treated as
            # an ordinary metric by prefix-based filtering.
            "Coherence (1–10)": 9.1,
            "Novelty (1–10)": 8.6,
            "Utility (1–10)": 8.8,
            "Expert-Rated SOTA (1–10)": 9.0,
        },
    },
}
# ─── 2. SECTION DESCRIPTIONS ───────────────────────────────────────────────────
# Markdown blurb shown for each domain; keys mirror the domain names used as
# the top-level keys of the benchmark table. Each blurb is three lines: a bold
# title, a one-line task summary, and the metrics/baselines line.
# Mis-decoded punctuation from the previous revision (arrow, dashes) has been
# restored so the rendered Markdown reads correctly.
section_descriptions = {
    "Protein Folding": (
        "**Protein Folding**\n"
        "Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.\n"
        "Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence."
    ),
    "Astrophysics": (
        "**Astrophysics**\n"
        "Stellar classification and redshift estimation.\n"
        "Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines."
    ),
    "Materials Science": (
        "**Materials Science**\n"
        "Property prediction for novel materials (e.g., bandgap, formation energy).\n"
        "Metrics: MAE/RMSE, bandgap-prediction accuracy vs. CGCNN, ALIGNN."
    ),
    "Quantum State Tomography": (
        "**Quantum State Tomography**\n"
        "Reconstruct quantum states from measurement data.\n"
        "Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet."
    ),
    "Computational Fluid Dynamics": (
        "**CFD**\n"
        "Flow field prediction (Navier–Stokes).\n"
        "Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO."
    ),
    "High-Energy Physics": (
        "**High-Energy Physics**\n"
        "Particle classification and signal/background separation.\n"
        "Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency."
    ),
    "LLM Hypothesis & Methodology": (
        "**LLM-Based Scientific Reasoning**\n"
        "Hypothesis and methodology generation.\n"
        "Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; "
        "compared to top academic LLM baselines."
    ),
}
# ─── 3. PLOTTING FUNCTION ──────────────────────────────────────────────────────
def plot_comparison(category):
    """Draw a grouped bar chart of each model's metrics beside its SOTA values.

    Every model in the selected domain gets one group of bars: first its own
    metrics, then (if present) the values stored under keys starting with
    "SOTA". The model name is used as the x-tick label, centred under the
    whole group.

    Args:
        category: Key of ``benchmark_data`` naming the domain to plot.

    Returns:
        The matplotlib ``Figure`` containing the chart.

    Raises:
        KeyError: If ``category`` is not a key of ``benchmark_data``.
    """
    data = benchmark_data[category]
    # NOTE(review): figures created through pyplot stay registered with pyplot
    # until closed; confirm whether they should be closed after rendering.
    fig, ax = plt.subplots(figsize=(7, 4))
    bar_width = 0.4
    # Distance between the starts of neighbouring groups when the bars fit.
    min_group_stride = 2.0
    labels = list(data)

    tick_positions = []
    group_start = 0.0
    for model in labels:
        metrics = data[model]
        own_values = [v for k, v in metrics.items() if not k.startswith("SOTA")]
        sota_values = [v for k, v in metrics.items() if k.startswith("SOTA")]

        own_x = [group_start + j * bar_width for j in range(len(own_values))]
        ax.bar(own_x, own_values, width=bar_width, label=f"{model} Metrics")
        if sota_values:
            # SOTA bars continue directly to the right of the model's own bars.
            sota_x = [
                group_start + (len(own_values) + j) * bar_width
                for j in range(len(sota_values))
            ]
            ax.bar(sota_x, sota_values, width=bar_width, alpha=0.7,
                   label=f"{model} SOTA")

        # Centre the tick under this model's bars, using this model's own bar
        # count rather than whatever the last loop iteration happened to leave.
        n_bars = len(own_values) + len(sota_values)
        tick_positions.append(group_start + max(n_bars - 1, 0) * bar_width / 2)

        # Widen the stride for large groups so they cannot run into the next
        # model's bars (a fixed stride of 2 overlaps once a group has 6 bars).
        group_start += max(min_group_stride, (n_bars + 1) * bar_width)

    # formatting
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.set_ylabel("Value / Score")
    ax.set_title(f"{category} — Nexa vs. SOTA")
    if labels:
        # Skip the legend for an empty domain; there is nothing to list.
        ax.legend(loc="upper right")
    plt.tight_layout()
    return fig
# ─── 4. CALLBACK TO RENDER SECTION ─────────────────────────────────────────────
def show_eval(category):
    """Collect the description, metrics table and comparison plot for a domain.

    Args:
        category: Domain name; must be a key of both ``section_descriptions``
            and ``benchmark_data``.

    Returns:
        A ``(description, table, figure)`` tuple: the Markdown blurb, a
        DataFrame with one row per model (model name in the ``Model`` column,
        one further column per metric, NaN where a model lacks a metric), and
        the figure produced by ``plot_comparison``.

    Raises:
        KeyError: If ``category`` is missing from either lookup table.
    """
    desc = section_descriptions[category]
    # Transposing yields one row per model, but leaves the model names in the
    # index. A table widget that renders only columns would drop them, so
    # promote the index to a regular "Model" column.
    df = (
        pd.DataFrame(benchmark_data[category])
        .T.rename_axis("Model")
        .reset_index()
    )
    fig = plot_comparison(category)
    return desc, df, fig
# ─── 5. BUILD GRADIO APP ───────────────────────────────────────────────────────
# Dashboard layout: a domain selector on the left, and the description,
# metrics table and comparison plot for the selected domain on the right.
with gr.Blocks(css="""
body { background-color: #f7f9fc; font-family: Arial, sans-serif; }
.gradio-container { max-width: 900px; margin: auto; }
h1, h2, h3 { color: #333; }
""") as app:
    gr.Markdown("# 🔬 Nexa Evals Dashboard")
    gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")
    with gr.Row():
        with gr.Column(scale=1):
            category = gr.Radio(
                choices=list(benchmark_data),
                value="Protein Folding",
                label="Select Domain / Model Group",
            )
        with gr.Column(scale=3):
            description = gr.Markdown("")
            # No fixed headers: the columns come from the DataFrame returned
            # by show_eval and differ from domain to domain.
            table = gr.Dataframe(interactive=False)
            plot = gr.Plot()

    view_outputs = [description, table, plot]
    category.change(fn=show_eval, inputs=category, outputs=view_outputs)
    # Populate the view for the default selection when the page loads. This
    # goes through the normal callback path, so the plot is initialised too,
    # instead of assigning component values after construction.
    app.load(fn=show_eval, inputs=category, outputs=view_outputs)

# Launch (on Hugging Face the config flags will be auto-managed).
# Guarded so that importing this module does not start a server.
if __name__ == "__main__":
    app.launch()
|