# Captured from a Hugging Face Space page (Space status at capture time: Sleeping).
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
# ─── 1. BENCHMARK DATA ──────────────────────────────────────────────────────────
# Nested dict: Domain → { Model Name → {metric_name: value, …, "SOTA_<metric>": value } }
benchmark_data = { | |
"Protein Folding": { | |
"Nexa Bio1 (Secondary)": { | |
"Accuracy (%)": 71, | |
"Q3 (%)": 65, | |
"Q8 (%)": 55, | |
"TM-score": 0.60, | |
"SOTA_Accuracy (%)": 85, | |
"SOTA_TM-score": 0.75 | |
}, | |
"Nexa Bio2 (Tertiary)": { | |
"Confidence (%)": 90, | |
"GDT_TS": 0.82, | |
"Entropy Threshold (%)": 80, | |
"SOTA_Confidence (%)": 92, | |
"SOTA_GDT_TS": 0.85 | |
}, | |
}, | |
"Astrophysics": { | |
"Nexa Astro": { | |
"Accuracy (%)": 97, | |
"Macro-F1 (%)": 96, | |
"ROC-AUC": 0.98, | |
"SOTA_Accuracy (%)": 96, | |
"SOTA_ROC-AUC": 0.97 | |
}, | |
}, | |
"Materials Science": { | |
"Nexa MatSci": { | |
"MAE (eV)": 0.02, | |
"RMSE (eV)": 0.03, | |
"Bandgap Accuracy (%)": 98, | |
"SOTA_MAE (eV)": 0.03, | |
"SOTA_Bandgap Accuracy (%)": 95 | |
}, | |
}, | |
"Quantum State Tomography": { | |
"Nexa QST": { | |
"Fidelity": 0.80, | |
"Purity": 1.00, | |
"Trace Distance": 0.15, | |
"SOTA_Fidelity": 0.83, | |
"SOTA_Trace Distance": 0.12 | |
}, | |
}, | |
"Computational Fluid Dynamics": { | |
"Nexa CFD": { | |
"Relative L2 Error": 0.015, | |
"Energy Conservation Loss": 0.005, | |
"PSNR": 30, | |
"SSIM": 0.88, | |
"SOTA_Relative L2 Error": 0.020, | |
"SOTA_SSIM": 0.85 | |
}, | |
}, | |
"High-Energy Physics": { | |
"Nexa HEP": { | |
"ROC-AUC": 0.92, | |
"Event Accuracy (%)": 90, | |
"Jet Tagging (%)": 88, | |
"SOTA_ROC-AUC": 0.93, | |
"SOTA_Event Accuracy (%)": 89 | |
}, | |
}, | |
"LLM Hypothesis & Methodology": { | |
"Nexa MOE": { | |
"Coherence (1β10)": 9.1, | |
"Novelty (1β10)": 8.6, | |
"Utility (1β10)": 8.8, | |
"Expert-Rated SOTA (1β10)": 9.0 | |
}, | |
}, | |
} | |
# ─── 2. SECTION DESCRIPTIONS ────────────────────────────────────────────────────
# Markdown blurb shown above the table for each domain.  Keys match the
# top-level keys of ``benchmark_data``.  Every blurb is three lines: a bold
# title, a one-line task summary, and the metrics / baselines used.
# NOTE(review): the non-ASCII punctuation (arrow, en dashes) was garbled in the
# captured source and has been restored from context; the separator in
# "bandgap-prediction" is written as a plain hyphen.
section_descriptions = {
    "Protein Folding": (
        "**Protein Folding**\n"
        "Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.\n"
        "Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence."
    ),
    "Astrophysics": (
        "**Astrophysics**\n"
        "Stellar classification and redshift estimation.\n"
        "Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines."
    ),
    "Materials Science": (
        "**Materials Science**\n"
        "Property prediction for novel materials (e.g., bandgap, formation energy).\n"
        "Metrics: MAE/RMSE, bandgap-prediction accuracy vs. CGCNN, ALIGNN."
    ),
    "Quantum State Tomography": (
        "**Quantum State Tomography**\n"
        "Reconstruct quantum states from measurement data.\n"
        "Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet."
    ),
    "Computational Fluid Dynamics": (
        "**CFD**\n"
        "Flow field prediction (Navier–Stokes).\n"
        "Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO."
    ),
    "High-Energy Physics": (
        "**High-Energy Physics**\n"
        "Particle classification and signal/background separation.\n"
        "Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency."
    ),
    "LLM Hypothesis & Methodology": (
        "**LLM-Based Scientific Reasoning**\n"
        "Hypothesis and methodology generation.\n"
        "Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; "
        "compared to top academic LLM baselines."
    ),
}
# ─── 3. PLOTTING FUNCTION ───────────────────────────────────────────────────────
def plot_comparison(category):
    """Build a bar chart of each model's metrics beside its SOTA reference values.

    Every model in ``benchmark_data[category]`` gets one contiguous group of
    bars: its own metrics first, followed by the values stored under keys that
    start with ``"SOTA"`` (drawn semi-transparent).  Groups are placed left to
    right with a fixed gap between them, and one x tick labelled with the model
    name is centred under each group.

    Args:
        category: Domain name; must be a key of ``benchmark_data``.

    Returns:
        The matplotlib ``Figure`` containing the chart.

    Raises:
        KeyError: If ``category`` is not a key of ``benchmark_data``.
    """
    data = benchmark_data[category]
    fig, ax = plt.subplots(figsize=(7, 4))
    bar_width = 0.4
    group_gap = 2 * bar_width  # empty space left between neighbouring groups

    tick_positions = []
    group_start = 0.0
    for model, metrics in data.items():
        own_values = [v for k, v in metrics.items() if not k.startswith("SOTA")]
        sota_values = [v for k, v in metrics.items() if k.startswith("SOTA")]

        # Within a group: model metrics first, SOTA references right after.
        own_x = [group_start + j * bar_width for j in range(len(own_values))]
        sota_start = group_start + bar_width * len(own_values)
        sota_x = [sota_start + j * bar_width for j in range(len(sota_values))]

        ax.bar(own_x, own_values, width=bar_width, label=f"{model} Metrics")
        if sota_values:
            ax.bar(sota_x, sota_values, width=bar_width, alpha=0.7,
                   label=f"{model} SOTA")

        # Centre the tick under this group's own bars.  The group width is
        # derived from this model's bar count so that groups never overlap,
        # however many metrics a model reports.
        n_bars = len(own_values) + len(sota_values)
        tick_positions.append(group_start + bar_width * max(n_bars - 1, 0) / 2)
        group_start += n_bars * bar_width + group_gap

    # formatting
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(list(data), rotation=45, ha="right")
    ax.set_ylabel("Value / Score")
    ax.set_title(f"{category} — Nexa vs. SOTA")
    if data:
        # Skip the legend when nothing was drawn; matplotlib warns otherwise.
        ax.legend(loc="upper right")
    fig.tight_layout()

    # pyplot keeps a global reference to every figure it creates.  Release it
    # so repeated callback invocations do not accumulate open figures; the
    # returned Figure object itself stays usable for rendering.
    plt.close(fig)
    return fig
# ─── 4. CALLBACK TO RENDER SECTION ──────────────────────────────────────────────
def show_eval(category):
    """Assemble the description, metrics table and chart for one domain.

    Args:
        category: Domain name; must be a key of both ``section_descriptions``
            and ``benchmark_data``.

    Returns:
        A ``(description, table, figure)`` tuple: the Markdown text for the
        domain, a DataFrame with one row per model (a leading ``Model`` column
        followed by one column per metric; metrics a model does not report are
        NaN), and the figure returned by ``plot_comparison``.

    Raises:
        KeyError: If ``category`` is missing from either lookup table.
    """
    desc = section_descriptions[category]
    # Transpose so that each model becomes a row, then move the model names
    # out of the index into a regular "Model" column: Gradio's Dataframe
    # component displays a DataFrame's columns only, so names left in the
    # index would not appear in the table.
    df = (
        pd.DataFrame(benchmark_data[category])
        .T.rename_axis("Model")
        .reset_index()
    )
    fig = plot_comparison(category)
    return desc, df, fig
# ─── 5. BUILD GRADIO APP ────────────────────────────────────────────────────────
# Page layout: a heading and intro text, then one row with the domain selector
# on the left (narrow column) and the description, table and plot on the right
# (wide column).
with gr.Blocks(css="""
body { background-color: #f7f9fc; font-family: Arial, sans-serif; }
.gradio-container { max-width: 900px; margin: auto; }
h1, h2, h3 { color: #333; }
""") as app:
    # NOTE(review): the heading emoji was garbled in the captured source;
    # the microscope is the assumed original -- confirm.
    gr.Markdown("# 🔬 Nexa Evals Dashboard")
    gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")
    with gr.Row():
        with gr.Column(scale=1):
            category = gr.Radio(
                choices=list(benchmark_data.keys()),
                value="Protein Folding",
                label="Select Domain / Model Group",
            )
        with gr.Column(scale=3):
            description = gr.Markdown("")
            table = gr.Dataframe(headers=["Metric", "Value"], interactive=False)
            plot = gr.Plot()

    # Re-render all three outputs whenever a different domain is selected.
    category.change(
        fn=show_eval,
        inputs=category,
        outputs=[description, table, plot],
    )
    # Initial render: run the same callback once when the page loads, using the
    # selector's default value.  This goes through Gradio's normal output
    # processing and fills the plot as well, unlike assigning to the
    # components' ``.value`` attributes after construction.
    app.load(
        fn=show_eval,
        inputs=category,
        outputs=[description, table, plot],
    )

# Launch only when executed as a script, so importing this module does not
# start a server (on Hugging Face the config flags will be auto-managed).
if __name__ == "__main__":
    app.launch()