Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
|
5 |
+
# ─── 1. BENCHMARK DATA ──────────────────────────────────────────────────────────
|
6 |
+
# Nested dict: Domain → { Model Name → {metric_name: value, …, "SOTA_<metric>": value } }
|
7 |
+
# Benchmark results keyed by domain, then by model name.
#
# Each model maps metric names to numeric values. A key of the form
# "SOTA_<metric>" holds the state-of-the-art reference value for "<metric>".
# Values within one model mix scales (percentages such as 71 next to 0-1
# scores such as 0.60), so they are not directly comparable with each other.
#
# NOTE(review): "Expert-Rated SOTA (1–10)" does not carry the "SOTA_" prefix,
# so plot_comparison() (which tests key.startswith("SOTA")) draws it as one of
# the model's own metrics — confirm whether that is intended.
benchmark_data = {
    "Protein Folding": {
        "Nexa Bio1 (Secondary)": {
            "Accuracy (%)": 71,
            "Q3 (%)": 65,
            "Q8 (%)": 55,
            "TM-score": 0.60,
            "SOTA_Accuracy (%)": 85,
            "SOTA_TM-score": 0.75,
        },
        "Nexa Bio2 (Tertiary)": {
            "Confidence (%)": 90,
            "GDT_TS": 0.82,
            "Entropy Threshold (%)": 80,
            "SOTA_Confidence (%)": 92,
            "SOTA_GDT_TS": 0.85,
        },
    },
    "Astrophysics": {
        "Nexa Astro": {
            "Accuracy (%)": 97,
            "Macro-F1 (%)": 96,
            "ROC-AUC": 0.98,
            "SOTA_Accuracy (%)": 96,
            "SOTA_ROC-AUC": 0.97,
        },
    },
    "Materials Science": {
        "Nexa MatSci": {
            "MAE (eV)": 0.02,
            "RMSE (eV)": 0.03,
            "Bandgap Accuracy (%)": 98,
            "SOTA_MAE (eV)": 0.03,
            "SOTA_Bandgap Accuracy (%)": 95,
        },
    },
    "Quantum State Tomography": {
        "Nexa QST": {
            "Fidelity": 0.80,
            "Purity": 1.00,
            "Trace Distance": 0.15,
            "SOTA_Fidelity": 0.83,
            "SOTA_Trace Distance": 0.12,
        },
    },
    "Computational Fluid Dynamics": {
        "Nexa CFD": {
            "Relative L2 Error": 0.015,
            "Energy Conservation Loss": 0.005,
            "PSNR": 30,
            "SSIM": 0.88,
            "SOTA_Relative L2 Error": 0.020,
            "SOTA_SSIM": 0.85,
        },
    },
    "High-Energy Physics": {
        "Nexa HEP": {
            "ROC-AUC": 0.92,
            "Event Accuracy (%)": 90,
            "Jet Tagging (%)": 88,
            "SOTA_ROC-AUC": 0.93,
            "SOTA_Event Accuracy (%)": 89,
        },
    },
    "LLM Hypothesis & Methodology": {
        "Nexa MOE": {
            # Rubric scores on a 1–10 scale (en dash restored from mojibake).
            "Coherence (1–10)": 9.1,
            "Novelty (1–10)": 8.6,
            "Utility (1–10)": 8.8,
            "Expert-Rated SOTA (1–10)": 9.0,
        },
    },
}
|
80 |
+
|
81 |
+
# ─── 2. SECTION DESCRIPTIONS ────────────────────────────────────────────────────
|
82 |
+
# Markdown blurb shown above the table/plot for each domain.
# Keys must match the top-level keys of benchmark_data, because show_eval()
# indexes both dicts with the same category string.
# Each value is three newline-separated lines: bold title, summary, metrics.
section_descriptions = {
    "Protein Folding": (
        "**Protein Folding**\n"
        "Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.\n"
        "Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence."
    ),
    "Astrophysics": (
        "**Astrophysics**\n"
        "Stellar classification and redshift estimation.\n"
        "Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines."
    ),
    "Materials Science": (
        "**Materials Science**\n"
        "Property prediction for novel materials (e.g., bandgap, formation energy).\n"
        "Metrics: MAE/RMSE, bandgap-prediction accuracy vs. CGCNN, ALIGNN."
    ),
    "Quantum State Tomography": (
        "**Quantum State Tomography**\n"
        "Reconstruct quantum states from measurement data.\n"
        "Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet."
    ),
    "Computational Fluid Dynamics": (
        "**CFD**\n"
        "Flow field prediction (Navier–Stokes).\n"
        "Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO."
    ),
    "High-Energy Physics": (
        "**High-Energy Physics**\n"
        "Particle classification and signal/background separation.\n"
        "Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency."
    ),
    "LLM Hypothesis & Methodology": (
        "**LLM-Based Scientific Reasoning**\n"
        "Hypothesis and methodology generation.\n"
        "Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; "
        "compared to top academic LLM baselines."
    ),
}
|
105 |
+
|
106 |
+
# ─── 3. PLOTTING FUNCTION ───────────────────────────────────────────────────────
|
107 |
+
def plot_comparison(category):
    """Draw a grouped bar chart of each model's metrics next to its SOTA values.

    For every model under ``benchmark_data[category]`` the model's own metrics
    (keys that do not start with ``"SOTA"``) are drawn as one bar series,
    followed by the SOTA reference values (keys starting with ``"SOTA"``) as a
    second, semi-transparent series. All bars share a single y-axis, so
    metrics on different scales appear side by side unnormalised.

    Args:
        category: A top-level key of ``benchmark_data``.

    Returns:
        The matplotlib ``Figure`` holding the chart. The figure is removed
        from pyplot's global registry before it is returned, so repeated
        callback invocations do not accumulate open figures.

    Raises:
        KeyError: If ``category`` is not a key of ``benchmark_data``.
    """
    data = benchmark_data[category]
    fig, ax = plt.subplots(figsize=(7, 4))
    bar_width = 0.4
    group_gap = 2 * bar_width  # empty space left between two model groups

    labels = list(data)
    tick_positions = []
    group_start = 0.0
    for model in labels:
        metrics = data[model]
        # Split the model's own metrics from the SOTA reference values.
        own_values = [v for k, v in metrics.items() if not k.startswith("SOTA")]
        sota_values = [v for k, v in metrics.items() if k.startswith("SOTA")]

        # Own metrics come first; SOTA bars continue immediately after them.
        own_x = [group_start + j * bar_width for j in range(len(own_values))]
        sota_start = group_start + bar_width * len(own_values)
        sota_x = [sota_start + j * bar_width for j in range(len(sota_values))]

        if own_values:
            ax.bar(own_x, own_values, width=bar_width, label=f"{model} Metrics")
        if sota_values:
            ax.bar(sota_x, sota_values, width=bar_width, alpha=0.7, label=f"{model} SOTA")

        # Centre the tick under this model's own group, and advance by the
        # group's real width so a model with many bars cannot spill into the
        # next model's group (a fixed stride overlapped once a group exceeded
        # five bars).
        n_bars = len(own_values) + len(sota_values)
        tick_positions.append(group_start + bar_width * max(n_bars - 1, 0) / 2)
        group_start += bar_width * n_bars + group_gap

    # formatting
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.set_ylabel("Value / Score")
    ax.set_title(f"{category} — Nexa vs. SOTA")
    if ax.get_legend_handles_labels()[0]:
        # Skip the legend when nothing was drawn (avoids an empty-legend warning).
        ax.legend(loc="upper right")
    fig.tight_layout()  # lay out this figure, not whichever one pyplot deems current
    plt.close(fig)  # unregister from pyplot; the Figure object itself stays renderable
    return fig
|
139 |
+
|
140 |
+
# ─── 4. CALLBACK TO RENDER SECTION ──────────────────────────────────────────────
|
141 |
+
def show_eval(category):
    """Assemble the description, metrics table and chart for one domain.

    Args:
        category: A key present in both ``section_descriptions`` and
            ``benchmark_data``.

    Returns:
        A ``(description, table, figure)`` tuple:
        the Markdown description string; a DataFrame with one row per model,
        a leading ``"Model"`` column and one column per metric (NaN where a
        model does not report that metric); and the figure produced by
        ``plot_comparison(category)``.

    Raises:
        KeyError: If ``category`` is missing from either lookup dict.
    """
    desc = section_descriptions[category]
    # Transpose to one row per model, then move the model names out of the
    # index into a real column: the Gradio Dataframe component does not
    # display the pandas index, which would leave the rows unlabeled.
    df = (
        pd.DataFrame(benchmark_data[category])
        .T
        .rename_axis("Model")
        .reset_index()
    )
    fig = plot_comparison(category)
    return desc, df, fig
|
146 |
+
|
147 |
+
# ─── 5. BUILD GRADIO APP ────────────────────────────────────────────────────────
|
148 |
+
# Page layout: a domain selector on the left; description, metrics table and
# comparison chart on the right. All three outputs are produced by show_eval().
with gr.Blocks(css="""
body { background-color: #f7f9fc; font-family: Arial, sans-serif; }
.gradio-container { max-width: 900px; margin: auto; }
h1, h2, h3 { color: #333; }
""") as app:
    gr.Markdown("# 🔬 Nexa Evals Dashboard")
    gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")

    with gr.Row():
        with gr.Column(scale=1):
            category = gr.Radio(
                choices=list(benchmark_data.keys()),
                value="Protein Folding",
                label="Select Domain / Model Group"
            )
        with gr.Column(scale=3):
            description = gr.Markdown("")
            # No fixed headers: the columns come from the DataFrame that
            # show_eval() returns and differ from domain to domain.
            table = gr.Dataframe(interactive=False)
            plot = gr.Plot()

    category.change(
        fn=show_eval,
        inputs=category,
        outputs=[description, table, plot]
    )

    # initialize: render the default selection when the page loads, so the
    # description, table AND plot are all populated through the same path as
    # a user selection (previously the initial figure was discarded).
    app.load(
        fn=show_eval,
        inputs=category,
        outputs=[description, table, plot]
    )

# Launch (on Hugging Face the config flags will be auto-managed)
if __name__ == "__main__":
    app.launch()
|