Allanatrix committed on
Commit
16d37fe
·
verified ·
1 Parent(s): 891a83a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -0
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+
5
+ # ─── 1. BENCHMARK DATA ──────────────────────────────────────────────────────────
6
+ # Nested dict: Domain → { Model Name → {metric_name: value, …, "SOTA_<metric>": value } }
7
# Benchmark results, keyed first by domain and then by model name.
# Each model maps metric names to values; keys starting with "SOTA_" hold the
# reference value the same-named model metric is compared against.
benchmark_data = {
    # --- Protein structure prediction -------------------------------------
    "Protein Folding": {
        "Nexa Bio1 (Secondary)": {
            "Accuracy (%)": 71,
            "Q3 (%)": 65,
            "Q8 (%)": 55,
            "TM-score": 0.60,
            "SOTA_Accuracy (%)": 85,
            "SOTA_TM-score": 0.75,
        },
        "Nexa Bio2 (Tertiary)": {
            "Confidence (%)": 90,
            "GDT_TS": 0.82,
            "Entropy Threshold (%)": 80,
            "SOTA_Confidence (%)": 92,
            "SOTA_GDT_TS": 0.85,
        },
    },
    # --- Astrophysics ------------------------------------------------------
    "Astrophysics": {
        "Nexa Astro": {
            "Accuracy (%)": 97,
            "Macro-F1 (%)": 96,
            "ROC-AUC": 0.98,
            "SOTA_Accuracy (%)": 96,
            "SOTA_ROC-AUC": 0.97,
        },
    },
    # --- Materials science -------------------------------------------------
    "Materials Science": {
        "Nexa MatSci": {
            "MAE (eV)": 0.02,
            "RMSE (eV)": 0.03,
            "Bandgap Accuracy (%)": 98,
            "SOTA_MAE (eV)": 0.03,
            "SOTA_Bandgap Accuracy (%)": 95,
        },
    },
    # --- Quantum state tomography -----------------------------------------
    "Quantum State Tomography": {
        "Nexa QST": {
            "Fidelity": 0.80,
            "Purity": 1.00,
            "Trace Distance": 0.15,
            "SOTA_Fidelity": 0.83,
            "SOTA_Trace Distance": 0.12,
        },
    },
    # --- Computational fluid dynamics -------------------------------------
    "Computational Fluid Dynamics": {
        "Nexa CFD": {
            "Relative L2 Error": 0.015,
            "Energy Conservation Loss": 0.005,
            "PSNR": 30,
            "SSIM": 0.88,
            "SOTA_Relative L2 Error": 0.020,
            "SOTA_SSIM": 0.85,
        },
    },
    # --- High-energy physics ----------------------------------------------
    "High-Energy Physics": {
        "Nexa HEP": {
            "ROC-AUC": 0.92,
            "Event Accuracy (%)": 90,
            "Jet Tagging (%)": 88,
            "SOTA_ROC-AUC": 0.93,
            "SOTA_Event Accuracy (%)": 89,
        },
    },
    # --- LLM-based reasoning ----------------------------------------------
    # NOTE: "Expert-Rated SOTA (1–10)" does not carry the "SOTA_" prefix, so
    # any code that selects reference values by that prefix will treat it as
    # a regular model metric.
    "LLM Hypothesis & Methodology": {
        "Nexa MOE": {
            "Coherence (1–10)": 9.1,
            "Novelty (1–10)": 8.6,
            "Utility (1–10)": 8.8,
            "Expert-Rated SOTA (1–10)": 9.0,
        },
    },
}
80
+
81
+ # ─── 2. SECTION DESCRIPTIONS ───────────────────────────────────────────────────
82
# Markdown blurb shown above the table/plot for each domain in benchmark_data.
# Each value is three lines joined by "\n": a bold title, a task summary, and
# the metrics line.
# NOTE(review): written as explicit "\n"-joined literals on the assumption that
# the original triple-quoted continuation lines had no leading whitespace.
section_descriptions = {
    "Protein Folding": (
        "**Protein Folding**\n"
        "Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.\n"
        "Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence."
    ),
    "Astrophysics": (
        "**Astrophysics**\n"
        "Stellar classification and redshift estimation.\n"
        "Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines."
    ),
    "Materials Science": (
        "**Materials Science**\n"
        "Property prediction for novel materials (e.g., bandgap, formation energy).\n"
        "Metrics: MAE/RMSE, bandgap‐prediction accuracy vs. CGCNN, ALIGNN."
    ),
    "Quantum State Tomography": (
        "**Quantum State Tomography**\n"
        "Reconstruct quantum states from measurement data.\n"
        "Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet."
    ),
    "Computational Fluid Dynamics": (
        "**CFD**\n"
        "Flow field prediction (Navier–Stokes).\n"
        "Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO."
    ),
    "High-Energy Physics": (
        "**High-Energy Physics**\n"
        "Particle classification and signal/background separation.\n"
        "Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency."
    ),
    "LLM Hypothesis & Methodology": (
        "**LLM-Based Scientific Reasoning**\n"
        "Hypothesis and methodology generation.\n"
        "Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; "
        "compared to top academic LLM baselines."
    ),
}
105
+
106
+ # ─── 3. PLOTTING FUNCTION ────────────────────────────────────────────────────────
107
def plot_comparison(category):
    """Draw a grouped bar chart of one domain's model metrics and SOTA values.

    For every model under ``benchmark_data[category]`` the model's own metrics
    are drawn as one bar series, followed by a second, semi-transparent series
    for the entries whose key starts with ``"SOTA"``.

    Args:
        category: Domain name; must be a key of the module-level
            ``benchmark_data`` dict.

    Returns:
        The matplotlib ``Figure`` containing the chart.

    Raises:
        KeyError: If ``category`` is not present in ``benchmark_data``.
    """
    data = benchmark_data[category]
    fig, ax = plt.subplots(figsize=(7, 4))
    bar_width = 0.4
    # Groups sit at least this far apart; wider groups push the next group
    # further right so their bars never overlap.
    min_group_stride = 2.0
    labels = list(data)
    tick_positions = []

    group_start = 0.0
    for model in labels:
        metrics = data[model]
        own_values = [v for k, v in metrics.items() if not k.startswith("SOTA")]
        sota_values = [v for k, v in metrics.items() if k.startswith("SOTA")]

        # Model metrics first ...
        own_x = [group_start + j * bar_width for j in range(len(own_values))]
        ax.bar(own_x, own_values, width=bar_width, label=f"{model} Metrics")

        # ... then the SOTA reference bars directly to their right.
        if sota_values:
            sota_start = group_start + bar_width * len(own_values)
            sota_x = [sota_start + j * bar_width for j in range(len(sota_values))]
            ax.bar(sota_x, sota_values, width=bar_width, alpha=0.7,
                   label=f"{model} SOTA")

        # Centre the tick under this model's own bars. The bar count is taken
        # per model rather than reused from whichever model was iterated last.
        n_bars = len(own_values) + len(sota_values)
        tick_positions.append(group_start + bar_width * max(n_bars - 1, 0) / 2)

        # Leave one bar-width of padding after the group, but never place
        # groups closer together than min_group_stride.
        group_start += max(min_group_stride, (n_bars + 1) * bar_width)

    # formatting
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(labels, rotation=45, ha="right")
    ax.set_ylabel("Value / Score")
    ax.set_title(f"{category} — Nexa vs. SOTA")
    ax.legend(loc="upper right")
    fig.tight_layout()

    # Drop the figure from pyplot's global registry so repeated callbacks do
    # not pile up open figures; the returned Figure can still be rendered.
    plt.close(fig)
    return fig
139
+
140
+ # ─── 4. CALLBACK TO RENDER SECTION ─────────────────────────────────────────────
141
def show_eval(category):
    """Collect everything the dashboard shows for one domain.

    Args:
        category: Domain name; must be a key of both ``section_descriptions``
            and ``benchmark_data``.

    Returns:
        A ``(description, table, figure)`` tuple: the Markdown description
        string, a DataFrame with one row per model, and the figure produced by
        ``plot_comparison``.

    Raises:
        KeyError: If ``category`` is missing from either lookup dict.
    """
    desc = section_descriptions[category]

    # One row per model, one column per metric. The model names are moved out
    # of the index into a leading "Model" column; otherwise a table widget
    # that renders only columns and cell values shows unlabeled rows.
    df = (
        pd.DataFrame(benchmark_data[category])
        .T
        .rename_axis("Model")
        .reset_index()
    )

    fig = plot_comparison(category)
    return desc, df, fig
146
+
147
+ # ─── 5. BUILD GRADIO APP ───────────────────────────────────────────────────────
148
# Dashboard layout: a domain selector on the left; the description, metric
# table and comparison plot for the selected domain on the right.
with gr.Blocks(css="""
body { background-color: #f7f9fc; font-family: Arial, sans-serif; }
.gradio-container { max-width: 900px; margin: auto; }
h1, h2, h3 { color: #333; }
""") as app:
    gr.Markdown("# 🔬 Nexa Evals Dashboard")
    gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")

    with gr.Row():
        with gr.Column(scale=1):
            category = gr.Radio(
                choices=list(benchmark_data.keys()),
                value="Protein Folding",
                label="Select Domain / Model Group"
            )
        with gr.Column(scale=3):
            description = gr.Markdown("")
            # NOTE(review): these headers are only a placeholder; show_eval
            # returns a DataFrame with its own column names - confirm whether
            # the static headers are still wanted.
            table = gr.Dataframe(headers=["Metric", "Value"], interactive=False)
            plot = gr.Plot()

    # Re-render all three outputs whenever a different domain is selected.
    category.change(
        fn=show_eval,
        inputs=category,
        outputs=[description, table, plot]
    )

    # initialize: run the same callback on page load so that the description,
    # table AND plot are populated for the default selection. Assigning
    # `.value` after the components were built skipped the plot entirely and
    # stored an unprocessed DataFrame on the table component.
    app.load(
        fn=show_eval,
        inputs=category,
        outputs=[description, table, plot]
    )

# Launch (on Hugging Face the config flags will be auto-managed).
# Guarded so that importing this module does not start a server.
if __name__ == "__main__":
    app.launch()