Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,78 +1,547 @@
|
|
1 |
import gradio as gr
|
2 |
import plotly.graph_objs as go
|
|
|
|
|
|
|
|
|
3 |
import json
|
4 |
|
5 |
-
#
|
6 |
MODEL_EVALS = {
|
7 |
"Proteins": {
|
8 |
-
"
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
},
|
14 |
-
"
|
15 |
-
"
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
},
|
18 |
-
"Materials": {
|
19 |
-
"
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
},
|
22 |
-
"
|
23 |
-
"
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
},
|
26 |
-
"
|
27 |
-
"
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
}
|
35 |
|
36 |
-
def
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
fig = go.Figure()
|
41 |
-
fig.add_trace(go.Bar(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
fig.update_layout(
|
43 |
-
title=f"
|
44 |
xaxis_title="Model",
|
45 |
-
yaxis_title="
|
46 |
yaxis_range=[0, 1.0],
|
47 |
template="plotly_white",
|
48 |
-
height=500
|
|
|
|
|
|
|
49 |
)
|
|
|
|
|
|
|
|
|
50 |
return fig
|
51 |
|
52 |
-
def
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
-
def
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
#
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
with gr.Row():
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import plotly.graph_objs as go
|
3 |
+
import plotly.express as px
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
from datetime import datetime
|
7 |
import json
|
8 |
|
9 |
+
# Enhanced model evaluation data with comprehensive metrics
|
10 |
# Benchmark registry consumed by every chart and table in this app.
#
# Layout: domain name -> {
#     "models": {display name -> {"score", "parameters", "institution",
#                                 "date" (ISO "YYYY-MM-DD"), "paper", "task"}},
#     "metric": label used as the y-axis title of the domain chart,
#     "description": free-text summary of the domain,
# }
#
# Model names containing the substring "Nexa" are treated as Nexa models by
# the plotting and counting code, so keep that naming convention.
#
# NOTE(review): "Computational Fluid Dynamics" is labelled RMSE (an error
# measure) but its scores are ranked and plotted like every other domain,
# i.e. higher is better — confirm what these numbers actually represent.
MODEL_EVALS = {
    "Proteins": {
        "models": {
            "AlphaFold2 (Tertiary GDT-TS)": {
                "score": 0.924,
                "parameters": "2.3B",
                "institution": "DeepMind",
                "date": "2021-07-15",
                "paper": "https://doi.org/10.1038/s41586-021-03819-2",
                "task": "Protein Structure Prediction"
            },
            "Nexa Bio2 (Tertiary)": {
                "score": 0.90,
                "parameters": "1.8B",
                "institution": "Nexa Research",
                "date": "2024-11-20",
                "paper": "https://arxiv.org/abs/2024.protein.nexa",
                "task": "Protein Structure Prediction"
            },
            "DeepCNF (Secondary)": {
                "score": 0.85,
                "parameters": "450M",
                "institution": "University of Missouri",
                "date": "2019-03-12",
                "paper": "https://doi.org/10.1186/s12859-019-2940-0",
                "task": "Secondary Structure Prediction"
            },
            "Porter6 (Secondary)": {
                "score": 0.8456,
                "parameters": "120M",
                "institution": "University of Padova",
                "date": "2022-05-10",
                "paper": "https://doi.org/10.1038/s41598-022-10847-w",
                "task": "Secondary Structure Prediction"
            },
            "Nexa Bio1 (Secondary)": {
                "score": 0.71,
                "parameters": "800M",
                "institution": "Nexa Research",
                "date": "2024-09-15",
                "paper": "https://arxiv.org/abs/2024.bio1.nexa",
                "task": "Secondary Structure Prediction"
            }
        },
        "metric": "Accuracy",
        "description": "Protein structure prediction accuracy across secondary and tertiary structure tasks"
    },
    "Astronomy": {
        "models": {
            "Nexa Astro": {
                "score": 0.97,
                "parameters": "2.1B",
                "institution": "Nexa Research",
                "date": "2024-10-05",
                "paper": "https://arxiv.org/abs/2024.astro.nexa",
                "task": "Galaxy Classification"
            },
            "Baseline CNN": {
                "score": 0.89,
                "parameters": "50M",
                "institution": "Various",
                "date": "2020-01-01",
                "paper": "Standard CNN Architecture",
                "task": "Galaxy Classification"
            }
        },
        "metric": "F1-Score",
        "description": "Astronomical object classification and analysis performance"
    },
    "Materials Science": {
        "models": {
            "Nexa Materials": {
                "score": 0.9999,
                "parameters": "1.5B",
                "institution": "Nexa Research",
                "date": "2024-12-01",
                "paper": "https://arxiv.org/abs/2024.materials.nexa",
                "task": "Property Prediction"
            },
            "Random Forest Baseline": {
                "score": 0.92,
                "parameters": "N/A",
                "institution": "Various",
                "date": "2018-01-01",
                "paper": "Standard ML Baseline",
                "task": "Property Prediction"
            }
        },
        # Was the mis-encoded "RΒ² Score" (UTF-8 bytes read as a Greek code page).
        "metric": "R² Score",
        "description": "Materials property prediction and discovery performance"
    },
    "Quantum State Tomography": {
        "models": {
            "Quantum TomoNet": {
                "score": 0.85,
                "parameters": "890M",
                "institution": "IBM Research",
                "date": "2023-04-20",
                "paper": "https://doi.org/10.1038/s41567-023-02020-x",
                "task": "State Reconstruction"
            },
            "Nexa QST Model": {
                "score": 0.80,
                "parameters": "1.2B",
                "institution": "Nexa Research",
                "date": "2024-08-30",
                "paper": "https://arxiv.org/abs/2024.qst.nexa",
                "task": "State Reconstruction"
            }
        },
        "metric": "Fidelity",
        "description": "Quantum state reconstruction accuracy and fidelity measures"
    },
    "High Energy Physics": {
        "models": {
            "CMSNet": {
                "score": 0.94,
                "parameters": "3.2B",
                "institution": "CERN",
                "date": "2023-11-15",
                "paper": "https://doi.org/10.1007/JHEP11(2023)045",
                "task": "Particle Detection"
            },
            "Nexa HEP Model": {
                "score": 0.91,
                "parameters": "2.8B",
                "institution": "Nexa Research",
                "date": "2024-07-12",
                "paper": "https://arxiv.org/abs/2024.hep.nexa",
                "task": "Particle Detection"
            }
        },
        "metric": "AUC-ROC",
        "description": "High energy physics event detection and classification"
    },
    "Computational Fluid Dynamics": {
        "models": {
            "Nexa CFD Model": {
                "score": 0.92,
                "parameters": "1.9B",
                "institution": "Nexa Research",
                "date": "2024-06-18",
                "paper": "https://arxiv.org/abs/2024.cfd.nexa",
                "task": "Flow Prediction"
            },
            "FlowNet": {
                "score": 0.89,
                "parameters": "1.1B",
                "institution": "Technical University of Munich",
                "date": "2022-09-30",
                "paper": "https://doi.org/10.1016/j.jcp.2022.111567",
                "task": "Flow Prediction"
            }
        },
        "metric": "RMSE",
        "description": "Fluid dynamics simulation and prediction accuracy"
    }
}
|
168 |
|
169 |
+
def create_overall_leaderboard(evals=None):
    """Build one leaderboard table covering every model in every domain.

    Args:
        evals: Optional registry shaped like ``MODEL_EVALS``
            (domain -> {"models": {name -> metadata dict}}). When omitted,
            the module-level ``MODEL_EVALS`` is used.

    Returns:
        pandas.DataFrame with the columns Model, Domain, Score, Parameters,
        Institution, Date, Paper and Task, sorted by Score (highest first)
        and re-indexed 0..n-1. An empty registry yields an empty frame that
        still carries those columns.
    """
    columns = [
        "Model", "Domain", "Score", "Parameters",
        "Institution", "Date", "Paper", "Task",
    ]
    registry = MODEL_EVALS if evals is None else evals

    rows = []
    for domain, data in registry.items():
        for model_name, model_data in data["models"].items():
            rows.append({
                "Model": model_name,
                "Domain": domain,
                "Score": model_data["score"],
                "Parameters": model_data["parameters"],
                "Institution": model_data["institution"],
                "Date": model_data["date"],
                "Paper": model_data["paper"],
                "Task": model_data["task"],
            })

    # Passing the column list keeps the schema stable for an empty registry,
    # where sorting by "Score" would otherwise raise KeyError.
    df = pd.DataFrame(rows, columns=columns)

    # mergesort is stable, so models with equal scores keep registry order
    # instead of whatever order the default quicksort happens to produce.
    df = df.sort_values("Score", ascending=False, kind="mergesort")
    return df.reset_index(drop=True)
|
188 |
|
189 |
+
def create_domain_plot(domain):
    """Render the scores of one domain's models as a bar chart.

    Args:
        domain: Key into ``MODEL_EVALS``.

    Returns:
        A Plotly figure with one bar per model, labelled with the score to
        three decimals. Bars for models whose name contains "Nexa" use the
        brand colour. An unknown domain yields an empty figure.
    """
    if domain not in MODEL_EVALS:
        return go.Figure()

    entry = MODEL_EVALS[domain]

    names = []
    values = []
    for name, info in entry["models"].items():
        names.append(name)
        values.append(info["score"])

    # Brand colour for Nexa models, neutral grey for everything else.
    bar_colors = ['#6366f1' if 'Nexa' in name else '#64748b' for name in names]
    bar_labels = [f"{value:.3f}" for value in values]

    bars = go.Bar(
        x=names,
        y=values,
        marker_color=bar_colors,
        text=bar_labels,
        textposition='auto',
        hovertemplate='<b>%{x}</b><br>Score: %{y:.3f}<extra></extra>'
    )
    fig = go.Figure(data=[bars])

    layout_options = dict(
        title=f"{domain} - Model Performance Comparison",
        xaxis_title="Model",
        yaxis_title=str(entry['metric']),
        yaxis_range=[0, 1.0],
        template="plotly_white",
        height=500,
        font=dict(size=12),
        title_font_size=16,
        showlegend=False,
    )
    fig.update_layout(**layout_options)

    # Slanted tick labels keep long model names from overlapping.
    fig.update_xaxes(tickangle=45)
    return fig
|
227 |
|
228 |
+
def create_radar_chart():
    """Plot Nexa scores per domain on a radar (polar) chart.

    For each domain in ``MODEL_EVALS`` only the first model whose name
    contains "Nexa" is used; domains without one are left out.

    Returns:
        A Plotly figure with a single filled polar trace, or an empty figure
        when no domain has a Nexa model.
    """
    missing = object()
    score_by_domain = {}

    for domain, data in MODEL_EVALS.items():
        first_nexa = next(
            (info for name, info in data["models"].items() if "Nexa" in name),
            missing,
        )
        if first_nexa is not missing:
            score_by_domain[domain] = first_nexa["score"]

    if not score_by_domain:
        return go.Figure()

    trace = go.Scatterpolar(
        r=list(score_by_domain.values()),
        theta=list(score_by_domain.keys()),
        fill='toself',
        name='Nexa Models',
        line_color='#6366f1',
        fillcolor='rgba(99, 102, 241, 0.2)'
    )

    fig = go.Figure()
    fig.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 1])
    fig.update_layout(
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
        title="Nexa Models Performance Across Domains",
        height=500
    )
    return fig
|
266 |
|
267 |
+
def create_timeline_plot():
    """Scatter every model's score against its release date.

    Points are coloured by whether the model name contains "Nexa" and sized
    by score; hovering shows the model, domain and institution.

    Returns:
        A Plotly figure, or an empty figure when ``MODEL_EVALS`` holds no
        models.
    """
    nexa_label = "Nexa Models"
    other_label = "Other Models"

    rows = []
    for domain, data in MODEL_EVALS.items():
        for model_name, model_data in data["models"].items():
            rows.append({
                "Model": model_name,
                "Domain": domain,
                "Score": model_data["score"],
                "Date": pd.to_datetime(model_data["date"]),
                "Institution": model_data["institution"],
                # Store the human-readable label directly so the legend shows
                # it. layout.legend has no "labels" property; passing one
                # makes Plotly raise ValueError when the figure is built.
                "Model Type": nexa_label if "Nexa" in model_name else other_label,
            })

    if not rows:
        # Nothing to plot; px.scatter would fail on the missing columns.
        return go.Figure()

    df = pd.DataFrame(rows).sort_values("Date")

    fig = px.scatter(
        df,
        x="Date",
        y="Score",
        color="Model Type",
        size="Score",
        hover_data=["Model", "Domain", "Institution"],
        color_discrete_map={nexa_label: "#6366f1", other_label: "#64748b"},
        title="Model Performance Timeline",
    )

    fig.update_layout(
        height=500,
        showlegend=True,
        legend_title_text="Model Type",
    )
    return fig
|
302 |
|
303 |
+
def get_domain_details(domain, evals=None):
    """Return one domain's registry entry as pretty-printed JSON text.

    Args:
        domain: Domain name to look up.
        evals: Optional registry shaped like ``MODEL_EVALS``. When omitted,
            the module-level ``MODEL_EVALS`` is used.

    Returns:
        A JSON string (2-space indent) with the keys "domain", "metric",
        "description" and "models", or the plain text "Domain not found"
        when the domain is not in the registry.
    """
    registry = MODEL_EVALS if evals is None else evals
    if domain not in registry:
        return "Domain not found"

    data = registry[domain]
    details = {
        "domain": domain,
        "metric": data["metric"],
        "description": data["description"],
        "models": data["models"],
    }
    # ensure_ascii=False keeps labels such as "R² Score" readable instead of
    # emitting "R\u00b2 Score" in the JSON viewer.
    return json.dumps(details, indent=2, ensure_ascii=False)
|
316 |
|
317 |
+
def format_leaderboard_table(df):
    """Return a copy of the leaderboard with paper URLs turned into links.

    Args:
        df: DataFrame with a "Paper" column.

    Returns:
        A new DataFrame; ``df`` itself is not modified. Cells in "Paper"
        that are strings starting with "http" become HTML anchors opening in
        a new tab. Every other cell (plain-text citations, None, NaN) is
        passed through unchanged.
    """
    import html  # local import: only this helper needs it

    def _linkify(value):
        # Non-string cells (None/NaN) have no startswith(); pass them through.
        if isinstance(value, str) and value.startswith('http'):
            # Escape the URL so characters like & or " cannot break the attribute.
            href = html.escape(value, quote=True)
            # NOTE(review): the link label below looks mis-encoded in the
            # source this was recovered from (probably an emoji) — confirm.
            return f'<a href="{href}" target="_blank">π Paper</a>'
        return value

    df_display = df.copy()
    df_display['Paper'] = df_display['Paper'].apply(_linkify)
    return df_display
|
325 |
|
326 |
+
# Custom CSS for styling
|
327 |
+
# Stylesheet handed to gr.Blocks(css=...). That parameter takes raw CSS, so
# the rules must not be wrapped in <style> tags: the opening tag would be
# parsed as part of the first selector and the .main-header rule dropped.
custom_css = """
.main-header {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 2rem;
    border-radius: 10px;
    margin-bottom: 2rem;
}
.metric-card {
    background: #f8fafc;
    border: 1px solid #e2e8f0;
    border-radius: 8px;
    padding: 1rem;
    margin: 0.5rem;
    text-align: center;
}
.metric-value {
    font-size: 2rem;
    font-weight: bold;
    color: #6366f1;
}
.metric-label {
    color: #64748b;
    font-size: 0.9rem;
}
"""
|
356 |
|
357 |
+
# Create Gradio interface
|
358 |
+
# Build the Gradio UI: header, three summary cards, then five tabs
# (overall leaderboard, per-domain analysis, Nexa overview, timeline, about)
# and a footer.
#
# NOTE(review): several labels below start with a lone "π". They appear to be
# emoji that were mis-encoded in the source this was recovered from; the
# original characters could not be determined, so they are left as found.
with gr.Blocks(
    title="🔬 Nexa Evals - Scientific ML Benchmark Leaderboard",
    theme=gr.themes.Soft(),
    css=custom_css
) as demo:

    # Header
    gr.HTML("""
    <div class="main-header">
        <h1>🔬 Nexa Evals</h1>
        <h2>Scientific Machine Learning Benchmark Leaderboard</h2>
        <p>Comprehensive evaluation suite comparing state-of-the-art models across scientific domains</p>
    </div>
    """)

    # Metrics overview
    total_models = sum(len(data["models"]) for data in MODEL_EVALS.values())
    total_domains = len(MODEL_EVALS)
    nexa_models = sum(
        1
        for data in MODEL_EVALS.values()
        for model in data["models"]
        if "Nexa" in model
    )

    with gr.Row():
        gr.HTML(f"""
        <div class="metric-card">
            <div class="metric-value">{total_models}</div>
            <div class="metric-label">Total Models</div>
        </div>
        """)
        gr.HTML(f"""
        <div class="metric-card">
            <div class="metric-value">{total_domains}</div>
            <div class="metric-label">Scientific Domains</div>
        </div>
        """)
        gr.HTML(f"""
        <div class="metric-card">
            <div class="metric-value">{nexa_models}</div>
            <div class="metric-label">Nexa Models</div>
        </div>
        """)

    # Main content tabs
    with gr.Tabs():

        # Overall Leaderboard Tab
        with gr.TabItem("π Overall Leaderboard"):
            gr.Markdown("""
            ### Complete ranking of all models across scientific domains
            Models are ranked by their performance scores within their respective domains.
            """)

            overall_df = create_overall_leaderboard()
            leaderboard_table = gr.Dataframe(
                value=overall_df,
                headers=["Model", "Domain", "Score", "Parameters", "Institution", "Date", "Paper", "Task"],
                datatype=["str", "str", "number", "str", "str", "str", "str", "str"],
                interactive=False,
                height=400
            )

        # Domain Analysis Tab
        with gr.TabItem("π Domain Analysis"):
            gr.Markdown("""
            ### Domain-specific model performance analysis
            Select a domain to view detailed performance metrics and model comparisons.
            """)

            domain_names = list(MODEL_EVALS)
            # With an empty registry there is no default; the helpers below
            # then return an empty figure / "Domain not found".
            default_domain = domain_names[0] if domain_names else None

            with gr.Row():
                domain_dropdown = gr.Dropdown(
                    choices=domain_names,
                    value=default_domain,
                    label="Select Scientific Domain"
                )

            with gr.Row():
                domain_plot = gr.Plot(label="Performance Comparison")

            with gr.Row():
                domain_details = gr.Code(
                    label="Domain Details (JSON)",
                    language="json"
                )

            domain_dropdown.change(
                fn=lambda x: [create_domain_plot(x), get_domain_details(x)],
                inputs=domain_dropdown,
                outputs=[domain_plot, domain_details]
            )

            # Initialize with first domain
            demo.load(
                fn=lambda: [create_domain_plot(default_domain),
                            get_domain_details(default_domain)],
                outputs=[domain_plot, domain_details]
            )

        # Nexa Models Tab
        with gr.TabItem("π Nexa Models"):
            gr.Markdown("""
            ### Nexa Research model performance overview
            Comprehensive analysis of Nexa models across all scientific domains.
            """)

            with gr.Row():
                nexa_radar = gr.Plot(
                    value=create_radar_chart(),
                    label="Nexa Models - Cross-Domain Performance"
                )

            nexa_df = overall_df[overall_df['Model'].str.contains('Nexa', na=False)]
            nexa_table = gr.Dataframe(
                value=nexa_df,
                headers=["Model", "Domain", "Score", "Parameters", "Institution", "Date", "Paper", "Task"],
                label="Nexa Models Detailed View",
                height=300
            )

        # Timeline Tab
        with gr.TabItem("π Timeline"):
            gr.Markdown("""
            ### Model development timeline
            Track the evolution of scientific ML models over time.
            """)

            timeline_plot = gr.Plot(
                value=create_timeline_plot(),
                label="Model Performance Timeline"
            )

        # About Tab
        with gr.TabItem("ℹ️ About"):
            # Evaluated once, when the interface is built. The markdown must
            # be an f-string, otherwise the placeholder is shown literally.
            last_updated = datetime.now().strftime("%B %d, %Y")
            gr.Markdown(f"""
            ## About Nexa Evals

            Nexa Evals is a comprehensive benchmarking suite for evaluating machine learning models
            across diverse scientific domains. Our evaluation framework provides:

            ### 🎯 Evaluation Domains
            - **Proteins**: Structure prediction (secondary/tertiary)
            - **Astronomy**: Galaxy classification and analysis
            - **Materials Science**: Property prediction and discovery
            - **Quantum State Tomography**: State reconstruction
            - **High Energy Physics**: Particle detection and classification
            - **Computational Fluid Dynamics**: Flow prediction and simulation

            ### π Evaluation Metrics
            Each domain uses appropriate metrics:
            - **Accuracy**: Classification tasks
            - **F1-Score**: Balanced precision/recall evaluation
            - **R² Score**: Regression performance
            - **Fidelity**: Quantum state reconstruction accuracy
            - **AUC-ROC**: Binary classification performance
            - **RMSE**: Regression error measurement

            ### 🔬 Scientific Rigor
            All benchmarks are based on established datasets and evaluation protocols
            from peer-reviewed research. Model scores are computed using standardized
            metrics to ensure fair comparison.

            ### π Nexa Research
            Nexa Research is developing next-generation AI models specifically designed
            for scientific applications. Our models are trained on domain-specific data
            and optimized for scientific reasoning and discovery.

            ### π Citations & References
            For detailed information about evaluation protocols and datasets, please
            refer to the linked papers in the model details.

            ---

            **Last Updated**: {last_updated}

            **Contact**: [Nexa Research](https://nexaresearch.ai) | [GitHub](https://github.com/nexa-research)
            """)

    # Footer
    gr.HTML("""
    <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f8fafc; border-radius: 8px;">
        <p>🔬 <strong>Nexa Evals</strong> - Advancing Scientific Machine Learning</p>
        <p>Built with ❤️ by <a href="https://nexaresearch.ai" target="_blank">Nexa Research</a></p>
    </div>
    """)
|
540 |
|
541 |
+
if __name__ == "__main__":
    # Serve on every network interface at port 7860, without a public share
    # link, and surface errors in the UI.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)
|