Jerrycool committed on
Commit
ffb569a
·
verified ·
1 Parent(s): a84f158

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -830
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
- from datetime import datetime
5
 
6
- # --- Make sure these imports work relative to your file structure ---
7
- # Option 1: If src is a directory in the same folder as your script:
8
  try:
9
  from src.about import (
10
  CITATION_BUTTON_LABEL,
@@ -12,903 +10,211 @@ try:
12
  EVALUATION_QUEUE_TEXT,
13
  INTRODUCTION_TEXT,
14
  LLM_BENCHMARKS_TEXT,
15
- TITLE,
16
  )
17
  from src.display.css_html_js import custom_css
18
  from src.envs import REPO_ID
19
  from src.submission.submit import add_new_eval
20
- print("Successfully imported from src module.")
21
- # Option 2: If you don't have these files, define placeholders
22
  except ImportError:
23
- print("Warning: Using placeholder values because src module imports failed.")
24
  CITATION_BUTTON_LABEL = "Citation"
25
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
26
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
27
- INTRODUCTION_TEXT = """
28
- # Welcome to the MLE-Dojo Benchmark Leaderboard
29
-
30
- This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
31
- Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
32
-
33
- ## How to read this leaderboard
34
- - Select a domain category to view specialized rankings
35
- - Higher ELO scores indicate better performance
36
- - Click on any model name to learn more about it
37
- """
38
- LLM_BENCHMARKS_TEXT = """
39
- # About the MLE-Dojo Benchmark
40
-
41
- ## Evaluation Methodology
42
- The MLE-Dojo benchmark evaluates models across various domains including:
43
-
44
- - **MLE-Lite**: Basic machine learning engineering tasks
45
- - **Tabular**: Data manipulation, analysis, and modeling with structured data
46
- - **NLP**: Natural language processing tasks including classification, generation, and understanding
47
- - **CV**: Computer vision tasks including image classification, object detection, and generation
48
-
49
- Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other.
50
-
51
- ## Contact
52
- For more information or to submit your model, please contact us at [email protected]
53
- """
54
- TITLE = "<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
55
  custom_css = ""
56
  REPO_ID = "your/space-id"
57
  def add_new_eval(*args): return "Submission placeholder."
58
 
59
- # --- Elo Leaderboard Configuration ---
60
- # Enhanced data with Rank (placeholder), Organizer, License, and URL
61
  data = [
62
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
63
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
64
- {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
65
- {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
66
- {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
67
- {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
68
- {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
69
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
70
  ]
71
-
72
- # Add organization logos (for visual enhancement)
73
- org_logos = {
74
- 'OpenAI': '📱', # You can replace these with actual icon URLs in production
75
- 'DeepSeek': '🔍',
76
- 'Google': '🌐',
77
- 'Default': '🤖'
78
- }
79
-
80
- # Create a master DataFrame
81
  master_df = pd.DataFrame(data)
82
 
83
- # Add last updated timestamp
84
- last_updated = datetime.now().strftime("%B %d, %Y at %H:%M:%S")
85
-
86
- # Define categories with fancy icons
87
- CATEGORIES = [
88
- ("🏆 Overall", "Overall"),
89
- ("💡 MLE-Lite", "MLE-Lite"),
90
- ("📊 Tabular", "Tabular"),
91
- ("📝 NLP", "NLP"),
92
- ("👁️ CV", "CV")
93
- ]
94
  DEFAULT_CATEGORY = "Overall"
95
-
96
- # Map user-facing categories to DataFrame column names
97
  category_to_column = {
 
98
  "MLE-Lite": "MLE-Lite_Elo",
99
  "Tabular": "Tabular_Elo",
100
  "NLP": "NLP_Elo",
101
  "CV": "CV_Elo",
102
- "Overall": "Overall"
103
  }
104
 
105
- # --- Helper function to update leaderboard ---
106
- def update_leaderboard(category_label):
107
- """
108
- Enhanced function to update the leaderboard with visual improvements
109
- """
110
- # Extract the category value from the label if it's a tuple (icon, value)
111
- if isinstance(category_label, tuple):
112
- category = category_label[1]
113
- else:
114
- # For backward compatibility or direct values
115
- category = category_label.split(" ")[-1] if " " in category_label else category_label
116
-
117
- score_column = category_to_column.get(category)
118
- if score_column is None or score_column not in master_df.columns:
119
- print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
120
- score_column = category_to_column[DEFAULT_CATEGORY]
121
- if score_column not in master_df.columns:
122
- print(f"Error: Default column '{score_column}' also not found.")
123
- return pd.DataFrame({
124
- "Rank": [],
125
- "Model": [],
126
- "Organizer": [],
127
- "License": [],
128
- "Elo Score": []
129
- })
130
-
131
- # Select base columns + the score column for sorting
132
- cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
133
- df = master_df[cols_to_select].copy()
134
-
135
- # Sort by the selected 'Elo Score' descending
136
- df.sort_values(by=score_column, ascending=False, inplace=True)
137
-
138
- # Add Rank with just numbers (no medals)
139
  df.reset_index(drop=True, inplace=True)
140
- df.insert(0, 'Rank', df.index.map(lambda idx: f"{idx + 1}"))
141
-
142
- # Add organization icons to model names
143
  df['Model'] = df.apply(
144
- lambda row: f"""<div style="display: flex; align-items: center;">
145
- <span style="font-size: 1.5em; margin-right: 10px;">{org_logos.get(row['organizer'], org_logos['Default'])}</span>
146
- <a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank'
147
- style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
148
- {row['model_name']}
149
- </a>
150
- </div>""",
151
  axis=1
152
  )
 
 
153
 
154
- # Format Elo scores with visual indicators
155
- df['Elo Display'] = df[score_column].apply(
156
- lambda score: f"""<div style="display: flex; align-items: center;">
157
- <span style="font-weight: bold; color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'}">
158
- {score}
159
- </span>
160
- <div style="margin-left: 10px; height: 12px; width: 60px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
161
- <div style="height: 100%; width: {min(100, max(5, (score-700)/7))}%; background-color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'};"></div>
162
- </div>
163
- </div>"""
164
- )
165
-
166
- # Rename columns for display
167
- df.rename(columns={score_column: 'Elo Score'}, inplace=True)
168
- df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
169
-
170
- # Select and reorder columns for final display
171
- final_columns = ["Rank", "Model", "Organizer", "License", "Elo Display"]
172
- df = df[final_columns]
173
-
174
- # Rename for display
175
- df.columns = ["Rank", "Model", "Organization", "License", f"Elo Score ({category})"]
176
-
177
- return df
178
 
179
- # --- Mock/Placeholder functions/data for other tabs ---
180
- print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
181
- finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
182
- running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
183
- pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
184
- EVAL_COLS = ["Model", "Status", "Requested", "Started"]
185
- EVAL_TYPES = ["str", "str", "str", "str"]
186
-
187
- # --- Keep restart function if relevant ---
188
- def restart_space():
189
- print(f"Attempting to restart space: {REPO_ID}")
190
- # Replace with your actual space restart mechanism if needed
191
-
192
- # --- Enhanced CSS for beauty and readability ---
193
- # FIXED CSS with better contrast, improved scrolling for tables, and other fixes
194
- enhanced_css = """
195
- /* Base styling */
196
- :root {
197
- --primary-color: #1a5fb4;
198
- --secondary-color: #2ec27e;
199
- --accent-color: #e5a50a;
200
- --warning-color: #ff7800;
201
- --text-color: #333333;
202
- --background-color: #e9edf1; /* Lightened background */
203
- --card-background: #ffffff;
204
- --border-color: #c0c9d6; /* Darkened border */
205
- --shadow-color: rgba(0, 0, 0, 0.12); /* Increased shadow opacity */
206
  }
207
 
208
- /* Typography */
209
- body, .gradio-container {
210
- font-family: 'Inter', 'Segoe UI', Roboto, -apple-system, BlinkMacSystemFont, system-ui, sans-serif !important;
211
- font-size: 16px !important;
212
- line-height: 1.6 !important;
213
- color: var(--text-color) !important;
214
- background-color: var(--background-color) !important;
 
 
215
  }
216
-
217
- /* Headings */
218
- h1 {
219
  font-size: 2.5rem !important;
220
  font-weight: 700 !important;
221
- margin-bottom: 1.5rem !important;
222
- color: var(--primary-color) !important;
223
- text-align: center !important;
224
- letter-spacing: -0.02em !important;
225
- line-height: 1.2 !important;
226
  }
227
-
228
- h2 {
229
- font-size: 1.8rem !important;
230
- font-weight: 600 !important;
231
- margin-top: 1.5rem !important;
232
- margin-bottom: 1rem !important;
233
- color: var(--primary-color) !important;
234
- letter-spacing: -0.01em !important;
235
- }
236
-
237
- h3 {
238
- font-size: 1.4rem !important;
239
- font-weight: 600 !important;
240
- margin-top: 1.2rem !important;
241
- margin-bottom: 0.8rem !important;
242
- color: var(--text-color) !important;
243
- }
244
-
245
- /* Tabs styling */
246
- .tabs {
247
- margin-top: 1rem !important;
248
- border-radius: 12px !important;
249
- overflow: hidden !important;
250
- box-shadow: 0 4px 12px var(--shadow-color) !important;
251
- background-color: var(--card-background);
252
- }
253
-
254
- .tab-nav button {
255
- font-size: 1.1rem !important;
256
- font-weight: 500 !important;
257
- padding: 0.8rem 1.5rem !important;
258
- border-radius: 0 !important;
259
- transition: all 0.2s ease !important;
260
- border-bottom: 2px solid transparent !important;
261
- background-color: transparent !important;
262
- color: var(--text-color) !important;
263
- }
264
-
265
- .tab-nav button.selected {
266
- background-color: transparent !important;
267
- color: var(--primary-color) !important;
268
- font-weight: 600 !important;
269
- border-bottom: 2px solid var(--primary-color) !important;
270
- }
271
-
272
- /* Card styling */
273
- .gradio-container .gr-box, .gradio-container .gr-panel {
274
- border-radius: 12px !important;
275
- border: 1px solid var(--border-color) !important;
276
- box-shadow: 0 4px 12px var(--shadow-color) !important;
277
- overflow: hidden !important;
278
- background-color: var(--card-background) !important;
279
- }
280
-
281
- /* Table styling - FIXING SCROLLING ISSUES */
282
- table {
283
- width: 100% !important;
284
- border-collapse: separate !important;
285
- border-spacing: 0 !important;
286
- margin: 1.5rem 0 !important;
287
- border-radius: 8px !important;
288
- overflow: visible !important; /* Changed from hidden to visible */
289
- box-shadow: 0 4px 12px var(--shadow-color) !important;
290
- background-color: var(--card-background);
291
  }
292
 
293
- /* Data table container - ensure scrolling works */
294
- .gr-table-container {
295
- overflow: auto !important;
296
- max-height: 600px !important; /* Add max height to ensure scrolling */
297
- margin-bottom: 20px !important;
298
- }
299
-
300
- th {
301
- background-color: #e0ebff !important; /* Darker header background */
302
- color: var(--primary-color) !important;
303
- font-weight: 600 !important;
304
- padding: 1rem !important;
305
- font-size: 1.1rem !important;
306
- text-align: left !important;
307
- border-bottom: 2px solid var(--primary-color) !important;
308
- position: sticky !important; /* Keep headers visible when scrolling */
309
- top: 0 !important;
310
- z-index: 10 !important;
311
- }
312
-
313
- td {
314
- padding: 1rem !important;
315
- border-bottom: 1px solid var(--border-color) !important;
316
- font-size: 1rem !important;
317
- vertical-align: middle !important;
318
- background-color: var(--card-background);
319
- }
320
-
321
- tr:last-child td {
322
- border-bottom: none !important;
323
- }
324
-
325
- tr:nth-child(even) td {
326
- background-color: #f0f5ff !important; /* Increased contrast for even rows */
327
- }
328
-
329
- tr:hover td {
330
- background-color: #e0ebff !important; /* Darker hover color */
331
- }
332
-
333
- /* Button styling */
334
- button.primary, .gr-button.primary {
335
- background-color: var(--primary-color) !important;
336
- color: white !important;
337
- font-weight: 500 !important;
338
- padding: 0.8rem 1.5rem !important;
339
- border-radius: 8px !important;
340
  border: none !important;
341
- cursor: pointer !important;
342
- transition: all 0.2s ease !important;
343
- box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1) !important;
344
- }
345
-
346
- button.primary:hover, .gr-button.primary:hover {
347
- background-color: #0b4a9e !important;
348
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
349
- transform: translateY(-1px) !important;
350
- }
351
-
352
- /* Radio buttons */
353
- .gr-radio {
354
- display: flex !important;
355
- flex-wrap: wrap !important;
356
- gap: 10px !important;
357
- margin: 1rem 0 !important;
358
- }
359
-
360
- .gr-radio label {
361
- background-color: #f0f5ff !important; /* Darker radio button background */
362
- border: 1px solid var(--border-color) !important;
363
- border-radius: 8px !important;
364
- padding: 0.7rem 1.2rem !important;
365
- font-size: 1rem !important;
366
  font-weight: 500 !important;
367
- cursor: pointer !important;
368
- transition: all 0.2s ease !important;
369
- display: flex !important;
370
- align-items: center !important;
371
- gap: 8px !important;
372
- color: var(--text-color) !important;
373
- }
374
-
375
- .gr-radio label:hover {
376
- background-color: #e0e9f7 !important;
377
- border-color: #a0b0c0 !important; /* Darker border on hover */
378
- }
379
-
380
- .gr-radio label.selected {
381
- background-color: #d0dfff !important; /* Darker selected background */
382
- border-color: var(--primary-color) !important;
383
- color: var(--primary-color) !important;
384
- font-weight: 600 !important;
385
- }
386
-
387
- /* Input fields */
388
- input, textarea, select {
389
- font-size: 1rem !important;
390
- padding: 0.8rem !important;
391
- border-radius: 8px !important;
392
- border: 1px solid var(--border-color) !important;
393
- transition: all 0.2s ease !important;
394
- background-color: #ffffff !important;
395
- color: var(--text-color) !important;
396
- }
397
-
398
- input:focus, textarea:focus, select:focus {
399
- border-color: var(--primary-color) !important;
400
- box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
401
- outline: none !important;
402
  }
403
-
404
- /* Accordion styling */
405
- .gr-accordion {
406
- border-radius: 8px !important;
407
- overflow: hidden !important;
408
- margin: 1rem 0 !important;
409
- border: 1px solid var(--border-color) !important;
410
- background-color: var(--card-background);
411
  }
412
-
413
- .gr-accordion-header {
414
- padding: 1rem !important;
415
- background-color: #f0f5ff !important; /* Darker accordion header */
416
- font-weight: 600 !important;
417
- font-size: 1.1rem !important;
418
- color: var(--text-color) !important;
419
- border-bottom: 1px solid var(--border-color) !important;
420
  }
421
 
422
- .gr-accordion-content {
423
- padding: 1rem !important;
424
- background-color: var(--card-background) !important;
425
- }
426
-
427
- /* Markdown text improvements */
428
- .markdown-text {
429
- font-size: 1.05rem !important;
430
- line-height: 1.7 !important;
431
- color: var(--text-color) !important;
432
- }
433
-
434
- .markdown-text p {
435
- margin-bottom: 1rem !important;
436
- }
437
-
438
- .markdown-text ul, .markdown-text ol {
439
- margin-left: 1.5rem !important;
440
- margin-bottom: 1rem !important;
441
- }
442
-
443
- .markdown-text li {
444
- margin-bottom: 0.5rem !important;
445
- }
446
-
447
- .markdown-text strong {
448
- font-weight: 600 !important;
449
- color: #111 !important;
450
- }
451
-
452
- /* Status indicators */
453
- .status-badge {
454
  display: inline-block;
455
- padding: 0.3rem 0.7rem;
456
- border-radius: 99px;
457
- font-size: 0.85rem;
458
- font-weight: 500;
459
- text-align: center;
460
- }
461
-
462
- .status-pending {
463
- background-color: #fff3cc;
464
- color: #b58a00;
465
- border: 1px solid #ffd74d;
466
- }
467
-
468
- .status-running {
469
- background-color: #ccebff;
470
- color: #0066cc;
471
- border: 1px solid #66b3ff;
472
- }
473
-
474
- .status-completed {
475
- background-color: #d6f5e6;
476
- color: #00875a;
477
- border: 1px solid #57d9a3;
478
- }
479
-
480
- /* Footer */
481
- .footer {
482
- margin-top: 2rem;
483
- padding: 1.5rem 1rem;
484
- text-align: center;
485
- font-size: 0.9rem;
486
- color: #333;
487
- border-top: 1px solid var(--border-color);
488
- background-color: #d9e0e8; /* Darker footer background */
489
- }
490
-
491
- /* Enhanced leaderboard title area */
492
- .leaderboard-header {
493
- display: flex;
494
- align-items: center;
495
- justify-content: space-between;
496
- margin-bottom: 1.5rem;
497
- padding: 1.5rem;
498
- background-color: var(--card-background);
499
- border-radius: 12px;
500
- border: 1px solid var(--border-color);
501
- box-shadow: 0 4px 12px var(--shadow-color);
502
- }
503
-
504
- .leaderboard-title {
505
- font-size: 2.2rem;
506
- font-weight: 700;
507
- color: var(--primary-color);
508
- margin: 0;
509
- display: flex;
510
- align-items: center;
511
- gap: 0.5rem;
512
- }
513
-
514
- .leaderboard-subtitle {
515
- font-size: 1.1rem;
516
- color: #444; /* Darker subtitle text */
517
- margin-top: 0.5rem;
518
- }
519
-
520
- .timestamp {
521
- font-size: 0.85rem;
522
- color: #444; /* Darker timestamp text */
523
- font-style: italic;
524
- background-color: #f0f5ff; /* Darker timestamp background */
525
- padding: 5px 10px;
526
- border-radius: 6px;
527
- }
528
-
529
- /* Category selector buttons */
530
- .category-buttons {
531
- display: flex;
532
- flex-wrap: wrap;
533
- gap: 10px;
534
- margin-bottom: 1.5rem;
535
- }
536
-
537
- .category-button {
538
- padding: 0.7rem 1.2rem;
539
- background-color: #e0ebff; /* Darker button background */
540
- border: 1px solid #b0d0ff;
541
- border-radius: 8px;
542
- font-weight: 500;
543
- cursor: pointer;
544
- transition: all 0.2s ease;
545
- display: flex;
546
- align-items: center;
547
- gap: 8px;
548
- }
549
-
550
- .category-button:hover {
551
- background-color: #c0d0ff; /* Darker hover state */
552
- border-color: #80a0ff;
553
- }
554
-
555
- .category-button.active {
556
- background-color: var(--primary-color);
557
- color: white;
558
- border-color: var(--primary-color);
559
- }
560
-
561
- /* Logo and brand styling */
562
- .logo {
563
- font-size: 2.5em;
564
  margin-right: 0.5rem;
 
 
 
 
 
 
565
  }
566
-
567
- /* Properly display sorting arrows */
568
- table th.sort-asc::after {
569
- content: " ↑";
570
- color: var(--primary-color);
571
- }
572
-
573
- table th.sort-desc::after {
574
- content: " ↓";
575
- color: var(--primary-color);
576
  }
577
 
578
- /* Style for About section cards */
579
- .about-card {
580
- background-color: #f0f5ff; /* Darker card background */
581
- padding: 20px;
582
- border-radius: 12px;
583
- height: 100%;
584
- border: 1px solid var(--border-color);
 
585
  }
586
- .about-card h3 {
587
- text-align: center;
588
- margin-top: 0;
589
- color: var(--primary-color);
590
  }
591
- .about-card p {
592
- color: var(--text-color);
593
- font-size: 0.95rem;
594
- line-height: 1.6;
595
  }
596
- .about-card-icon {
597
- font-size: 2.5em;
598
- text-align: center;
599
- margin-bottom: 15px;
600
- display: block;
601
  }
602
-
603
- /* Ensure the table container has a fixed height and scrolls properly */
604
- #leaderboard-table {
605
- overflow: auto !important;
606
- max-height: 500px !important;
607
  }
608
-
609
- /* Fix for dataframe component scrolling */
610
- .gradio-dataframe {
611
- overflow: auto !important;
612
- max-height: 500px !important;
613
  }
614
-
615
- /* Fix sorting issues */
616
- .sort-column {
617
- cursor: pointer;
618
  }
619
  """
620
 
621
- # Combine with any existing CSS
622
- custom_css = enhanced_css
 
 
 
 
 
623
 
624
- # --- Gradio App Definition ---
625
- demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
626
 
627
  with demo:
628
- # Enhanced header with timestamp
629
- gr.HTML(f"""
630
- <div class="leaderboard-header">
631
- <div>
632
- <div class="leaderboard-title">
633
- <span class="logo">🏆</span> MLE-Dojo Benchmark Leaderboard
634
- </div>
635
- <div class="leaderboard-subtitle">
636
- Comprehensive evaluation of AI models across multiple domains
637
- </div>
638
- </div>
639
- <div class="timestamp">
640
- Last updated: {last_updated}
641
- </div>
642
- </div>
643
- """)
644
-
645
- # Introduction with enhanced styling
646
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
647
 
648
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
649
- with gr.TabItem("📊 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
650
- with gr.Column():
651
- gr.HTML("""
652
- <h2 style="display: flex; align-items: center; gap: 10px;">
653
- <span style="font-size: 1.3em;">📈</span> Model Performance Rankings
654
- </h2>
655
- <p class="leaderboard-subtitle">Select a category to view specialized performance metrics</p>
656
- """)
657
-
658
- # Enhanced category selector
659
- category_selector = gr.Radio(
660
- choices=[x[0] for x in CATEGORIES],
661
- label="Select Performance Domain:",
662
- value="🏆 Overall",
663
- interactive=True,
664
- elem_classes="fancy-radio"
665
- )
666
-
667
- # Visual separator
668
- gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
669
-
670
- # Enhanced leaderboard table
671
- leaderboard_df_component = gr.Dataframe(
672
- value=update_leaderboard(DEFAULT_CATEGORY),
673
- headers=["Rank", "Model", "Organization", "License", f"Elo Score ({DEFAULT_CATEGORY})"],
674
- datatype=["html", "html", "str", "str", "html"],
675
- interactive=False,
676
- row_count=(len(master_df), "fixed"),
677
- col_count=(5, "fixed"),
678
- wrap=True,
679
- elem_id="leaderboard-table",
680
- )
681
-
682
- # Stats cards (visual enhancement)
683
- with gr.Row():
684
- with gr.Column(scale=1):
685
- gr.HTML(f"""
686
- <div style="background-color: #f0f5ff; padding: 20px; border-radius: 12px; text-align: center;">
687
- <div style="font-size: 2em;">🔍</div>
688
- <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
689
- <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
690
- </div>
691
- """)
692
- with gr.Column(scale=1):
693
- gr.HTML(f"""
694
- <div style="background-color: #e6f7ef; padding: 20px; border-radius: 12px; text-align: center;">
695
- <div style="font-size: 2em;">🌐</div>
696
- <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
697
- <div style="font-size: 1.1em; color: #666;">Organizations</div>
698
- </div>
699
- """)
700
- with gr.Column(scale=1):
701
- gr.HTML(f"""
702
- <div style="background-color: #fff8e0; padding: 20px; border-radius: 12px; text-align: center;">
703
- <div style="font-size: 2em;">🏅</div>
704
- <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
705
- <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
706
- </div>
707
- """)
708
-
709
- # Link the radio button change to the update function
710
- category_selector.change(
711
- fn=update_leaderboard,
712
- inputs=category_selector,
713
- outputs=leaderboard_df_component
714
- )
715
-
716
- with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
717
- # Enhanced about section
718
- gr.HTML("""
719
- <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
720
- <div style="font-size: 4em;">🧪</div>
721
- <div>
722
- <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
723
- <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
724
- </div>
725
- </div>
726
- """)
727
-
728
- # Use the LLM_BENCHMARKS_TEXT variable
729
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
730
-
731
- # Add methodology cards for visual enhancement
732
- with gr.Row():
733
- with gr.Column():
734
- gr.HTML("""
735
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
736
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">💡</div>
737
- <h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
738
- <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
739
- data preprocessing, feature engineering, model selection, and basic deployment.</p>
740
- </div>
741
- """)
742
- with gr.Column():
743
- gr.HTML("""
744
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
745
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📊</div>
746
- <h3 style="text-align: center; margin-top: 0;">Tabular</h3>
747
- <p>Tests a model's ability to process, analyze and model structured data, including
748
- statistical analysis,statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
749
- </div>
750
- """)
751
-
752
- with gr.Row():
753
- with gr.Column():
754
- gr.HTML("""
755
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
756
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📝</div>
757
- <h3 style="text-align: center; margin-top: 0;">NLP</h3>
758
- <p>Evaluates natural language processing capabilities including text classification,
759
- sentiment analysis, entity recognition, text generation, and language understanding.</p>
760
- </div>
761
- """)
762
- with gr.Column():
763
- gr.HTML("""
764
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
765
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">👁️</div>
766
- <h3 style="text-align: center; margin-top: 0;">CV</h3>
767
- <p>Tests computer vision capabilities including image classification, object detection,
768
- image generation, and visual understanding tasks across various domains.</p>
769
- </div>
770
- """)
771
-
772
- # Optional: Uncomment if you want to re-enable the Submit tab
773
- # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
774
- # with gr.Column():
775
- # gr.HTML("""
776
- # <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
777
- # <div style="font-size: 4em;">🚀</div>
778
- # <div>
779
- # <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
780
- # <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
781
- # </div>
782
- # </div>
783
- # """)
784
- #
785
- # with gr.Row():
786
- # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
787
- #
788
- # with gr.Column():
789
- # with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
790
- # finished_eval_table = gr.components.Dataframe(
791
- # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
792
- # )
793
- # with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
794
- # running_eval_table = gr.components.Dataframe(
795
- # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
796
- # )
797
- # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
798
- # pending_eval_table = gr.components.Dataframe(
799
- # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
800
- # )
801
- #
802
- # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
803
- #
804
- # gr.HTML("""
805
- # <h2 style="display: flex; align-items: center; gap: 10px;">
806
- # <span style="font-size: 1.3em;">📝</span> Model Submission Form
807
- # </h2>
808
- # """)
809
- #
810
- # with gr.Row():
811
- # with gr.Column():
812
- # model_name_textbox = gr.Textbox(
813
- # label="Model Name (on Hugging Face Hub)",
814
- # placeholder="Enter your model name...",
815
- # elem_classes="enhanced-input"
816
- # )
817
- # revision_name_textbox = gr.Textbox(
818
- # label="Revision / Commit Hash",
819
- # placeholder="main",
820
- # elem_classes="enhanced-input"
821
- # )
822
- # model_type = gr.Dropdown(
823
- # choices=["Type A", "Type B", "Type C"],
824
- # label="Model Type",
825
- # multiselect=False,
826
- # value=None,
827
- # interactive=True,
828
- # elem_classes="enhanced-dropdown"
829
- # )
830
- # with gr.Column():
831
- # precision = gr.Dropdown(
832
- # choices=["float16", "bfloat16", "float32", "int8", "auto"],
833
- # label="Precision",
834
- # multiselect=False,
835
- # value="auto",
836
- # interactive=True,
837
- # elem_classes="enhanced-dropdown"
838
- # )
839
- # weight_type = gr.Dropdown(
840
- # choices=["Original", "Adapter", "Delta"],
841
- # label="Weights Type",
842
- # multiselect=False,
843
- # value="Original",
844
- # interactive=True,
845
- # elem_classes="enhanced-dropdown"
846
- # )
847
- # base_model_name_textbox = gr.Textbox(
848
- # label="Base Model (for delta or adapter weights)",
849
- # placeholder="Only needed for adapter/delta weights",
850
- # elem_classes="enhanced-input"
851
- # )
852
- #
853
- # submit_button = gr.Button(
854
- # "Submit for Evaluation",
855
- # elem_classes="primary-button"
856
- # )
857
- # submission_result = gr.Markdown()
858
- # submit_button.click(
859
- # add_new_eval,
860
- # [model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type],
861
- # submission_result,
862
- # )
863
 
864
- # Enhanced citation section
865
- with gr.Accordion("📄 Citation", open=False, elem_classes="citation-accordion"):
866
- gr.HTML("""
867
- <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
868
- <div style="font-size: 2.5em;">📄</div>
869
- <div>
870
- <h3 style="margin: 0;">How to Cite This Benchmark</h3>
871
- <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
872
- </div>
873
- </div>
874
- """)
875
-
876
- citation_button = gr.Textbox(
877
  value=CITATION_BUTTON_TEXT,
878
  label=CITATION_BUTTON_LABEL,
879
  lines=10,
880
  elem_id="citation-button",
881
- show_copy_button=True,
882
  )
883
-
884
- # Footer
885
- gr.HTML("""
886
- <div class="footer">
887
- <p>© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
888
- <p style="margin-top: 5px; display: flex; justify-content: center; gap: 20px;">
889
- <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
890
- <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
891
- <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
892
- </p>
893
- </div>
894
- """)
895
-
896
- # --- Keep scheduler if relevant ---
897
- if __name__ == "__main__":
898
- try:
899
- scheduler = BackgroundScheduler()
900
- if callable(restart_space):
901
- if REPO_ID and REPO_ID != "your/space-id":
902
- scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
903
- scheduler.start()
904
- else:
905
- print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
906
- else:
907
- print("Warning: restart_space function not available; space restart job not scheduled.")
908
- except Exception as e:
909
- print(f"Failed to initialize or start scheduler: {e}")
910
 
911
- # --- Launch the app ---
912
  if __name__ == "__main__":
913
- print("Launching Enhanced Gradio App...")
914
- demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
 
4
 
5
+ # --- Placeholder Imports / Definitions ---
 
6
  try:
7
  from src.about import (
8
  CITATION_BUTTON_LABEL,
 
10
  EVALUATION_QUEUE_TEXT,
11
  INTRODUCTION_TEXT,
12
  LLM_BENCHMARKS_TEXT,
13
+ TITLE, # Will override below
14
  )
15
  from src.display.css_html_js import custom_css
16
  from src.envs import REPO_ID
17
  from src.submission.submit import add_new_eval
 
 
18
  except ImportError:
 
19
  CITATION_BUTTON_LABEL = "Citation"
20
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
21
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
22
+ INTRODUCTION_TEXT = "Welcome to the MLE-Dojo Benchmark Leaderboard."
23
+ LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  custom_css = ""
25
  REPO_ID = "your/space-id"
26
  def add_new_eval(*args): return "Submission placeholder."
27
 
28
+ # --- Elo Data ---
 
29
# Static leaderboard table: one record per model with its link, organizer,
# license, one Elo score per task category and an "Overall" Elo.
data = [
    {
        'model_name': 'gpt-4o-mini',
        'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/',
        'organizer': 'OpenAI', 'license': 'Proprietary',
        'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778,
    },
    {
        'model_name': 'gpt-4o',
        'url': 'https://openai.com/index/hello-gpt-4o/',
        'organizer': 'OpenAI', 'license': 'Proprietary',
        'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841,
    },
    {
        'model_name': 'o3-mini',
        'url': 'https://openai.com/index/openai-o3-mini/',
        'organizer': 'OpenAI', 'license': 'Proprietary',
        'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096,
    },
    {
        'model_name': 'deepseek-v3',
        'url': 'https://api-docs.deepseek.com/news/news1226',
        'organizer': 'DeepSeek', 'license': 'DeepSeek',
        'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023,
    },
    {
        'model_name': 'deepseek-r1',
        'url': 'https://api-docs.deepseek.com/news/news250120',
        'organizer': 'DeepSeek', 'license': 'DeepSeek',
        'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100,
    },
    {
        'model_name': 'gemini-2.0-flash',
        'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash',
        'organizer': 'Google', 'license': 'Proprietary',
        'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895,
    },
    {
        'model_name': 'gemini-2.0-pro',
        'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/',
        'organizer': 'Google', 'license': 'Proprietary',
        'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054,
    },
    {
        'model_name': 'gemini-2.5-pro',
        'url': 'https://deepmind.google/technologies/gemini/pro/',
        'organizer': 'Google', 'license': 'Proprietary',
        'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214,
    },
]

master_df = pd.DataFrame(data)

# Maps the category label shown in the UI to the DataFrame column holding its
# Elo score. Insertion order is the order the choices are displayed in.
category_to_column = {
    "Overall": "Overall",
    "MLE-Lite": "MLE-Lite_Elo",
    "Tabular": "Tabular_Elo",
    "NLP": "NLP_Elo",
    "CV": "CV_Elo",
}

# Derived from the mapping so the selectable categories can never drift out of
# sync with the columns that back them.
CATEGORIES = list(category_to_column)
DEFAULT_CATEGORY = "Overall"
50
 
51
def update_leaderboard(category):
    """Return the ranked leaderboard table for the given category.

    The category label is translated to its score column through
    ``category_to_column``; an unknown label falls back to the column of
    ``DEFAULT_CATEGORY``. Rows are ordered by that score, highest first, and
    numbered from 1.

    Returns a DataFrame with the columns ``Rank``, ``Model`` (an HTML link
    built from the model's URL and name), ``Organizer``, ``License`` and
    ``Elo Score``.
    """
    score_col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])

    wanted = ['model_name', 'url', 'organizer', 'license', score_col]
    ranked = (
        master_df[wanted]
        .sort_values(by=score_col, ascending=False)
        .reset_index(drop=True)
    )

    # After the index reset, position + 1 is the rank.
    ranked.insert(0, 'Rank', ranked.index + 1)
    ranked['Model'] = [
        f"<a href='{link}' target='_blank'>{name}</a>"
        for link, name in zip(ranked['url'], ranked['model_name'])
    ]

    display_names = {score_col: 'Elo Score', 'organizer': 'Organizer', 'license': 'License'}
    ranked = ranked.rename(columns=display_names)
    return ranked[['Rank', 'Model', 'Organizer', 'License', 'Elo Score']]
63
 
64
+ # --- Dark Theme + Custom CSS ---
65
# CSS only honours @import rules that appear before every other rule. The font
# import is therefore kept separate and placed in front of whatever CSS
# `custom_css` already holds; appending it after existing rules would make the
# browser ignore it.
_FONT_IMPORT_CSS = (
    "@import url('https://fonts.googleapis.com/css2"
    "?family=Inter:wght@400;500;600;700&display=swap');\n"
)

_LEADERBOARD_CSS = """
body {
  font-family: 'Inter', sans-serif;
  background-color: #1e1e2f !important;
  color: #e0e0f0 !important;
}

/* Hero Section */
.hero-section {
  background: linear-gradient(135deg, #6c63ff, #8f94fb);
  color: #fff;
  padding: 2rem 1rem;
  border-radius: .75rem;
  margin-bottom: 1.5rem;
  text-align: center;
  box-shadow: 0 4px 10px rgba(0,0,0,0.3);
}
.hero-section h1 {
  margin: 0;
  font-size: 2.5rem !important;
  font-weight: 700 !important;
}
.hero-section h2 {
  margin: .5rem 0 0 !important;
  font-size: 1.25rem !important;
  font-weight: 400 !important;
  opacity: 0.9;
}

/* Tab Buttons */
.tab-buttons button {
  border-radius: 20px !important;
  padding: 0.5rem 1rem !important;
  margin-right: 0.5rem !important;
  background: #3a3a4c !important;
  color: #e0e0f0 !important;
  border: none !important;
  transition: background 0.3s !important;
  font-weight: 500 !important;
}
.tab-buttons button:hover {
  background: #4a4a6f !important;
}
.tab-buttons button[aria-selected="true"] {
  background: #6c63ff !important;
  color: #fff !important;
}

/* Category Selector Pills */
#category-selector input[type="radio"] { display: none; }
#category-selector label {
  display: inline-block;
  padding: 0.5rem 1rem;
  margin-right: 0.5rem;
  border-radius: 999px;
  background: #3a3a4c;
  color: #e0e0f0;
  cursor: pointer;
  transition: background 0.3s, color 0.3s;
  font-weight: 500;
}
#category-selector input[type="radio"]:checked + label {
  background: #6c63ff;
  color: #fff;
}
/* NOTE(review): Gradio appears to render each radio input inside its label
   and to flag the active label with a "selected" class, in which case the
   sibling selector above never matches. The two rules below cover that
   markup; confirm against the installed Gradio version. They are kept as
   separate rules so a browser without :has() still applies the first. */
#category-selector label.selected {
  background: #6c63ff;
  color: #fff;
}
#category-selector label:has(input[type="radio"]:checked) {
  background: #6c63ff;
  color: #fff;
}

/* Table Styling */
table {
  width: 100%;
  border: none;
  border-radius: .5rem;
  overflow: hidden;
  box-shadow: 0 2px 4px rgba(0,0,0,0.3);
  margin: 1rem 0;
}
th {
  background: #6c63ff !important;
  color: #fff !important;
}
td, th {
  padding: 0.75rem 1rem;
  background: #1e1e2f;
  color: #e0e0f0;
}
tr:nth-child(even) td {
  background: #2a2a3c;
}
tr:hover td {
  background: #3c3b52;
}
td a {
  color: #9afeff;
  text-decoration: none;
}
td a:hover {
  text-decoration: underline;
}
"""

# Order matters: font import first, then the pre-existing stylesheet, then the
# dark-theme rules so they can override earlier declarations.
custom_css = _FONT_IMPORT_CSS + custom_css + _LEADERBOARD_CSS
165
 
166
+ # --- Override Title with Hero ---
167
# Replaces any TITLE imported earlier with the hero banner markup; the
# `hero-section` class is styled by the rules added to `custom_css`.
TITLE = """
<div class="hero-section">
  <h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>
  <h2>Improving LLM Agents for Machine Learning Engineering</h2>
</div>
"""

# --- Build App ---
# Gradio's `gr.themes` namespace has no `Dark` class, so `gr.themes.Dark()`
# raises AttributeError as soon as the module is imported. The dark look comes
# from the rules in `custom_css`; the neutral Base theme is used underneath.
demo = gr.Blocks(css=custom_css, theme=gr.themes.Base())

with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📋 Leaderboard"):
            gr.Markdown("## Model Elo Rankings by Category")
            category_selector = gr.Radio(
                choices=CATEGORIES,
                value=DEFAULT_CATEGORY,
                interactive=True,
                elem_id="category-selector",
            )
            # Initial table shows the default category; it is rebuilt by
            # update_leaderboard whenever the selection changes.
            leaderboard_df = gr.Dataframe(
                value=update_leaderboard(DEFAULT_CATEGORY),
                headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
                # "html" lets the Model column render its <a> link.
                datatype=["number", "html", "str", "str", "number"],
                interactive=False,
                row_count=(len(master_df), "fixed"),
                col_count=(5, "fixed"),
                wrap=True,
                elem_id="leaderboard-table",
            )
            category_selector.change(
                fn=update_leaderboard,
                inputs=category_selector,
                outputs=leaderboard_df,
            )

        with gr.TabItem("ℹ️ About"):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    with gr.Accordion("📙 Citation", open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=10,
            elem_id="citation-button",
            show_copy_button=True,
        )

if __name__ == "__main__":
    print("Launching Gradio App...")
    demo.launch()