Jerrycool committed on
Commit
a84f158
·
verified ·
1 Parent(s): ea96be1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +442 -404
app.py CHANGED
@@ -14,7 +14,7 @@ try:
14
  LLM_BENCHMARKS_TEXT,
15
  TITLE,
16
  )
17
- from src.display.css_html_js import custom_css # Assuming this might exist
18
  from src.envs import REPO_ID
19
  from src.submission.submit import add_new_eval
20
  print("Successfully imported from src module.")
@@ -22,39 +22,39 @@ try:
22
  except ImportError:
23
  print("Warning: Using placeholder values because src module imports failed.")
24
  CITATION_BUTTON_LABEL = "Citation"
25
- CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark...\n@misc{mledojo2025benchmark,\n title={MLE-Dojo Benchmark},\n author={MLE-Dojo Team},\n year={2025},\n howpublished={\\url{https://your-benchmark-url.example.com}},\n}" # Added example citation text
26
- EVALUATION_QUEUE_TEXT = "### Current evaluation queue:" # Use Markdown heading
27
  INTRODUCTION_TEXT = """
28
- ## Welcome to the MLE-Dojo Benchmark Leaderboard
29
-
30
  This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
31
  Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
32
-
33
- ### How to read this leaderboard
34
- - Select a domain category using the radio buttons below to view specialized rankings.
35
- - Higher ELO scores indicate better performance within that category.
36
- - Click on a model name to visit its page (if available).
37
  """
38
  LLM_BENCHMARKS_TEXT = """
39
- ## About the MLE-Dojo Benchmark
40
-
41
- ### Evaluation Methodology
42
  The MLE-Dojo benchmark evaluates models across various domains including:
43
-
44
- - **MLE-Lite**: Basic machine learning engineering tasks (data preprocessing, feature engineering, model selection).
45
- - **Tabular**: Data manipulation, analysis, and modeling with structured data.
46
- - **NLP**: Natural language processing tasks (classification, generation, understanding).
47
- - **CV**: Computer vision tasks (image classification, object detection, generation).
48
-
49
- Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other based on competitions within each domain.
50
-
51
- ### Contact
52
- For more information or to submit your model, please contact us at `[email protected]` (replace with actual contact).
53
  """
54
- TITLE = "<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>" # Keep title simple for header
55
- custom_css = "" # Will be populated by enhanced_css later
56
- REPO_ID = "your/space-id" # Replace with your actual Hugging Face Space ID if restarting
57
- def add_new_eval(*args): return "Submission placeholder." # Placeholder function
58
 
59
  # --- Elo Leaderboard Configuration ---
60
  # Enhanced data with Rank (placeholder), Organizer, License, and URL
@@ -71,7 +71,7 @@ data = [
71
 
72
  # Add organization logos (for visual enhancement)
73
  org_logos = {
74
- 'OpenAI': 'πŸ“±', # Replace with actual icon URLs or keep emojis
75
  'DeepSeek': 'πŸ”',
76
  'Google': '🌐',
77
  'Default': 'πŸ€–'
@@ -81,50 +81,51 @@ org_logos = {
81
  master_df = pd.DataFrame(data)
82
 
83
  # Add last updated timestamp
84
- last_updated = datetime.now().strftime("%B %d, %Y at %H:%M:%S %Z") # Added Timezone
85
 
86
  # Define categories with fancy icons
87
  CATEGORIES = [
88
- ("πŸ† Overall", "Overall"),
89
- ("πŸ’‘ MLE-Lite", "MLE-Lite"),
90
- ("πŸ“Š Tabular", "Tabular"),
91
- ("πŸ“ NLP", "NLP"),
92
  ("πŸ‘οΈ CV", "CV")
93
  ]
94
- DEFAULT_CATEGORY_LABEL = "πŸ† Overall" # Use the label for default value
95
- DEFAULT_CATEGORY_VALUE = "Overall" # The actual value
96
 
97
- # Map user-facing category *values* to DataFrame column names
98
  category_to_column = {
99
- "Overall": "Overall",
100
  "MLE-Lite": "MLE-Lite_Elo",
101
  "Tabular": "Tabular_Elo",
102
  "NLP": "NLP_Elo",
103
  "CV": "CV_Elo",
 
104
  }
105
 
106
  # --- Helper function to update leaderboard ---
107
  def update_leaderboard(category_label):
108
  """
109
- Enhanced function to update the leaderboard with visual improvements and numerical rank.
110
  """
111
- # Find the category value corresponding to the selected label
112
- category_value = DEFAULT_CATEGORY_VALUE # Default fallback
113
- for label, value in CATEGORIES:
114
- if label == category_label:
115
- category_value = value
116
- break
117
-
118
- score_column = category_to_column.get(category_value)
119
  if score_column is None or score_column not in master_df.columns:
120
- print(f"Warning: Invalid category value '{category_value}' or column '{score_column}'. Falling back to default.")
121
- score_column = category_to_column[DEFAULT_CATEGORY_VALUE] # Fallback to default value
122
- category_value = DEFAULT_CATEGORY_VALUE # Ensure category value matches fallback
123
  if score_column not in master_df.columns:
124
  print(f"Error: Default column '{score_column}' also not found.")
125
- # Return an empty DataFrame with the correct structure
126
  return pd.DataFrame({
127
- "Rank": [], "Model": [], "Organization": [], "License": [], f"Elo Score ({category_value})": []
 
 
 
 
128
  })
129
 
130
  # Select base columns + the score column for sorting
@@ -134,15 +135,15 @@ def update_leaderboard(category_label):
134
  # Sort by the selected 'Elo Score' descending
135
  df.sort_values(by=score_column, ascending=False, inplace=True)
136
 
137
- # Add Rank (numerical)
138
  df.reset_index(drop=True, inplace=True)
139
- df.insert(0, 'Rank', df.index + 1) # Insert numerical rank starting from 1
140
-
141
- # Add organization icons to model names with clickable links
142
  df['Model'] = df.apply(
143
  lambda row: f"""<div style="display: flex; align-items: center;">
144
  <span style="font-size: 1.5em; margin-right: 10px;">{org_logos.get(row['organizer'], org_logos['Default'])}</span>
145
- <a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank'
146
  style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
147
  {row['model_name']}
148
  </a>
@@ -150,34 +151,33 @@ def update_leaderboard(category_label):
150
  axis=1
151
  )
152
 
153
- # Format Elo scores with visual indicators (bar + color)
154
  df['Elo Display'] = df[score_column].apply(
155
- lambda score: f"""<div style="display: flex; align-items: center; justify-content: flex-start;">
156
- <span style="font-weight: bold; min-width: 40px; text-align: right; color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'}">
157
  {score}
158
  </span>
159
- <div style="margin-left: 10px; height: 12px; width: 80px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
160
- <div style="height: 100%; width: {min(100, max(5, (score - 700) / 7))}%; background-color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'};"></div>
161
  </div>
162
  </div>"""
163
  )
164
 
165
  # Rename columns for display
166
- df.rename(columns={'organizer': 'Organization', 'license': 'License'}, inplace=True) # Renamed for clarity
 
167
 
168
  # Select and reorder columns for final display
169
- # Use the determined category_value for the score column header
170
- final_columns = ["Rank", "Model", "Organization", "License", "Elo Display"]
171
- df_display = df[final_columns].copy()
172
-
173
- # Rename the score column dynamically
174
- df_display.rename(columns={"Elo Display": f"Elo Score ({category_value})"}, inplace=True)
175
-
176
- return df_display
177
 
 
 
 
 
178
 
179
  # --- Mock/Placeholder functions/data for other tabs ---
180
- print("Warning: Evaluation queue data fetching is disabled/mocked.")
181
  finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
182
  running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
183
  pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
@@ -186,14 +186,11 @@ EVAL_TYPES = ["str", "str", "str", "str"]
186
 
187
  # --- Keep restart function if relevant ---
188
  def restart_space():
189
- """Placeholder for Hugging Face space restart logic."""
190
- print(f"Attempting to trigger restart for space: {REPO_ID} (placeholder action)")
191
- # In a real HF Space, you might use the hf_hub API client if it supports this,
192
- # or trigger a webhook, or rely on built-in auto-restart features.
193
- # This function likely cannot directly restart the space from within itself.
194
 
195
  # --- Enhanced CSS for beauty and readability ---
196
- # (Your enhanced_css string remains the same as provided in the prompt)
197
  enhanced_css = """
198
  /* Base styling */
199
  :root {
@@ -201,11 +198,11 @@ enhanced_css = """
201
  --secondary-color: #2ec27e;
202
  --accent-color: #e5a50a;
203
  --warning-color: #ff7800;
204
- --text-color: #333333; /* Darker text for better contrast */
205
- --background-color: #f4f6f8; /* Light grey background */
206
- --card-background: #ffffff; /* White background for cards/tables */
207
- --border-color: #e0e0e0;
208
- --shadow-color: rgba(0, 0, 0, 0.08);
209
  }
210
 
211
  /* Typography */
@@ -214,7 +211,7 @@ body, .gradio-container {
214
  font-size: 16px !important;
215
  line-height: 1.6 !important;
216
  color: var(--text-color) !important;
217
- background-color: var(--background-color) !important; /* Ensure body background is set */
218
  }
219
 
220
  /* Headings */
@@ -251,7 +248,7 @@ h3 {
251
  border-radius: 12px !important;
252
  overflow: hidden !important;
253
  box-shadow: 0 4px 12px var(--shadow-color) !important;
254
- background-color: var(--card-background); /* White background for tabs container */
255
  }
256
 
257
  .tab-nav button {
@@ -261,75 +258,64 @@ h3 {
261
  border-radius: 0 !important;
262
  transition: all 0.2s ease !important;
263
  border-bottom: 2px solid transparent !important;
264
- background-color: transparent !important; /* Ensure buttons are transparent */
265
  color: var(--text-color) !important;
266
  }
267
 
268
  .tab-nav button.selected {
269
- background-color: transparent !important; /* Keep transparent */
270
  color: var(--primary-color) !important;
271
  font-weight: 600 !important;
272
  border-bottom: 2px solid var(--primary-color) !important;
273
  }
274
 
275
  /* Card styling */
276
- .gradio-container .gr-block { /* Target blocks for card styling */
277
  border-radius: 12px !important;
278
  border: 1px solid var(--border-color) !important;
279
  box-shadow: 0 4px 12px var(--shadow-color) !important;
280
  overflow: hidden !important;
281
- background-color: var(--card-background) !important; /* White background */
282
- padding: 1.5rem !important; /* Add padding to cards */
283
- margin-bottom: 1.5rem !important; /* Add space between cards */
284
- }
285
- /* Ensure panels also get card styling */
286
- .gradio-container .gr-panel {
287
- border-radius: 12px !important;
288
- border: 1px solid var(--border-color) !important;
289
- box-shadow: 0 4px 12px var(--shadow-color) !important;
290
- overflow: hidden !important;
291
- background-color: var(--card-background) !important;
292
- padding: 1.5rem !important;
293
- margin-bottom: 1.5rem !important;
294
- }
295
-
296
-
297
- /* Table styling */
298
- .gr-dataframe { /* Target the dataframe component specifically */
299
- border-radius: 8px !important;
300
- overflow: hidden !important; /* Needed for border-radius on table */
301
- box-shadow: 0 4px 12px var(--shadow-color) !important;
302
- border: 1px solid var(--border-color) !important; /* Add border around table */
303
- margin: 1.5rem 0 !important;
304
  }
305
 
 
306
  table {
307
  width: 100% !important;
308
- border-collapse: separate !important; /* Needed for spacing and rounded corners */
309
  border-spacing: 0 !important;
310
- background-color: var(--card-background); /* White background for table */
 
 
 
 
 
 
 
 
 
 
 
311
  }
312
 
313
  th {
314
- background-color: #f0f5ff !important; /* Lighter blue for header */
315
  color: var(--primary-color) !important;
316
  font-weight: 600 !important;
317
- padding: 1rem 1.2rem !important; /* Adjust padding */
318
- font-size: 1.05rem !important; /* Slightly smaller header font */
319
  text-align: left !important;
320
  border-bottom: 2px solid var(--primary-color) !important;
321
- position: sticky !important; /* Make header sticky */
322
- top: 0 !important; /* Stick to the top */
323
- z-index: 1 !important; /* Ensure header is above scrolling content */
324
  }
325
 
326
  td {
327
- padding: 0.9rem 1.2rem !important; /* Adjust padding */
328
  border-bottom: 1px solid var(--border-color) !important;
329
  font-size: 1rem !important;
330
  vertical-align: middle !important;
331
- background-color: var(--card-background); /* Ensure cell background is white */
332
- color: var(--text-color); /* Ensure text color is applied */
333
  }
334
 
335
  tr:last-child td {
@@ -337,11 +323,11 @@ tr:last-child td {
337
  }
338
 
339
  tr:nth-child(even) td {
340
- background-color: #f8fafd !important; /* Very light blue for alternating rows */
341
  }
342
 
343
  tr:hover td {
344
- background-color: #edf2fb !important; /* Slightly darker blue on hover */
345
  }
346
 
347
  /* Button styling */
@@ -369,14 +355,10 @@ button.primary:hover, .gr-button.primary:hover {
369
  flex-wrap: wrap !important;
370
  gap: 10px !important;
371
  margin: 1rem 0 !important;
372
- background-color: transparent !important; /* Ensure container is transparent */
373
- border: none !important; /* Remove default border */
374
- box-shadow: none !important; /* Remove default shadow */
375
- padding: 0 !important; /* Remove default padding */
376
  }
377
 
378
- .gr-radio > label { /* Target the label inside gr-radio */
379
- background-color: #f5f7fa !important;
380
  border: 1px solid var(--border-color) !important;
381
  border-radius: 8px !important;
382
  padding: 0.7rem 1.2rem !important;
@@ -388,32 +370,22 @@ button.primary:hover, .gr-button.primary:hover {
388
  align-items: center !important;
389
  gap: 8px !important;
390
  color: var(--text-color) !important;
391
- box-shadow: none !important; /* Override potential inner shadows */
392
- }
393
- /* Remove inner block styling if gradio adds extra divs */
394
- .gr-radio > div {
395
- background: none !important;
396
- border: none !important;
397
- padding: 0 !important;
398
- margin: 0 !important;
399
- box-shadow: none !important;
400
  }
401
 
402
-
403
  .gr-radio label:hover {
404
- background-color: #eaeef3 !important;
405
- border-color: #c0c9d6 !important;
406
  }
407
 
408
  .gr-radio label.selected {
409
- background-color: #e0e9f7 !important;
410
  border-color: var(--primary-color) !important;
411
  color: var(--primary-color) !important;
412
  font-weight: 600 !important;
413
  }
414
 
415
  /* Input fields */
416
- input[type="text"], textarea, select { /* Be more specific */
417
  font-size: 1rem !important;
418
  padding: 0.8rem !important;
419
  border-radius: 8px !important;
@@ -421,11 +393,9 @@ input[type="text"], textarea, select { /* Be more specific */
421
  transition: all 0.2s ease !important;
422
  background-color: #ffffff !important;
423
  color: var(--text-color) !important;
424
- width: 100%; /* Make inputs take full width */
425
- box-sizing: border-box; /* Include padding in width */
426
  }
427
 
428
- input[type="text"]:focus, textarea:focus, select:focus {
429
  border-color: var(--primary-color) !important;
430
  box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
431
  outline: none !important;
@@ -437,48 +407,28 @@ input[type="text"]:focus, textarea:focus, select:focus {
437
  overflow: hidden !important;
438
  margin: 1rem 0 !important;
439
  border: 1px solid var(--border-color) !important;
440
- background-color: var(--card-background) !important; /* White background */
441
- box-shadow: 0 2px 6px var(--shadow-color) !important; /* Lighter shadow for accordion */
442
  }
443
 
444
- .gr-accordion > .gr-block { /* Target inner block of accordion */
445
- border: none !important;
446
- box-shadow: none !important;
447
- padding: 0 !important; /* Remove padding from inner block */
448
- margin: 0 !important;
449
- }
450
-
451
-
452
- .gr-accordion-header { /* Check Gradio structure for header class */
453
- padding: 1rem 1.2rem !important; /* Adjust padding */
454
- background-color: #f5f7fa !important;
455
  font-weight: 600 !important;
456
  font-size: 1.1rem !important;
457
  color: var(--text-color) !important;
458
  border-bottom: 1px solid var(--border-color) !important;
459
- cursor: pointer; /* Indicate clickable */
460
- }
461
- /* Style for open accordion header */
462
- .gr-accordion[open] > .gr-accordion-header { /* Might need adjustment based on Gradio version */
463
- border-bottom: 1px solid var(--border-color) !important;
464
  }
465
 
466
- /* Style for accordion content (might be nested) */
467
- .gr-accordion .gr-panel, .gr-accordion .gr-box { /* Check which element holds content */
468
- padding: 1.2rem !important; /* Add padding to content */
469
- background-color: var(--card-background) !important; /* White background */
470
- border: none !important; /* Remove borders inside accordion */
471
- box-shadow: none !important;
472
- border-radius: 0 0 8px 8px !important; /* Round bottom corners */
473
  }
474
 
475
-
476
  /* Markdown text improvements */
477
- .markdown-text { /* Might need a more specific selector like .gr-markdown */
478
  font-size: 1.05rem !important;
479
  line-height: 1.7 !important;
480
  color: var(--text-color) !important;
481
- background-color: transparent !important; /* Ensure markdown bg is transparent */
482
  }
483
 
484
  .markdown-text p {
@@ -494,26 +444,12 @@ input[type="text"]:focus, textarea:focus, select:focus {
494
  margin-bottom: 0.5rem !important;
495
  }
496
 
497
- .markdown-text strong, .markdown-text b {
498
  font-weight: 600 !important;
499
- color: #111 !important; /* Slightly darker for emphasis */
500
- }
501
- .markdown-text code { /* Style inline code */
502
- background-color: #eef0f2;
503
- padding: 0.2em 0.4em;
504
- border-radius: 4px;
505
- font-size: 0.9em;
506
- color: #3a4a5b;
507
- }
508
- .markdown-text a { /* Style links */
509
- color: var(--primary-color);
510
- text-decoration: none;
511
- }
512
- .markdown-text a:hover {
513
- text-decoration: underline;
514
  }
515
 
516
- /* Status indicators (if used in submission tab) */
517
  .status-badge {
518
  display: inline-block;
519
  padding: 0.3rem 0.7rem;
@@ -522,7 +458,24 @@ input[type="text"]:focus, textarea:focus, select:focus {
522
  font-weight: 500;
523
  text-align: center;
524
  }
525
- /* Add specific styles for statuses if needed */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
  /* Footer */
528
  .footer {
@@ -530,132 +483,146 @@ input[type="text"]:focus, textarea:focus, select:focus {
530
  padding: 1.5rem 1rem;
531
  text-align: center;
532
  font-size: 0.9rem;
533
- color: #555;
534
  border-top: 1px solid var(--border-color);
535
- background-color: #e9edf1; /* Light background for footer */
536
  }
537
 
538
  /* Enhanced leaderboard title area */
539
  .leaderboard-header {
540
  display: flex;
541
- flex-direction: column; /* Stack elements vertically on small screens */
542
- align-items: center; /* Center items */
543
  justify-content: space-between;
544
  margin-bottom: 1.5rem;
545
  padding: 1.5rem;
546
- background-color: var(--card-background); /* White background */
547
  border-radius: 12px;
548
  border: 1px solid var(--border-color);
549
  box-shadow: 0 4px 12px var(--shadow-color);
550
- text-align: center; /* Center text */
551
  }
552
- @media (min-width: 768px) { /* Apply side-by-side layout on larger screens */
553
- .leaderboard-header {
554
- flex-direction: row;
555
- text-align: left;
556
- }
557
- }
558
-
559
 
560
  .leaderboard-title {
561
- font-size: 2.0rem; /* Adjusted size */
562
  font-weight: 700;
563
  color: var(--primary-color);
564
- margin: 0 0 0.5rem 0; /* Add bottom margin */
565
  display: flex;
566
  align-items: center;
567
- gap: 0.7rem; /* Increase gap */
568
- justify-content: center; /* Center on small screens */
569
- }
570
- @media (min-width: 768px) {
571
- .leaderboard-title {
572
- justify-content: flex-start; /* Align left on large screens */
573
- font-size: 2.2rem; /* Restore size */
574
- }
575
  }
576
 
577
  .leaderboard-subtitle {
578
  font-size: 1.1rem;
579
- color: #666;
580
- margin: 0 0 1rem 0; /* Add bottom margin */
581
- }
582
- @media (min-width: 768px) {
583
- .leaderboard-subtitle { margin-bottom: 0; } /* Remove bottom margin on large screens */
584
  }
585
 
586
  .timestamp {
587
  font-size: 0.85rem;
588
- color: #666;
589
  font-style: italic;
590
- background-color: #f5f7fa;
591
  padding: 5px 10px;
592
  border-radius: 6px;
593
- margin-top: 0.5rem; /* Add space above timestamp */
594
  }
595
- @media (min-width: 768px) {
596
- .timestamp { margin-top: 0; } /* Remove top margin on large screens */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
  }
598
 
 
 
 
 
599
 
600
- /* Category selector buttons (Already styled via .gr-radio) */
 
 
 
 
601
 
602
  /* Logo and brand styling */
603
  .logo {
604
- font-size: 2.5em; /* Keep logo size */
605
  margin-right: 0.5rem;
606
  }
607
 
 
 
 
 
 
 
 
 
 
 
 
608
  /* Style for About section cards */
609
  .about-card {
610
- background-color: #f5f7fa; /* Lighter background for these cards */
611
  padding: 20px;
612
  border-radius: 12px;
613
- height: 100%; /* Make cards in a row equal height */
614
  border: 1px solid var(--border-color);
615
- display: flex; /* Use flexbox for alignment */
616
- flex-direction: column; /* Stack content vertically */
617
- text-align: center; /* Center text */
618
- box-shadow: 0 2px 6px var(--shadow-color); /* Add subtle shadow */
619
  }
620
  .about-card h3 {
621
  text-align: center;
622
  margin-top: 0;
623
- margin-bottom: 10px; /* Space below heading */
624
  color: var(--primary-color);
625
  }
626
  .about-card p {
627
  color: var(--text-color);
628
  font-size: 0.95rem;
629
  line-height: 1.6;
630
- flex-grow: 1; /* Allow paragraph to take up space */
631
  }
632
  .about-card-icon {
633
  font-size: 2.5em;
634
  text-align: center;
635
  margin-bottom: 15px;
636
  display: block;
637
- color: var(--secondary-color); /* Use secondary color for icons */
638
  }
639
- /* Ensure citation textbox has good contrast */
640
- #citation-button textarea {
641
- background-color: #f5f7fa !important;
642
- color: var(--text-color) !important;
643
- border: 1px solid var(--border-color) !important;
644
- font-family: monospace !important; /* Use monospace for citation */
645
- font-size: 0.95rem !important;
 
 
 
 
 
 
 
 
 
646
  }
647
  """
648
 
649
- # Combine with any existing CSS (if custom_css was loaded from src)
650
- # If custom_css from src exists, append enhanced_css. Otherwise, just use enhanced_css.
651
- if custom_css:
652
- custom_css += "\n" + enhanced_css
653
- else:
654
- custom_css = enhanced_css
655
 
656
  # --- Gradio App Definition ---
657
- # REMOVED theme=gr.themes.Soft() to allow custom CSS to dominate
658
- demo = gr.Blocks(css=custom_css)
659
 
660
  with demo:
661
  # Enhanced header with timestamp
@@ -675,173 +642,250 @@ with demo:
675
  </div>
676
  """)
677
 
678
- # Introduction moved outside Tabs for permanent visibility
679
- with gr.Blocks(elem_classes="gr-block"): # Wrap intro in a styled block
680
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
681
 
682
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
683
  with gr.TabItem("πŸ“Š Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
684
  with gr.Column():
685
- with gr.Blocks(elem_classes="gr-block"): # Wrap leaderboard controls/table
686
- gr.HTML("""
687
- <h2 style="display: flex; align-items: center; gap: 10px; margin-bottom: 0.5rem;">
688
- <span style="font-size: 1.3em;">πŸ“ˆ</span> Model Performance Rankings
689
- </h2>
690
- <p class="leaderboard-subtitle" style="margin-top: 0;">Select a category to view specialized performance metrics</p>
691
- """)
692
-
693
- # Enhanced category selector
694
- category_selector = gr.Radio(
695
- # Use labels from CATEGORIES
696
- choices=[label for label, value in CATEGORIES],
697
- label="Select Performance Domain:",
698
- value=DEFAULT_CATEGORY_LABEL, # Default to the label
699
- interactive=True,
700
- elem_classes="gr-radio" # Apply custom radio styling
701
- )
702
-
703
- # Visual separator (Optional)
704
- # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
705
-
706
- # Enhanced leaderboard table with scrolling
707
- leaderboard_df_component = gr.Dataframe(
708
- # Initialize with default category value
709
- value=update_leaderboard(DEFAULT_CATEGORY_LABEL),
710
- # Headers will be set dynamically by the update function's output DataFrame
711
- # Set datatypes for correct rendering (Rank is now number)
712
- datatype=["number", "html", "str", "str", "html"],
713
- interactive=False, # IMPORTANT: Keep False to disable UI sorting/editing
714
- # row_count=(10, "dynamic"), # Alternative: show 10 rows, scroll others
715
- col_count=(5, "fixed"), # We have 5 columns
716
- wrap=True, # Allow text wrapping in cells
717
- elem_id="leaderboard-table", # ID for potential CSS targeting
718
- )
719
-
720
- # Stats cards (visual enhancement) - Placed after leaderboard table
721
- with gr.Blocks(elem_classes="gr-block"): # Wrap stats in a styled block
722
- gr.HTML("<h2>Benchmark Statistics</h2>") # Add title for stats section
723
- with gr.Row(equal_height=True):
724
- with gr.Column(scale=1):
725
- gr.HTML(f"""
726
- <div class="about-card" style="text-align: center;">
727
- <div class="about-card-icon">πŸ”</div>
728
- <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
729
- <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
730
- </div>
731
- """)
732
- with gr.Column(scale=1):
733
- gr.HTML(f"""
734
- <div class="about-card" style="text-align: center;">
735
- <div class="about-card-icon">🌐</div>
736
- <div style="font-size: 2em; font-weight: bold; color: #00875a;">{50}</div>
737
- <div style="font-size: 1.1em; color: #666;">Competitions (Example)</div>
738
- </div>
739
- """)
740
- with gr.Column(scale=1):
741
- gr.HTML(f"""
742
- <div class="about-card" style="text-align: center;">
743
- <div class="about-card-icon">πŸ…</div>
744
- <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)-1}</div>
745
- <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
746
- </div>
747
- """)
748
-
749
- # Link the radio button change to the update function
750
- category_selector.change(
751
- fn=update_leaderboard,
752
- inputs=category_selector,
753
- outputs=leaderboard_df_component
754
- )
755
-
756
- with gr.TabItem("πŸ“š About", elem_id="llm-benchmark-tab-about", id=1):
757
- # Wrap content in a styled block for consistent padding/background
758
- with gr.Blocks(elem_classes="gr-block"):
759
- # Enhanced about section header
760
  gr.HTML("""
761
- <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
762
- <div style="font-size: 4em;">πŸ§ͺ</div>
763
- <div>
764
- <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
765
- <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
766
- </div>
767
- </div>
768
  """)
769
-
770
- # Use the LLM_BENCHMARKS_TEXT variable
771
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
772
-
773
- # Add methodology cards for visual enhancement
774
- gr.HTML("<h2 style='margin-top: 2rem;'>Evaluation Domains</h2>") # Title for cards
775
- with gr.Row(equal_height=True):
776
- with gr.Column():
777
- gr.HTML("""
778
- <div class="about-card">
779
- <div class="about-card-icon">πŸ’‘</div>
780
- <h3>MLE-Lite</h3>
781
- <p>Evaluates basic ML engineering tasks: data preprocessing, feature engineering, model selection, and basic deployment concepts.</p>
782
- </div>
783
- """)
784
- with gr.Column():
785
- gr.HTML("""
786
- <div class="about-card">
787
- <div class="about-card-icon">πŸ“Š</div>
788
- <h3>Tabular</h3>
789
- <p>Tests processing, analysis, and modeling of structured data, including statistical analysis, predictive modeling, and visualization.</p>
 
 
 
 
 
 
 
 
 
 
 
 
790
  </div>
791
  """)
792
-
793
- with gr.Row(equal_height=True):
794
- with gr.Column():
795
- gr.HTML("""
796
- <div class="about-card">
797
- <div class="about-card-icon">πŸ“</div>
798
- <h3>NLP</h3>
799
- <p>Evaluates natural language processing: text classification, sentiment analysis, entity recognition, text generation, and understanding.</p>
800
  </div>
801
  """)
802
- with gr.Column():
803
- gr.HTML("""
804
- <div class="about-card">
805
- <div class="about-card-icon">πŸ‘οΈ</div>
806
- <h3>CV</h3>
807
- <p>Tests computer vision capabilities: image classification, object detection, image generation, and visual understanding tasks.</p>
808
  </div>
809
  """)
 
 
 
 
 
 
 
810
 
811
- # Optional: Uncomment if you want to re-enable the Submit tab
812
- # with gr.TabItem("πŸš€ Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
813
- # with gr.Blocks(elem_classes="gr-block"): # Wrap in styled block
814
- # # ... (Your submission form code here) ...
815
- # gr.Markdown("Submit tab content goes here.") # Placeholder
816
-
817
-
818
- # Enhanced citation section (outside tabs, inside main demo block)
819
- with gr.Blocks(elem_classes="gr-block"): # Wrap in styled block
820
- with gr.Accordion("πŸ“„ Citation", open=False): # Accordion itself doesn't need gr-block
821
  gr.HTML("""
822
- <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
823
- <div style="font-size: 2.5em;">πŸ“„</div>
824
  <div>
825
- <h3 style="margin: 0;">How to Cite This Benchmark</h3>
826
- <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research.</p>
827
  </div>
828
  </div>
829
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
830
 
831
- citation_button = gr.Textbox(
832
- value=CITATION_BUTTON_TEXT,
833
- label=CITATION_BUTTON_LABEL,
834
- lines=5, # Adjust lines as needed
835
- elem_id="citation-button", # Use this ID for CSS
836
- show_copy_button=True,
837
- interactive=False # Textbox is not meant for user input here
838
- )
839
-
840
- # Footer (outside tabs, inside main demo block)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
841
  gr.HTML("""
842
  <div class="footer">
843
  <p>Β© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
844
- <p style="margin-top: 5px; display: flex; justify-content: center; flex-wrap: wrap; gap: 20px;">
845
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
846
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
847
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
@@ -850,16 +894,12 @@ with demo:
850
  """)
851
 
852
  # --- Keep scheduler if relevant ---
853
- # Note: Scheduler might not work reliably in all Gradio deployment environments (like Spaces free tier)
854
  if __name__ == "__main__":
855
  try:
856
  scheduler = BackgroundScheduler()
857
  if callable(restart_space):
858
- # Ensure REPO_ID is set correctly for your HF Space
859
  if REPO_ID and REPO_ID != "your/space-id":
860
- print(f"Scheduling space restart job for {REPO_ID} every 30 minutes.")
861
- # Restart interval might need adjustment based on environment limits
862
- scheduler.add_job(restart_space, "interval", seconds=1800)
863
  scheduler.start()
864
  else:
865
  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
@@ -871,6 +911,4 @@ if __name__ == "__main__":
871
  # --- Launch the app ---
872
  if __name__ == "__main__":
873
  print("Launching Enhanced Gradio App...")
874
- # share=True is needed for public access if running locally and want others to see it
875
- # demo.launch(share=True)
876
- demo.launch() # Launches locally by default
 
14
  LLM_BENCHMARKS_TEXT,
15
  TITLE,
16
  )
17
+ from src.display.css_html_js import custom_css
18
  from src.envs import REPO_ID
19
  from src.submission.submit import add_new_eval
20
  print("Successfully imported from src module.")
 
22
  except ImportError:
23
  print("Warning: Using placeholder values because src module imports failed.")
24
  CITATION_BUTTON_LABEL = "Citation"
25
+ CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
26
+ EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
27
  INTRODUCTION_TEXT = """
28
+ # Welcome to the MLE-Dojo Benchmark Leaderboard
29
+
30
  This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
31
  Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
32
+
33
+ ## How to read this leaderboard
34
+ - Select a domain category to view specialized rankings
35
+ - Higher ELO scores indicate better performance
36
+ - Click on any model name to learn more about it
37
  """
38
  LLM_BENCHMARKS_TEXT = """
39
+ # About the MLE-Dojo Benchmark
40
+
41
+ ## Evaluation Methodology
42
  The MLE-Dojo benchmark evaluates models across various domains including:
43
+
44
+ - **MLE-Lite**: Basic machine learning engineering tasks
45
+ - **Tabular**: Data manipulation, analysis, and modeling with structured data
46
+ - **NLP**: Natural language processing tasks including classification, generation, and understanding
47
+ - **CV**: Computer vision tasks including image classification, object detection, and generation
48
+
49
+ Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other.
50
+
51
+ ## Contact
52
+ For more information or to submit your model, please contact us at [email protected]
53
  """
54
+ TITLE = "<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
55
+ custom_css = ""
56
+ REPO_ID = "your/space-id"
57
+ def add_new_eval(*args): return "Submission placeholder."
58
 
59
  # --- Elo Leaderboard Configuration ---
60
  # Enhanced data with Rank (placeholder), Organizer, License, and URL
 
71
 
72
  # Add organization logos (for visual enhancement)
73
  org_logos = {
74
+ 'OpenAI': 'πŸ“±', # You can replace these with actual icon URLs in production
75
  'DeepSeek': 'πŸ”',
76
  'Google': '🌐',
77
  'Default': 'πŸ€–'
 
81
  master_df = pd.DataFrame(data)
82
 
83
  # Add last updated timestamp
84
+ last_updated = datetime.now().strftime("%B %d, %Y at %H:%M:%S")
85
 
86
  # Define categories with fancy icons
87
  CATEGORIES = [
88
+ ("πŸ† Overall", "Overall"),
89
+ ("πŸ’‘ MLE-Lite", "MLE-Lite"),
90
+ ("πŸ“Š Tabular", "Tabular"),
91
+ ("πŸ“ NLP", "NLP"),
92
  ("πŸ‘οΈ CV", "CV")
93
  ]
94
+ DEFAULT_CATEGORY = "Overall"
 
95
 
96
+ # Map user-facing categories to DataFrame column names
97
  category_to_column = {
 
98
  "MLE-Lite": "MLE-Lite_Elo",
99
  "Tabular": "Tabular_Elo",
100
  "NLP": "NLP_Elo",
101
  "CV": "CV_Elo",
102
+ "Overall": "Overall"
103
  }
104
 
105
  # --- Helper function to update leaderboard ---
106
  def update_leaderboard(category_label):
107
  """
108
+ Enhanced function to update the leaderboard with visual improvements
109
  """
110
+ # Extract the category value from the label if it's a tuple (icon, value)
111
+ if isinstance(category_label, tuple):
112
+ category = category_label[1]
113
+ else:
114
+ # For backward compatibility or direct values
115
+ category = category_label.split(" ")[-1] if " " in category_label else category_label
116
+
117
+ score_column = category_to_column.get(category)
118
  if score_column is None or score_column not in master_df.columns:
119
+ print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
120
+ score_column = category_to_column[DEFAULT_CATEGORY]
 
121
  if score_column not in master_df.columns:
122
  print(f"Error: Default column '{score_column}' also not found.")
 
123
  return pd.DataFrame({
124
+ "Rank": [],
125
+ "Model": [],
126
+ "Organizer": [],
127
+ "License": [],
128
+ "Elo Score": []
129
  })
130
 
131
  # Select base columns + the score column for sorting
 
135
  # Sort by the selected 'Elo Score' descending
136
  df.sort_values(by=score_column, ascending=False, inplace=True)
137
 
138
+ # Add Rank with just numbers (no medals)
139
  df.reset_index(drop=True, inplace=True)
140
+ df.insert(0, 'Rank', df.index.map(lambda idx: f"{idx + 1}"))
141
+
142
+ # Add organization icons to model names
143
  df['Model'] = df.apply(
144
  lambda row: f"""<div style="display: flex; align-items: center;">
145
  <span style="font-size: 1.5em; margin-right: 10px;">{org_logos.get(row['organizer'], org_logos['Default'])}</span>
146
+ <a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank'
147
  style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
148
  {row['model_name']}
149
  </a>
 
151
  axis=1
152
  )
153
 
154
+ # Format Elo scores with visual indicators
155
  df['Elo Display'] = df[score_column].apply(
156
+ lambda score: f"""<div style="display: flex; align-items: center;">
157
+ <span style="font-weight: bold; color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'}">
158
  {score}
159
  </span>
160
+ <div style="margin-left: 10px; height: 12px; width: 60px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
161
+ <div style="height: 100%; width: {min(100, max(5, (score-700)/7))}%; background-color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'};"></div>
162
  </div>
163
  </div>"""
164
  )
165
 
166
  # Rename columns for display
167
+ df.rename(columns={score_column: 'Elo Score'}, inplace=True)
168
+ df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
169
 
170
  # Select and reorder columns for final display
171
+ final_columns = ["Rank", "Model", "Organizer", "License", "Elo Display"]
172
+ df = df[final_columns]
 
 
 
 
 
 
173
 
174
+ # Rename for display
175
+ df.columns = ["Rank", "Model", "Organization", "License", f"Elo Score ({category})"]
176
+
177
+ return df
178
 
179
  # --- Mock/Placeholder functions/data for other tabs ---
180
+ print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
181
  finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
182
  running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
183
  pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
 
186
 
187
  # --- Keep restart function if relevant ---
188
  def restart_space():
189
+ print(f"Attempting to restart space: {REPO_ID}")
190
+ # Replace with your actual space restart mechanism if needed
 
 
 
191
 
192
  # --- Enhanced CSS for beauty and readability ---
193
+ # FIXED CSS with better contrast, improved scrolling for tables, and other fixes
194
  enhanced_css = """
195
  /* Base styling */
196
  :root {
 
198
  --secondary-color: #2ec27e;
199
  --accent-color: #e5a50a;
200
  --warning-color: #ff7800;
201
+ --text-color: #333333;
202
+ --background-color: #e9edf1; /* Lightened background */
203
+ --card-background: #ffffff;
204
+ --border-color: #c0c9d6; /* Darkened border */
205
+ --shadow-color: rgba(0, 0, 0, 0.12); /* Increased shadow opacity */
206
  }
207
 
208
  /* Typography */
 
211
  font-size: 16px !important;
212
  line-height: 1.6 !important;
213
  color: var(--text-color) !important;
214
+ background-color: var(--background-color) !important;
215
  }
216
 
217
  /* Headings */
 
248
  border-radius: 12px !important;
249
  overflow: hidden !important;
250
  box-shadow: 0 4px 12px var(--shadow-color) !important;
251
+ background-color: var(--card-background);
252
  }
253
 
254
  .tab-nav button {
 
258
  border-radius: 0 !important;
259
  transition: all 0.2s ease !important;
260
  border-bottom: 2px solid transparent !important;
261
+ background-color: transparent !important;
262
  color: var(--text-color) !important;
263
  }
264
 
265
  .tab-nav button.selected {
266
+ background-color: transparent !important;
267
  color: var(--primary-color) !important;
268
  font-weight: 600 !important;
269
  border-bottom: 2px solid var(--primary-color) !important;
270
  }
271
 
272
  /* Card styling */
273
+ .gradio-container .gr-box, .gradio-container .gr-panel {
274
  border-radius: 12px !important;
275
  border: 1px solid var(--border-color) !important;
276
  box-shadow: 0 4px 12px var(--shadow-color) !important;
277
  overflow: hidden !important;
278
+ background-color: var(--card-background) !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  }
280
 
281
+ /* Table styling - FIXING SCROLLING ISSUES */
282
  table {
283
  width: 100% !important;
284
+ border-collapse: separate !important;
285
  border-spacing: 0 !important;
286
+ margin: 1.5rem 0 !important;
287
+ border-radius: 8px !important;
288
+ overflow: visible !important; /* Changed from hidden to visible */
289
+ box-shadow: 0 4px 12px var(--shadow-color) !important;
290
+ background-color: var(--card-background);
291
+ }
292
+
293
+ /* Data table container - ensure scrolling works */
294
+ .gr-table-container {
295
+ overflow: auto !important;
296
+ max-height: 600px !important; /* Add max height to ensure scrolling */
297
+ margin-bottom: 20px !important;
298
  }
299
 
300
  th {
301
+ background-color: #e0ebff !important; /* Darker header background */
302
  color: var(--primary-color) !important;
303
  font-weight: 600 !important;
304
+ padding: 1rem !important;
305
+ font-size: 1.1rem !important;
306
  text-align: left !important;
307
  border-bottom: 2px solid var(--primary-color) !important;
308
+ position: sticky !important; /* Keep headers visible when scrolling */
309
+ top: 0 !important;
310
+ z-index: 10 !important;
311
  }
312
 
313
  td {
314
+ padding: 1rem !important;
315
  border-bottom: 1px solid var(--border-color) !important;
316
  font-size: 1rem !important;
317
  vertical-align: middle !important;
318
+ background-color: var(--card-background);
 
319
  }
320
 
321
  tr:last-child td {
 
323
  }
324
 
325
  tr:nth-child(even) td {
326
+ background-color: #f0f5ff !important; /* Increased contrast for even rows */
327
  }
328
 
329
  tr:hover td {
330
+ background-color: #e0ebff !important; /* Darker hover color */
331
  }
332
 
333
  /* Button styling */
 
355
  flex-wrap: wrap !important;
356
  gap: 10px !important;
357
  margin: 1rem 0 !important;
 
 
 
 
358
  }
359
 
360
+ .gr-radio label {
361
+ background-color: #f0f5ff !important; /* Darker radio button background */
362
  border: 1px solid var(--border-color) !important;
363
  border-radius: 8px !important;
364
  padding: 0.7rem 1.2rem !important;
 
370
  align-items: center !important;
371
  gap: 8px !important;
372
  color: var(--text-color) !important;
 
 
 
 
 
 
 
 
 
373
  }
374
 
 
375
  .gr-radio label:hover {
376
+ background-color: #e0e9f7 !important;
377
+ border-color: #a0b0c0 !important; /* Darker border on hover */
378
  }
379
 
380
  .gr-radio label.selected {
381
+ background-color: #d0dfff !important; /* Darker selected background */
382
  border-color: var(--primary-color) !important;
383
  color: var(--primary-color) !important;
384
  font-weight: 600 !important;
385
  }
386
 
387
  /* Input fields */
388
+ input, textarea, select {
389
  font-size: 1rem !important;
390
  padding: 0.8rem !important;
391
  border-radius: 8px !important;
 
393
  transition: all 0.2s ease !important;
394
  background-color: #ffffff !important;
395
  color: var(--text-color) !important;
 
 
396
  }
397
 
398
+ input:focus, textarea:focus, select:focus {
399
  border-color: var(--primary-color) !important;
400
  box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
401
  outline: none !important;
 
407
  overflow: hidden !important;
408
  margin: 1rem 0 !important;
409
  border: 1px solid var(--border-color) !important;
410
+ background-color: var(--card-background);
 
411
  }
412
 
413
+ .gr-accordion-header {
414
+ padding: 1rem !important;
415
+ background-color: #f0f5ff !important; /* Darker accordion header */
 
 
 
 
 
 
 
 
416
  font-weight: 600 !important;
417
  font-size: 1.1rem !important;
418
  color: var(--text-color) !important;
419
  border-bottom: 1px solid var(--border-color) !important;
 
 
 
 
 
420
  }
421
 
422
+ .gr-accordion-content {
423
+ padding: 1rem !important;
424
+ background-color: var(--card-background) !important;
 
 
 
 
425
  }
426
 
 
427
  /* Markdown text improvements */
428
+ .markdown-text {
429
  font-size: 1.05rem !important;
430
  line-height: 1.7 !important;
431
  color: var(--text-color) !important;
 
432
  }
433
 
434
  .markdown-text p {
 
444
  margin-bottom: 0.5rem !important;
445
  }
446
 
447
+ .markdown-text strong {
448
  font-weight: 600 !important;
449
+ color: #111 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
 
452
+ /* Status indicators */
453
  .status-badge {
454
  display: inline-block;
455
  padding: 0.3rem 0.7rem;
 
458
  font-weight: 500;
459
  text-align: center;
460
  }
461
+
462
+ .status-pending {
463
+ background-color: #fff3cc;
464
+ color: #b58a00;
465
+ border: 1px solid #ffd74d;
466
+ }
467
+
468
+ .status-running {
469
+ background-color: #ccebff;
470
+ color: #0066cc;
471
+ border: 1px solid #66b3ff;
472
+ }
473
+
474
+ .status-completed {
475
+ background-color: #d6f5e6;
476
+ color: #00875a;
477
+ border: 1px solid #57d9a3;
478
+ }
479
 
480
  /* Footer */
481
  .footer {
 
483
  padding: 1.5rem 1rem;
484
  text-align: center;
485
  font-size: 0.9rem;
486
+ color: #333;
487
  border-top: 1px solid var(--border-color);
488
+ background-color: #d9e0e8; /* Darker footer background */
489
  }
490
 
491
  /* Enhanced leaderboard title area */
492
  .leaderboard-header {
493
  display: flex;
494
+ align-items: center;
 
495
  justify-content: space-between;
496
  margin-bottom: 1.5rem;
497
  padding: 1.5rem;
498
+ background-color: var(--card-background);
499
  border-radius: 12px;
500
  border: 1px solid var(--border-color);
501
  box-shadow: 0 4px 12px var(--shadow-color);
 
502
  }
 
 
 
 
 
 
 
503
 
504
  .leaderboard-title {
505
+ font-size: 2.2rem;
506
  font-weight: 700;
507
  color: var(--primary-color);
508
+ margin: 0;
509
  display: flex;
510
  align-items: center;
511
+ gap: 0.5rem;
 
 
 
 
 
 
 
512
  }
513
 
514
  .leaderboard-subtitle {
515
  font-size: 1.1rem;
516
+ color: #444; /* Darker subtitle text */
517
+ margin-top: 0.5rem;
 
 
 
518
  }
519
 
520
  .timestamp {
521
  font-size: 0.85rem;
522
+ color: #444; /* Darker timestamp text */
523
  font-style: italic;
524
+ background-color: #f0f5ff; /* Darker timestamp background */
525
  padding: 5px 10px;
526
  border-radius: 6px;
 
527
  }
528
+
529
+ /* Category selector buttons */
530
+ .category-buttons {
531
+ display: flex;
532
+ flex-wrap: wrap;
533
+ gap: 10px;
534
+ margin-bottom: 1.5rem;
535
+ }
536
+
537
+ .category-button {
538
+ padding: 0.7rem 1.2rem;
539
+ background-color: #e0ebff; /* Darker button background */
540
+ border: 1px solid #b0d0ff;
541
+ border-radius: 8px;
542
+ font-weight: 500;
543
+ cursor: pointer;
544
+ transition: all 0.2s ease;
545
+ display: flex;
546
+ align-items: center;
547
+ gap: 8px;
548
  }
549
 
550
+ .category-button:hover {
551
+ background-color: #c0d0ff; /* Darker hover state */
552
+ border-color: #80a0ff;
553
+ }
554
 
555
+ .category-button.active {
556
+ background-color: var(--primary-color);
557
+ color: white;
558
+ border-color: var(--primary-color);
559
+ }
560
 
561
  /* Logo and brand styling */
562
  .logo {
563
+ font-size: 2.5em;
564
  margin-right: 0.5rem;
565
  }
566
 
567
+ /* Properly display sorting arrows */
568
+ table th.sort-asc::after {
569
+ content: " ↑";
570
+ color: var(--primary-color);
571
+ }
572
+
573
+ table th.sort-desc::after {
574
+ content: " ↓";
575
+ color: var(--primary-color);
576
+ }
577
+
578
  /* Style for About section cards */
579
  .about-card {
580
+ background-color: #f0f5ff; /* Darker card background */
581
  padding: 20px;
582
  border-radius: 12px;
583
+ height: 100%;
584
  border: 1px solid var(--border-color);
 
 
 
 
585
  }
586
  .about-card h3 {
587
  text-align: center;
588
  margin-top: 0;
 
589
  color: var(--primary-color);
590
  }
591
  .about-card p {
592
  color: var(--text-color);
593
  font-size: 0.95rem;
594
  line-height: 1.6;
 
595
  }
596
  .about-card-icon {
597
  font-size: 2.5em;
598
  text-align: center;
599
  margin-bottom: 15px;
600
  display: block;
 
601
  }
602
+
603
+ /* Ensure the table container has a fixed height and scrolls properly */
604
+ #leaderboard-table {
605
+ overflow: auto !important;
606
+ max-height: 500px !important;
607
+ }
608
+
609
+ /* Fix for dataframe component scrolling */
610
+ .gradio-dataframe {
611
+ overflow: auto !important;
612
+ max-height: 500px !important;
613
+ }
614
+
615
+ /* Fix sorting issues */
616
+ .sort-column {
617
+ cursor: pointer;
618
  }
619
  """
620
 
621
+ # Combine with any existing CSS
622
+ custom_css = enhanced_css
 
 
 
 
623
 
624
  # --- Gradio App Definition ---
625
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
 
626
 
627
  with demo:
628
  # Enhanced header with timestamp
 
642
  </div>
643
  """)
644
 
645
+ # Introduction with enhanced styling
646
+ gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
647
 
648
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
649
  with gr.TabItem("📊 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
650
  with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
  gr.HTML("""
652
+ <h2 style="display: flex; align-items: center; gap: 10px;">
653
+ <span style="font-size: 1.3em;">📈</span> Model Performance Rankings
654
+ </h2>
655
+ <p class="leaderboard-subtitle">Select a category to view specialized performance metrics</p>
 
 
 
656
  """)
657
+
658
+ # Enhanced category selector
659
+ category_selector = gr.Radio(
660
+ choices=[x[0] for x in CATEGORIES],
661
+ label="Select Performance Domain:",
662
+ value="πŸ† Overall",
663
+ interactive=True,
664
+ elem_classes="fancy-radio"
665
+ )
666
+
667
+ # Visual separator
668
+ gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
669
+
670
+ # Enhanced leaderboard table
671
+ leaderboard_df_component = gr.Dataframe(
672
+ value=update_leaderboard(DEFAULT_CATEGORY),
673
+ headers=["Rank", "Model", "Organization", "License", f"Elo Score ({DEFAULT_CATEGORY})"],
674
+ datatype=["html", "html", "str", "str", "html"],
675
+ interactive=False,
676
+ row_count=(len(master_df), "fixed"),
677
+ col_count=(5, "fixed"),
678
+ wrap=True,
679
+ elem_id="leaderboard-table",
680
+ )
681
+
682
+ # Stats cards (visual enhancement)
683
+ with gr.Row():
684
+ with gr.Column(scale=1):
685
+ gr.HTML(f"""
686
+ <div style="background-color: #f0f5ff; padding: 20px; border-radius: 12px; text-align: center;">
687
+ <div style="font-size: 2em;">πŸ”</div>
688
+ <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
689
+ <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
690
  </div>
691
  """)
692
+ with gr.Column(scale=1):
693
+ gr.HTML(f"""
694
+ <div style="background-color: #e6f7ef; padding: 20px; border-radius: 12px; text-align: center;">
695
+ <div style="font-size: 2em;">🌐</div>
696
+ <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
697
+ <div style="font-size: 1.1em; color: #666;">Organizations</div>
 
 
698
  </div>
699
  """)
700
+ with gr.Column(scale=1):
701
+ gr.HTML(f"""
702
+ <div style="background-color: #fff8e0; padding: 20px; border-radius: 12px; text-align: center;">
703
+ <div style="font-size: 2em;">πŸ…</div>
704
+ <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
705
+ <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
706
  </div>
707
  """)
708
+
709
+ # Link the radio button change to the update function
710
+ category_selector.change(
711
+ fn=update_leaderboard,
712
+ inputs=category_selector,
713
+ outputs=leaderboard_df_component
714
+ )
715
 
716
+ with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
717
+ # Enhanced about section
 
 
 
 
 
 
 
 
718
  gr.HTML("""
719
+ <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
720
+ <div style="font-size: 4em;">🧪</div>
721
  <div>
722
+ <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
723
+ <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
724
  </div>
725
  </div>
726
  """)
727
+
728
+ # Use the LLM_BENCHMARKS_TEXT variable
729
+ gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
730
+
731
+ # Add methodology cards for visual enhancement
732
+ with gr.Row():
733
+ with gr.Column():
734
+ gr.HTML("""
735
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
736
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">💡</div>
737
+ <h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
738
+ <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
739
+ data preprocessing, feature engineering, model selection, and basic deployment.</p>
740
+ </div>
741
+ """)
742
+ with gr.Column():
743
+ gr.HTML("""
744
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
745
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📊</div>
746
+ <h3 style="text-align: center; margin-top: 0;">Tabular</h3>
747
+ <p>Tests a model's ability to process, analyze and model structured data, including
748
+ statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
749
+ </div>
750
+ """)
751
+
752
+ with gr.Row():
753
+ with gr.Column():
754
+ gr.HTML("""
755
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
756
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">πŸ“</div>
757
+ <h3 style="text-align: center; margin-top: 0;">NLP</h3>
758
+ <p>Evaluates natural language processing capabilities including text classification,
759
+ sentiment analysis, entity recognition, text generation, and language understanding.</p>
760
+ </div>
761
+ """)
762
+ with gr.Column():
763
+ gr.HTML("""
764
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
765
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">πŸ‘οΈ</div>
766
+ <h3 style="text-align: center; margin-top: 0;">CV</h3>
767
+ <p>Tests computer vision capabilities including image classification, object detection,
768
+ image generation, and visual understanding tasks across various domains.</p>
769
+ </div>
770
+ """)
771
 
772
+ # Optional: Uncomment if you want to re-enable the Submit tab
773
+ # with gr.TabItem("πŸš€ Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
774
+ # with gr.Column():
775
+ # gr.HTML("""
776
+ # <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
777
+ # <div style="font-size: 4em;">πŸš€</div>
778
+ # <div>
779
+ # <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
780
+ # <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
781
+ # </div>
782
+ # </div>
783
+ # """)
784
+ #
785
+ # with gr.Row():
786
+ # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
787
+ #
788
+ # with gr.Column():
789
+ # with gr.Accordion(f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
790
+ # finished_eval_table = gr.components.Dataframe(
791
+ # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
792
+ # )
793
+ # with gr.Accordion(f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
794
+ # running_eval_table = gr.components.Dataframe(
795
+ # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
796
+ # )
797
+ # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
798
+ # pending_eval_table = gr.components.Dataframe(
799
+ # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
800
+ # )
801
+ #
802
+ # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
803
+ #
804
+ # gr.HTML("""
805
+ # <h2 style="display: flex; align-items: center; gap: 10px;">
806
+ # <span style="font-size: 1.3em;">πŸ“</span> Model Submission Form
807
+ # </h2>
808
+ # """)
809
+ #
810
+ # with gr.Row():
811
+ # with gr.Column():
812
+ # model_name_textbox = gr.Textbox(
813
+ # label="Model Name (on Hugging Face Hub)",
814
+ # placeholder="Enter your model name...",
815
+ # elem_classes="enhanced-input"
816
+ # )
817
+ # revision_name_textbox = gr.Textbox(
818
+ # label="Revision / Commit Hash",
819
+ # placeholder="main",
820
+ # elem_classes="enhanced-input"
821
+ # )
822
+ # model_type = gr.Dropdown(
823
+ # choices=["Type A", "Type B", "Type C"],
824
+ # label="Model Type",
825
+ # multiselect=False,
826
+ # value=None,
827
+ # interactive=True,
828
+ # elem_classes="enhanced-dropdown"
829
+ # )
830
+ # with gr.Column():
831
+ # precision = gr.Dropdown(
832
+ # choices=["float16", "bfloat16", "float32", "int8", "auto"],
833
+ # label="Precision",
834
+ # multiselect=False,
835
+ # value="auto",
836
+ # interactive=True,
837
+ # elem_classes="enhanced-dropdown"
838
+ # )
839
+ # weight_type = gr.Dropdown(
840
+ # choices=["Original", "Adapter", "Delta"],
841
+ # label="Weights Type",
842
+ # multiselect=False,
843
+ # value="Original",
844
+ # interactive=True,
845
+ # elem_classes="enhanced-dropdown"
846
+ # )
847
+ # base_model_name_textbox = gr.Textbox(
848
+ # label="Base Model (for delta or adapter weights)",
849
+ # placeholder="Only needed for adapter/delta weights",
850
+ # elem_classes="enhanced-input"
851
+ # )
852
+ #
853
+ # submit_button = gr.Button(
854
+ # "Submit for Evaluation",
855
+ # elem_classes="primary-button"
856
+ # )
857
+ # submission_result = gr.Markdown()
858
+ # submit_button.click(
859
+ # add_new_eval,
860
+ # [model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type],
861
+ # submission_result,
862
+ # )
863
+
864
+ # Enhanced citation section
865
+ with gr.Accordion("📄 Citation", open=False, elem_classes="citation-accordion"):
866
+ gr.HTML("""
867
+ <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
868
+ <div style="font-size: 2.5em;">📄</div>
869
+ <div>
870
+ <h3 style="margin: 0;">How to Cite This Benchmark</h3>
871
+ <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
872
+ </div>
873
+ </div>
874
+ """)
875
+
876
+ citation_button = gr.Textbox(
877
+ value=CITATION_BUTTON_TEXT,
878
+ label=CITATION_BUTTON_LABEL,
879
+ lines=10,
880
+ elem_id="citation-button",
881
+ show_copy_button=True,
882
+ )
883
+
884
+ # Footer
885
  gr.HTML("""
886
  <div class="footer">
887
  <p>© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
888
+ <p style="margin-top: 5px; display: flex; justify-content: center; gap: 20px;">
889
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
890
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
891
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
 
894
  """)
895
 
896
  # --- Keep scheduler if relevant ---
 
897
  if __name__ == "__main__":
898
  try:
899
  scheduler = BackgroundScheduler()
900
  if callable(restart_space):
 
901
  if REPO_ID and REPO_ID != "your/space-id":
902
+ scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
 
 
903
  scheduler.start()
904
  else:
905
  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
 
911
  # --- Launch the app ---
912
  if __name__ == "__main__":
913
  print("Launching Enhanced Gradio App...")
914
+ demo.launch()