Jerrycool committed on
Commit
15173fe
·
verified ·
1 Parent(s): 05732d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +438 -417
app.py CHANGED
@@ -14,7 +14,7 @@ try:
14
  LLM_BENCHMARKS_TEXT,
15
  TITLE,
16
  )
17
- from src.display.css_html_js import custom_css
18
  from src.envs import REPO_ID
19
  from src.submission.submit import add_new_eval
20
  print("Successfully imported from src module.")
@@ -22,39 +22,39 @@ try:
22
  except ImportError:
23
  print("Warning: Using placeholder values because src module imports failed.")
24
  CITATION_BUTTON_LABEL = "Citation"
25
- CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
26
- EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
27
  INTRODUCTION_TEXT = """
28
- # Welcome to the MLE-Dojo Benchmark Leaderboard
29
-
30
  This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
31
  Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
32
-
33
- ## How to read this leaderboard
34
- - Select a domain category to view specialized rankings
35
- - Higher ELO scores indicate better performance
36
- - Click on any model name to learn more about it
37
  """
38
  LLM_BENCHMARKS_TEXT = """
39
- # About the MLE-Dojo Benchmark
40
-
41
- ## Evaluation Methodology
42
  The MLE-Dojo benchmark evaluates models across various domains including:
43
-
44
- - **MLE-Lite**: Basic machine learning engineering tasks
45
- - **Tabular**: Data manipulation, analysis, and modeling with structured data
46
- - **NLP**: Natural language processing tasks including classification, generation, and understanding
47
- - **CV**: Computer vision tasks including image classification, object detection, and generation
48
-
49
- Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other.
50
-
51
- ## Contact
52
- For more information or to submit your model, please contact us at [email protected]
53
  """
54
- TITLE = "<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>"
55
- custom_css = ""
56
- REPO_ID = "your/space-id"
57
- def add_new_eval(*args): return "Submission placeholder."
58
 
59
  # --- Elo Leaderboard Configuration ---
60
  # Enhanced data with Rank (placeholder), Organizer, License, and URL
@@ -71,7 +71,7 @@ data = [
71
 
72
  # Add organization logos (for visual enhancement)
73
  org_logos = {
74
- 'OpenAI': 'πŸ“±', # You can replace these with actual icon URLs in production
75
  'DeepSeek': 'πŸ”',
76
  'Google': '🌐',
77
  'Default': '🤖'
@@ -81,51 +81,50 @@ org_logos = {
81
  master_df = pd.DataFrame(data)
82
 
83
  # Add last updated timestamp
84
- last_updated = datetime.now().strftime("%B %d, %Y at %H:%M:%S")
85
 
86
  # Define categories with fancy icons
87
  CATEGORIES = [
88
- ("πŸ† Overall", "Overall"),
89
- ("πŸ’‘ MLE-Lite", "MLE-Lite"),
90
- ("πŸ“Š Tabular", "Tabular"),
91
- ("πŸ“ NLP", "NLP"),
92
  ("πŸ‘οΈ CV", "CV")
93
  ]
94
- DEFAULT_CATEGORY = "Overall"
 
95
 
96
- # Map user-facing categories to DataFrame column names
97
  category_to_column = {
 
98
  "MLE-Lite": "MLE-Lite_Elo",
99
  "Tabular": "Tabular_Elo",
100
  "NLP": "NLP_Elo",
101
  "CV": "CV_Elo",
102
- "Overall": "Overall"
103
  }
104
 
105
  # --- Helper function to update leaderboard ---
106
  def update_leaderboard(category_label):
107
  """
108
- Enhanced function to update the leaderboard with visual improvements
109
  """
110
- # Extract the category value from the label if it's a tuple (icon, value)
111
- if isinstance(category_label, tuple):
112
- category = category_label[1]
113
- else:
114
- # For backward compatibility or direct values
115
- category = category_label.split(" ")[-1] if " " in category_label else category_label
116
-
117
- score_column = category_to_column.get(category)
118
  if score_column is None or score_column not in master_df.columns:
119
- print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
120
- score_column = category_to_column[DEFAULT_CATEGORY]
 
121
  if score_column not in master_df.columns:
122
  print(f"Error: Default column '{score_column}' also not found.")
 
123
  return pd.DataFrame({
124
- "Rank": [],
125
- "Model": [],
126
- "Organizer": [],
127
- "License": [],
128
- "Elo Score": []
129
  })
130
 
131
  # Select base columns + the score column for sorting
@@ -135,27 +134,15 @@ def update_leaderboard(category_label):
135
  # Sort by the selected 'Elo Score' descending
136
  df.sort_values(by=score_column, ascending=False, inplace=True)
137
 
138
- # Add Rank with medal emojis for top 3
139
  df.reset_index(drop=True, inplace=True)
140
-
141
- # Create fancy rank with medals for top positions
142
- def get_rank_display(idx):
143
- if idx == 0:
144
- return "🥇 1"
145
- elif idx == 1:
146
- return "🥈 2"
147
- elif idx == 2:
148
- return "🥉 3"
149
- else:
150
- return f"{idx + 1}"
151
-
152
- df.insert(0, 'Rank', df.index.map(get_rank_display))
153
-
154
- # Add organization icons to model names
155
  df['Model'] = df.apply(
156
  lambda row: f"""<div style="display: flex; align-items: center;">
157
  <span style="font-size: 1.5em; margin-right: 10px;">{org_logos.get(row['organizer'], org_logos['Default'])}</span>
158
- <a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank'
159
  style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
160
  {row['model_name']}
161
  </a>
@@ -163,33 +150,34 @@ def update_leaderboard(category_label):
163
  axis=1
164
  )
165
 
166
- # Format Elo scores with visual indicators
167
  df['Elo Display'] = df[score_column].apply(
168
- lambda score: f"""<div style="display: flex; align-items: center;">
169
- <span style="font-weight: bold; color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'}">
170
  {score}
171
  </span>
172
- <div style="margin-left: 10px; height: 12px; width: 60px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
173
- <div style="height: 100%; width: {min(100, max(5, (score-700)/7))}%; background-color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'};"></div>
174
  </div>
175
  </div>"""
176
  )
177
 
178
  # Rename columns for display
179
- df.rename(columns={score_column: 'Elo Score'}, inplace=True)
180
- df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
181
 
182
  # Select and reorder columns for final display
183
- final_columns = ["Rank", "Model", "Organizer", "License", "Elo Display"]
184
- df = df[final_columns]
 
 
 
 
 
 
185
 
186
- # Rename for display
187
- df.columns = ["Rank", "Model", "Organization", "License", f"Elo Score ({category})"]
188
-
189
- return df
190
 
191
  # --- Mock/Placeholder functions/data for other tabs ---
192
- print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
193
  finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
194
  running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
195
  pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
@@ -198,10 +186,14 @@ EVAL_TYPES = ["str", "str", "str", "str"]
198
 
199
  # --- Keep restart function if relevant ---
200
  def restart_space():
201
- print(f"Attempting to restart space: {REPO_ID}")
202
- # Replace with your actual space restart mechanism if needed
 
 
 
203
 
204
  # --- Enhanced CSS for beauty and readability ---
 
205
  enhanced_css = """
206
  /* Base styling */
207
  :root {
@@ -209,11 +201,11 @@ enhanced_css = """
209
  --secondary-color: #2ec27e;
210
  --accent-color: #e5a50a;
211
  --warning-color: #ff7800;
212
- --text-color: #333333;
213
- --background-color: #ffffff;
214
- --card-background: #f9f9f9;
215
  --border-color: #e0e0e0;
216
- --shadow-color: rgba(0, 0, 0, 0.1);
217
  }
218
 
219
  /* Typography */
@@ -222,7 +214,7 @@ body, .gradio-container {
222
  font-size: 16px !important;
223
  line-height: 1.6 !important;
224
  color: var(--text-color) !important;
225
- background-color: var(--background-color) !important;
226
  }
227
 
228
  /* Headings */
@@ -259,6 +251,7 @@ h3 {
259
  border-radius: 12px !important;
260
  overflow: hidden !important;
261
  box-shadow: 0 4px 12px var(--shadow-color) !important;
 
262
  }
263
 
264
  .tab-nav button {
@@ -267,60 +260,88 @@ h3 {
267
  padding: 0.8rem 1.5rem !important;
268
  border-radius: 0 !important;
269
  transition: all 0.2s ease !important;
 
 
 
270
  }
271
 
272
  .tab-nav button.selected {
273
- background-color: var(--primary-color) !important;
274
- color: white !important;
275
  font-weight: 600 !important;
 
276
  }
277
 
278
  /* Card styling */
279
- .gradio-container .gr-box, .gradio-container .gr-panel {
280
  border-radius: 12px !important;
281
  border: 1px solid var(--border-color) !important;
282
  box-shadow: 0 4px 12px var(--shadow-color) !important;
283
  overflow: hidden !important;
 
 
 
284
  }
 
 
 
 
 
 
 
 
 
 
 
285
 
286
  /* Table styling */
 
 
 
 
 
 
 
 
287
  table {
288
  width: 100% !important;
289
- border-collapse: separate !important;
290
  border-spacing: 0 !important;
291
- margin: 1.5rem 0 !important;
292
- border-radius: 8px !important;
293
- overflow: hidden !important;
294
- box-shadow: 0 4px 12px var(--shadow-color) !important;
295
  }
296
 
297
  th {
298
- background-color: #f0f5ff !important;
299
  color: var(--primary-color) !important;
300
  font-weight: 600 !important;
301
- padding: 1rem !important;
302
- font-size: 1.1rem !important;
303
  text-align: left !important;
304
  border-bottom: 2px solid var(--primary-color) !important;
 
 
 
305
  }
306
 
307
  td {
308
- padding: 1rem !important;
309
  border-bottom: 1px solid var(--border-color) !important;
310
  font-size: 1rem !important;
311
  vertical-align: middle !important;
 
 
312
  }
313
 
314
- tr:nth-child(even) {
315
- background-color: #f8fafd !important;
316
  }
317
 
318
- tr:hover {
319
- background-color: #edf2fb !important;
320
  }
321
 
322
- tr:first-child td {
323
- border-top: none !important;
324
  }
325
 
326
  /* Button styling */
@@ -348,9 +369,13 @@ button.primary:hover, .gr-button.primary:hover {
348
  flex-wrap: wrap !important;
349
  gap: 10px !important;
350
  margin: 1rem 0 !important;
 
 
 
 
351
  }
352
 
353
- .gr-radio label {
354
  background-color: #f5f7fa !important;
355
  border: 1px solid var(--border-color) !important;
356
  border-radius: 8px !important;
@@ -362,8 +387,19 @@ button.primary:hover, .gr-button.primary:hover {
362
  display: flex !important;
363
  align-items: center !important;
364
  gap: 8px !important;
 
 
 
 
 
 
 
 
 
 
365
  }
366
 
 
367
  .gr-radio label:hover {
368
  background-color: #eaeef3 !important;
369
  border-color: #c0c9d6 !important;
@@ -377,15 +413,19 @@ button.primary:hover, .gr-button.primary:hover {
377
  }
378
 
379
  /* Input fields */
380
- input, textarea, select {
381
  font-size: 1rem !important;
382
  padding: 0.8rem !important;
383
  border-radius: 8px !important;
384
  border: 1px solid var(--border-color) !important;
385
  transition: all 0.2s ease !important;
 
 
 
 
386
  }
387
 
388
- input:focus, textarea:focus, select:focus {
389
  border-color: var(--primary-color) !important;
390
  box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
391
  outline: none !important;
@@ -397,25 +437,48 @@ input:focus, textarea:focus, select:focus {
397
  overflow: hidden !important;
398
  margin: 1rem 0 !important;
399
  border: 1px solid var(--border-color) !important;
 
 
400
  }
401
 
402
- .gr-accordion-header {
403
- padding: 1rem !important;
 
 
 
 
 
 
 
 
404
  background-color: #f5f7fa !important;
405
  font-weight: 600 !important;
406
  font-size: 1.1rem !important;
407
  color: var(--text-color) !important;
 
 
 
 
 
 
408
  }
409
 
410
- .gr-accordion-content {
411
- padding: 1rem !important;
412
- background-color: white !important;
 
 
 
 
413
  }
414
 
 
415
  /* Markdown text improvements */
416
- .markdown-text {
417
  font-size: 1.05rem !important;
418
  line-height: 1.7 !important;
 
 
419
  }
420
 
421
  .markdown-text p {
@@ -431,12 +494,26 @@ input:focus, textarea:focus, select:focus {
431
  margin-bottom: 0.5rem !important;
432
  }
433
 
434
- .markdown-text strong {
435
  font-weight: 600 !important;
436
- color: #333 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  }
438
 
439
- /* Status indicators */
440
  .status-badge {
441
  display: inline-block;
442
  padding: 0.3rem 0.7rem;
@@ -445,127 +522,140 @@ input:focus, textarea:focus, select:focus {
445
  font-weight: 500;
446
  text-align: center;
447
  }
448
-
449
- .status-pending {
450
- background-color: #fff8e0;
451
- color: #b58a00;
452
- border: 1px solid #ffd74d;
453
- }
454
-
455
- .status-running {
456
- background-color: #e0f2ff;
457
- color: #0066cc;
458
- border: 1px solid #66b3ff;
459
- }
460
-
461
- .status-completed {
462
- background-color: #e6f7ef;
463
- color: #00875a;
464
- border: 1px solid #57d9a3;
465
- }
466
 
467
  /* Footer */
468
  .footer {
469
  margin-top: 2rem;
470
- padding: 1rem;
471
  text-align: center;
472
  font-size: 0.9rem;
473
- color: #666;
474
  border-top: 1px solid var(--border-color);
 
475
  }
476
 
477
- /* Enhanced leaderboard title */
478
  .leaderboard-header {
479
  display: flex;
480
- align-items: center;
 
481
  justify-content: space-between;
482
  margin-bottom: 1.5rem;
483
- padding-bottom: 1rem;
484
- border-bottom: 2px solid var(--border-color);
 
 
 
 
 
 
 
 
 
 
485
  }
486
 
 
487
  .leaderboard-title {
488
- font-size: 2.2rem;
489
  font-weight: 700;
490
  color: var(--primary-color);
491
- margin: 0;
492
  display: flex;
493
  align-items: center;
494
- gap: 0.5rem;
 
 
 
 
 
 
 
495
  }
496
 
497
  .leaderboard-subtitle {
498
  font-size: 1.1rem;
499
  color: #666;
500
- margin-top: 0.5rem;
 
 
 
501
  }
502
 
503
  .timestamp {
504
  font-size: 0.85rem;
505
  color: #666;
506
  font-style: italic;
 
 
 
 
507
  }
508
-
509
- /* Category selector buttons */
510
- .category-buttons {
511
- display: flex;
512
- flex-wrap: wrap;
513
- gap: 10px;
514
- margin-bottom: 1.5rem;
515
  }
516
 
517
- .category-button {
518
- padding: 0.7rem 1.2rem;
519
- background-color: #f0f5ff;
520
- border: 1px solid #d0e0ff;
521
- border-radius: 8px;
522
- font-weight: 500;
523
- cursor: pointer;
524
- transition: all 0.2s ease;
525
- display: flex;
526
- align-items: center;
527
- gap: 8px;
528
- }
529
 
530
- .category-button:hover {
531
- background-color: #e0ebff;
532
- border-color: #b0d0ff;
533
- }
534
-
535
- .category-button.active {
536
- background-color: var(--primary-color);
537
- color: white;
538
- border-color: var(--primary-color);
539
- }
540
 
541
  /* Logo and brand styling */
542
  .logo {
543
- font-size: 2.5em;
544
  margin-right: 0.5rem;
545
  }
546
 
547
- /* Medal styling for top ranks */
548
- .rank-1 {
549
- color: #ffd700;
550
- font-weight: bold;
 
 
 
 
 
 
 
551
  }
552
-
553
- .rank-2 {
554
- color: #c0c0c0;
555
- font-weight: bold;
 
556
  }
557
-
558
- .rank-3 {
559
- color: #cd7f32;
560
- font-weight: bold;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  }
562
  """
563
 
564
- # Combine with any existing CSS
565
- custom_css = enhanced_css + custom_css
 
 
 
 
566
 
567
  # --- Gradio App Definition ---
568
- demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
 
569
 
570
  with demo:
571
  # Enhanced header with timestamp
@@ -585,250 +675,175 @@ with demo:
585
  </div>
586
  """)
587
 
588
- # Introduction with enhanced styling
589
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
590
 
591
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
592
  with gr.TabItem("πŸ“Š Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
593
  with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  gr.HTML("""
595
- <h2 style="display: flex; align-items: center; gap: 10px;">
596
- <span style="font-size: 1.3em;">πŸ“ˆ</span> Model Performance Rankings
597
- </h2>
598
- <p class="leaderboard-subtitle">Select a category to view specialized performance metrics</p>
 
 
 
599
  """)
600
-
601
- # Enhanced category selector
602
- category_selector = gr.Radio(
603
- choices=[x[0] for x in CATEGORIES],
604
- label="Select Performance Domain:",
605
- value="πŸ† Overall",
606
- interactive=True,
607
- elem_classes="fancy-radio"
608
- )
609
-
610
- # Visual separator
611
- gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
612
-
613
- # Enhanced leaderboard table
614
- leaderboard_df_component = gr.Dataframe(
615
- value=update_leaderboard(DEFAULT_CATEGORY),
616
- headers=["Rank", "Model", "Organization", "License", f"Elo Score ({DEFAULT_CATEGORY})"],
617
- datatype=["html", "html", "str", "str", "html"],
618
- interactive=False,
619
- row_count=(len(master_df), "fixed"),
620
- col_count=(5, "fixed"),
621
- wrap=True,
622
- elem_id="leaderboard-table",
623
- )
624
-
625
- # Stats cards (visual enhancement)
626
- with gr.Row():
627
- with gr.Column(scale=1):
628
- gr.HTML(f"""
629
- <div style="background-color: #f0f5ff; padding: 20px; border-radius: 12px; text-align: center;">
630
- <div style="font-size: 2em;">πŸ”</div>
631
- <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
632
- <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
633
  </div>
634
  """)
635
- with gr.Column(scale=1):
636
- gr.HTML(f"""
637
- <div style="background-color: #e6f7ef; padding: 20px; border-radius: 12px; text-align: center;">
638
- <div style="font-size: 2em;">🌐</div>
639
- <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
640
- <div style="font-size: 1.1em; color: #666;">Organizations</div>
 
 
641
  </div>
642
  """)
643
- with gr.Column(scale=1):
644
- gr.HTML(f"""
645
- <div style="background-color: #fff8e0; padding: 20px; border-radius: 12px; text-align: center;">
646
- <div style="font-size: 2em;">πŸ…</div>
647
- <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
648
- <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
649
  </div>
650
  """)
651
-
652
- # Link the radio button change to the update function
653
- category_selector.change(
654
- fn=update_leaderboard,
655
- inputs=category_selector,
656
- outputs=leaderboard_df_component
657
- )
658
 
659
- with gr.TabItem("πŸ“š About", elem_id="llm-benchmark-tab-about", id=1):
660
- # Enhanced about section
 
 
 
 
 
 
 
 
661
  gr.HTML("""
662
- <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
663
- <div style="font-size: 4em;">πŸ§ͺ</div>
664
  <div>
665
- <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
666
- <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
667
  </div>
668
  </div>
669
  """)
670
-
671
- # Use the LLM_BENCHMARKS_TEXT variable
672
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
673
-
674
- # Add methodology cards for visual enhancement
675
- with gr.Row():
676
- with gr.Column():
677
- gr.HTML("""
678
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
679
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">πŸ’‘</div>
680
- <h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
681
- <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
682
- data preprocessing, feature engineering, model selection, and basic deployment.</p>
683
- </div>
684
- """)
685
- with gr.Column():
686
- gr.HTML("""
687
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
688
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">πŸ“Š</div>
689
- <h3 style="text-align: center; margin-top: 0;">Tabular</h3>
690
- <p>Tests a model's ability to process, analyze and model structured data, including
691
- statistical analysis,statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
692
- </div>
693
- """)
694
-
695
- with gr.Row():
696
- with gr.Column():
697
- gr.HTML("""
698
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
699
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">πŸ“</div>
700
- <h3 style="text-align: center; margin-top: 0;">NLP</h3>
701
- <p>Evaluates natural language processing capabilities including text classification,
702
- sentiment analysis, entity recognition, text generation, and language understanding.</p>
703
- </div>
704
- """)
705
- with gr.Column():
706
- gr.HTML("""
707
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
708
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">πŸ‘οΈ</div>
709
- <h3 style="text-align: center; margin-top: 0;">CV</h3>
710
- <p>Tests computer vision capabilities including image classification, object detection,
711
- image generation, and visual understanding tasks across various domains.</p>
712
- </div>
713
- """)
714
 
715
- # Optional: Uncomment if you want to re-enable the Submit tab
716
- # with gr.TabItem("πŸš€ Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
717
- # with gr.Column():
718
- # gr.HTML("""
719
- # <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
720
- # <div style="font-size: 4em;">πŸš€</div>
721
- # <div>
722
- # <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
723
- # <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
724
- # </div>
725
- # </div>
726
- # """)
727
- #
728
- # with gr.Row():
729
- # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
730
- #
731
- # with gr.Column():
732
- # with gr.Accordion(f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
733
- # finished_eval_table = gr.components.Dataframe(
734
- # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
735
- # )
736
- # with gr.Accordion(f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
737
- # running_eval_table = gr.components.Dataframe(
738
- # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
739
- # )
740
- # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
741
- # pending_eval_table = gr.components.Dataframe(
742
- # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
743
- # )
744
- #
745
- # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
746
- #
747
- # gr.HTML("""
748
- # <h2 style="display: flex; align-items: center; gap: 10px;">
749
- # <span style="font-size: 1.3em;">πŸ“</span> Model Submission Form
750
- # </h2>
751
- # """)
752
- #
753
- # with gr.Row():
754
- # with gr.Column():
755
- # model_name_textbox = gr.Textbox(
756
- # label="Model Name (on Hugging Face Hub)",
757
- # placeholder="Enter your model name...",
758
- # elem_classes="enhanced-input"
759
- # )
760
- # revision_name_textbox = gr.Textbox(
761
- # label="Revision / Commit Hash",
762
- # placeholder="main",
763
- # elem_classes="enhanced-input"
764
- # )
765
- # model_type = gr.Dropdown(
766
- # choices=["Type A", "Type B", "Type C"],
767
- # label="Model Type",
768
- # multiselect=False,
769
- # value=None,
770
- # interactive=True,
771
- # elem_classes="enhanced-dropdown"
772
- # )
773
- # with gr.Column():
774
- # precision = gr.Dropdown(
775
- # choices=["float16", "bfloat16", "float32", "int8", "auto"],
776
- # label="Precision",
777
- # multiselect=False,
778
- # value="auto",
779
- # interactive=True,
780
- # elem_classes="enhanced-dropdown"
781
- # )
782
- # weight_type = gr.Dropdown(
783
- # choices=["Original", "Adapter", "Delta"],
784
- # label="Weights Type",
785
- # multiselect=False,
786
- # value="Original",
787
- # interactive=True,
788
- # elem_classes="enhanced-dropdown"
789
- # )
790
- # base_model_name_textbox = gr.Textbox(
791
- # label="Base Model (for delta or adapter weights)",
792
- # placeholder="Only needed for adapter/delta weights",
793
- # elem_classes="enhanced-input"
794
- # )
795
- #
796
- # submit_button = gr.Button(
797
- # "Submit for Evaluation",
798
- # elem_classes="primary-button"
799
- # )
800
- # submission_result = gr.Markdown()
801
- # submit_button.click(
802
- # add_new_eval,
803
- # [model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type],
804
- # submission_result,
805
- # )
806
-
807
- # Enhanced citation section
808
- with gr.Accordion("πŸ“„ Citation", open=False, elem_classes="citation-accordion"):
809
- gr.HTML("""
810
- <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
811
- <div style="font-size: 2.5em;">πŸ“„</div>
812
- <div>
813
- <h3 style="margin: 0;">How to Cite This Benchmark</h3>
814
- <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
815
- </div>
816
- </div>
817
- """)
818
-
819
- citation_button = gr.Textbox(
820
- value=CITATION_BUTTON_TEXT,
821
- label=CITATION_BUTTON_LABEL,
822
- lines=10,
823
- elem_id="citation-button",
824
- show_copy_button=True,
825
- )
826
-
827
- # Footer
828
  gr.HTML("""
829
  <div class="footer">
830
  <p>© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
831
- <p style="margin-top: 5px; display: flex; justify-content: center; gap: 20px;">
832
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
833
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
834
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
@@ -837,12 +852,16 @@ with demo:
837
  """)
838
 
839
  # --- Keep scheduler if relevant ---
 
840
  if __name__ == "__main__":
841
  try:
842
  scheduler = BackgroundScheduler()
843
  if callable(restart_space):
 
844
  if REPO_ID and REPO_ID != "your/space-id":
845
- scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
 
 
846
  scheduler.start()
847
  else:
848
  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
@@ -854,4 +873,6 @@ if __name__ == "__main__":
854
  # --- Launch the app ---
855
  if __name__ == "__main__":
856
  print("Launching Enhanced Gradio App...")
857
- demo.launch()
 
 
 
14
  LLM_BENCHMARKS_TEXT,
15
  TITLE,
16
  )
17
+ from src.display.css_html_js import custom_css # Assuming this might exist
18
  from src.envs import REPO_ID
19
  from src.submission.submit import add_new_eval
20
  print("Successfully imported from src module.")
 
22
  except ImportError:
23
  print("Warning: Using placeholder values because src module imports failed.")
24
  CITATION_BUTTON_LABEL = "Citation"
25
+ CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark...\n@misc{mledojo2025benchmark,\n title={MLE-Dojo Benchmark},\n author={MLE-Dojo Team},\n year={2025},\n howpublished={\\url{https://your-benchmark-url.example.com}},\n}" # Added example citation text
26
+ EVALUATION_QUEUE_TEXT = "### Current evaluation queue:" # Use Markdown heading
27
  INTRODUCTION_TEXT = """
28
+ ## Welcome to the MLE-Dojo Benchmark Leaderboard
29
+
30
  This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
31
  Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
32
+
33
+ ### How to read this leaderboard
34
+ - Select a domain category using the radio buttons below to view specialized rankings.
35
+ - Higher ELO scores indicate better performance within that category.
36
+ - Click on a model name to visit its page (if available).
37
  """
38
  LLM_BENCHMARKS_TEXT = """
39
+ ## About the MLE-Dojo Benchmark
40
+
41
+ ### Evaluation Methodology
42
  The MLE-Dojo benchmark evaluates models across various domains including:
43
+
44
+ - **MLE-Lite**: Basic machine learning engineering tasks (data preprocessing, feature engineering, model selection).
45
+ - **Tabular**: Data manipulation, analysis, and modeling with structured data.
46
+ - **NLP**: Natural language processing tasks (classification, generation, understanding).
47
+ - **CV**: Computer vision tasks (image classification, object detection, generation).
48
+
49
+ Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other based on competitions within each domain.
50
+
51
+ ### Contact
52
+ For more information or to submit your model, please contact us at `[email protected]` (replace with actual contact).
53
  """
54
+ TITLE = "<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>" # Keep title simple for header
55
+ custom_css = "" # Will be populated by enhanced_css later
56
+ REPO_ID = "your/space-id" # Replace with your actual Hugging Face Space ID if restarting
57
def add_new_eval(*args):
    """Stand-in used when the real submission handler could not be imported.

    Accepts any positional arguments, ignores them, and returns a fixed
    placeholder message.
    """
    placeholder_message = "Submission placeholder."
    return placeholder_message
58
 
59
  # --- Elo Leaderboard Configuration ---
60
  # Enhanced data with Rank (placeholder), Organizer, License, and URL
 
71
 
72
  # Add organization logos (for visual enhancement)
73
  org_logos = {
74
+ 'OpenAI': 'πŸ“±', # Replace with actual icon URLs or keep emojis
75
  'DeepSeek': 'πŸ”',
76
  'Google': '🌐',
77
  'Default': 'πŸ€–'
 
81
  master_df = pd.DataFrame(data)
82
 
83
  # Add last updated timestamp
84
# Attach the local timezone before formatting: a naive datetime renders %Z as
# an empty string, which left a dangling space at the end of the timestamp.
last_updated = datetime.now().astimezone().strftime("%B %d, %Y at %H:%M:%S %Z")
85
 
86
  # Define categories with fancy icons
87
  CATEGORIES = [
88
+ ("πŸ† Overall", "Overall"),
89
+ ("πŸ’‘ MLE-Lite", "MLE-Lite"),
90
+ ("πŸ“Š Tabular", "Tabular"),
91
+ ("πŸ“ NLP", "NLP"),
92
  ("πŸ‘οΈ CV", "CV")
93
  ]
94
+ DEFAULT_CATEGORY_LABEL = "πŸ† Overall" # Use the label for default value
95
+ DEFAULT_CATEGORY_VALUE = "Overall" # The actual value
96
 
97
+ # Map user-facing category *values* to DataFrame column names
98
# Lookup from the category value chosen in the UI to the master_df column that
# holds the matching score. "Overall" maps to the column of the same name; each
# domain's score lives in a "<domain>_Elo" column.
category_to_column = {
    "Overall": "Overall",
    **{domain: f"{domain}_Elo" for domain in ("MLE-Lite", "Tabular", "NLP", "CV")},
}
105
 
106
  # --- Helper function to update leaderboard ---
107
  def update_leaderboard(category_label):
108
  """
109
+ Enhanced function to update the leaderboard with visual improvements and numerical rank.
110
  """
111
+ # Find the category value corresponding to the selected label
112
+ category_value = DEFAULT_CATEGORY_VALUE # Default fallback
113
+ for label, value in CATEGORIES:
114
+ if label == category_label:
115
+ category_value = value
116
+ break
117
+
118
+ score_column = category_to_column.get(category_value)
119
  if score_column is None or score_column not in master_df.columns:
120
+ print(f"Warning: Invalid category value '{category_value}' or column '{score_column}'. Falling back to default.")
121
+ score_column = category_to_column[DEFAULT_CATEGORY_VALUE] # Fallback to default value
122
+ category_value = DEFAULT_CATEGORY_VALUE # Ensure category value matches fallback
123
  if score_column not in master_df.columns:
124
  print(f"Error: Default column '{score_column}' also not found.")
125
+ # Return an empty DataFrame with the correct structure
126
  return pd.DataFrame({
127
+ "Rank": [], "Model": [], "Organization": [], "License": [], f"Elo Score ({category_value})": []
 
 
 
 
128
  })
129
 
130
  # Select base columns + the score column for sorting
 
134
  # Sort by the selected 'Elo Score' descending
135
  df.sort_values(by=score_column, ascending=False, inplace=True)
136
 
137
+ # Add Rank (numerical)
138
  df.reset_index(drop=True, inplace=True)
139
+ df.insert(0, 'Rank', df.index + 1) # Insert numerical rank starting from 1
140
+
141
+ # Add organization icons to model names with clickable links
 
 
 
 
 
 
 
 
 
 
 
 
142
  df['Model'] = df.apply(
143
  lambda row: f"""<div style="display: flex; align-items: center;">
144
  <span style="font-size: 1.5em; margin-right: 10px;">{org_logos.get(row['organizer'], org_logos['Default'])}</span>
145
+ <a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank'
146
  style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>
147
  {row['model_name']}
148
  </a>
 
150
  axis=1
151
  )
152
 
153
+ # Format Elo scores with visual indicators (bar + color)
154
  df['Elo Display'] = df[score_column].apply(
155
+ lambda score: f"""<div style="display: flex; align-items: center; justify-content: flex-start;">
156
+ <span style="font-weight: bold; min-width: 40px; text-align: right; color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'}">
157
  {score}
158
  </span>
159
+ <div style="margin-left: 10px; height: 12px; width: 80px; background-color: #eaeaea; border-radius: 6px; overflow: hidden;">
160
+ <div style="height: 100%; width: {min(100, max(5, (score - 700) / 7))}%; background-color: {'#1a5fb4' if score >= 1000 else '#2ec27e' if score >= 900 else '#e5a50a' if score >= 800 else '#ff7800'};"></div>
161
  </div>
162
  </div>"""
163
  )
164
 
165
  # Rename columns for display
166
+ df.rename(columns={'organizer': 'Organization', 'license': 'License'}, inplace=True) # Renamed for clarity
 
167
 
168
  # Select and reorder columns for final display
169
+ # Use the determined category_value for the score column header
170
+ final_columns = ["Rank", "Model", "Organization", "License", "Elo Display"]
171
+ df_display = df[final_columns].copy()
172
+
173
+ # Rename the score column dynamically
174
+ df_display.rename(columns={"Elo Display": f"Elo Score ({category_value})"}, inplace=True)
175
+
176
+ return df_display
177
 
 
 
 
 
178
 
179
  # --- Mock/Placeholder functions/data for other tabs ---
180
# No queue backend is wired up here: announce that, then expose three empty
# frames (finished / running / pending) that share one column layout.
print("Warning: Evaluation queue data fetching is disabled/mocked.")
_QUEUE_COLUMNS = ("Model", "Status", "Requested", "Started")
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = (
    pd.DataFrame(columns=list(_QUEUE_COLUMNS)) for _ in range(3)
)
 
186
 
187
  # --- Keep restart function if relevant ---
188
def restart_space():
    """Log a restart request for the configured Space.

    This is a stand-in: it only prints a message naming ``REPO_ID`` and does
    not restart anything.
    """
    # NOTE: an actual restart would have to go through some external mechanism
    # (for example a hub API call or a webhook); nothing of the sort is
    # attempted here.
    notice = f"Attempting to trigger restart for space: {REPO_ID} (placeholder action)"
    print(notice)
194
 
195
  # --- Enhanced CSS for beauty and readability ---
196
+ # (Your enhanced_css string remains the same as provided in the prompt)
197
  enhanced_css = """
198
  /* Base styling */
199
  :root {
 
201
  --secondary-color: #2ec27e;
202
  --accent-color: #e5a50a;
203
  --warning-color: #ff7800;
204
+ --text-color: #333333; /* Darker text for better contrast */
205
+ --background-color: #f4f6f8; /* Light grey background */
206
+ --card-background: #ffffff; /* White background for cards/tables */
207
  --border-color: #e0e0e0;
208
+ --shadow-color: rgba(0, 0, 0, 0.08);
209
  }
210
 
211
  /* Typography */
 
214
  font-size: 16px !important;
215
  line-height: 1.6 !important;
216
  color: var(--text-color) !important;
217
+ background-color: var(--background-color) !important; /* Ensure body background is set */
218
  }
219
 
220
  /* Headings */
 
251
  border-radius: 12px !important;
252
  overflow: hidden !important;
253
  box-shadow: 0 4px 12px var(--shadow-color) !important;
254
+ background-color: var(--card-background); /* White background for tabs container */
255
  }
256
 
257
  .tab-nav button {
 
260
  padding: 0.8rem 1.5rem !important;
261
  border-radius: 0 !important;
262
  transition: all 0.2s ease !important;
263
+ border-bottom: 2px solid transparent !important;
264
+ background-color: transparent !important; /* Ensure buttons are transparent */
265
+ color: var(--text-color) !important;
266
  }
267
 
268
  .tab-nav button.selected {
269
+ background-color: transparent !important; /* Keep transparent */
270
+ color: var(--primary-color) !important;
271
  font-weight: 600 !important;
272
+ border-bottom: 2px solid var(--primary-color) !important;
273
  }
274
 
275
  /* Card styling */
276
+ .gradio-container .gr-block { /* Target blocks for card styling */
277
  border-radius: 12px !important;
278
  border: 1px solid var(--border-color) !important;
279
  box-shadow: 0 4px 12px var(--shadow-color) !important;
280
  overflow: hidden !important;
281
+ background-color: var(--card-background) !important; /* White background */
282
+ padding: 1.5rem !important; /* Add padding to cards */
283
+ margin-bottom: 1.5rem !important; /* Add space between cards */
284
  }
285
+ /* Ensure panels also get card styling */
286
+ .gradio-container .gr-panel {
287
+ border-radius: 12px !important;
288
+ border: 1px solid var(--border-color) !important;
289
+ box-shadow: 0 4px 12px var(--shadow-color) !important;
290
+ overflow: hidden !important;
291
+ background-color: var(--card-background) !important;
292
+ padding: 1.5rem !important;
293
+ margin-bottom: 1.5rem !important;
294
+ }
295
+
296
 
297
  /* Table styling */
298
+ .gr-dataframe { /* Target the dataframe component specifically */
299
+ border-radius: 8px !important;
300
+ overflow: hidden !important; /* Needed for border-radius on table */
301
+ box-shadow: 0 4px 12px var(--shadow-color) !important;
302
+ border: 1px solid var(--border-color) !important; /* Add border around table */
303
+ margin: 1.5rem 0 !important;
304
+ }
305
+
306
  table {
307
  width: 100% !important;
308
+ border-collapse: separate !important; /* Needed for spacing and rounded corners */
309
  border-spacing: 0 !important;
310
+ background-color: var(--card-background); /* White background for table */
 
 
 
311
  }
312
 
313
  th {
314
+ background-color: #f0f5ff !important; /* Lighter blue for header */
315
  color: var(--primary-color) !important;
316
  font-weight: 600 !important;
317
+ padding: 1rem 1.2rem !important; /* Adjust padding */
318
+ font-size: 1.05rem !important; /* Slightly smaller header font */
319
  text-align: left !important;
320
  border-bottom: 2px solid var(--primary-color) !important;
321
+ position: sticky !important; /* Make header sticky */
322
+ top: 0 !important; /* Stick to the top */
323
+ z-index: 1 !important; /* Ensure header is above scrolling content */
324
  }
325
 
326
  td {
327
+ padding: 0.9rem 1.2rem !important; /* Adjust padding */
328
  border-bottom: 1px solid var(--border-color) !important;
329
  font-size: 1rem !important;
330
  vertical-align: middle !important;
331
+ background-color: var(--card-background); /* Ensure cell background is white */
332
+ color: var(--text-color); /* Ensure text color is applied */
333
  }
334
 
335
+ tr:last-child td {
336
+ border-bottom: none !important;
337
  }
338
 
339
+ tr:nth-child(even) td {
340
+ background-color: #f8fafd !important; /* Very light blue for alternating rows */
341
  }
342
 
343
+ tr:hover td {
344
+ background-color: #edf2fb !important; /* Slightly darker blue on hover */
345
  }
346
 
347
  /* Button styling */
 
369
  flex-wrap: wrap !important;
370
  gap: 10px !important;
371
  margin: 1rem 0 !important;
372
+ background-color: transparent !important; /* Ensure container is transparent */
373
+ border: none !important; /* Remove default border */
374
+ box-shadow: none !important; /* Remove default shadow */
375
+ padding: 0 !important; /* Remove default padding */
376
  }
377
 
378
+ .gr-radio > label { /* Target the label inside gr-radio */
379
  background-color: #f5f7fa !important;
380
  border: 1px solid var(--border-color) !important;
381
  border-radius: 8px !important;
 
387
  display: flex !important;
388
  align-items: center !important;
389
  gap: 8px !important;
390
+ color: var(--text-color) !important;
391
+ box-shadow: none !important; /* Override potential inner shadows */
392
+ }
393
+ /* Remove inner block styling if gradio adds extra divs */
394
+ .gr-radio > div {
395
+ background: none !important;
396
+ border: none !important;
397
+ padding: 0 !important;
398
+ margin: 0 !important;
399
+ box-shadow: none !important;
400
  }
401
 
402
+
403
  .gr-radio label:hover {
404
  background-color: #eaeef3 !important;
405
  border-color: #c0c9d6 !important;
 
413
  }
414
 
415
  /* Input fields */
416
+ input[type="text"], textarea, select { /* Be more specific */
417
  font-size: 1rem !important;
418
  padding: 0.8rem !important;
419
  border-radius: 8px !important;
420
  border: 1px solid var(--border-color) !important;
421
  transition: all 0.2s ease !important;
422
+ background-color: #ffffff !important;
423
+ color: var(--text-color) !important;
424
+ width: 100%; /* Make inputs take full width */
425
+ box-sizing: border-box; /* Include padding in width */
426
  }
427
 
428
+ input[type="text"]:focus, textarea:focus, select:focus {
429
  border-color: var(--primary-color) !important;
430
  box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
431
  outline: none !important;
 
437
  overflow: hidden !important;
438
  margin: 1rem 0 !important;
439
  border: 1px solid var(--border-color) !important;
440
+ background-color: var(--card-background) !important; /* White background */
441
+ box-shadow: 0 2px 6px var(--shadow-color) !important; /* Lighter shadow for accordion */
442
  }
443
 
444
+ .gr-accordion > .gr-block { /* Target inner block of accordion */
445
+ border: none !important;
446
+ box-shadow: none !important;
447
+ padding: 0 !important; /* Remove padding from inner block */
448
+ margin: 0 !important;
449
+ }
450
+
451
+
452
+ .gr-accordion-header { /* Check Gradio structure for header class */
453
+ padding: 1rem 1.2rem !important; /* Adjust padding */
454
  background-color: #f5f7fa !important;
455
  font-weight: 600 !important;
456
  font-size: 1.1rem !important;
457
  color: var(--text-color) !important;
458
+ border-bottom: 1px solid var(--border-color) !important;
459
+ cursor: pointer; /* Indicate clickable */
460
+ }
461
+ /* Style for open accordion header */
462
+ .gr-accordion[open] > .gr-accordion-header { /* Might need adjustment based on Gradio version */
463
+ border-bottom: 1px solid var(--border-color) !important;
464
  }
465
 
466
+ /* Style for accordion content (might be nested) */
467
+ .gr-accordion .gr-panel, .gr-accordion .gr-box { /* Check which element holds content */
468
+ padding: 1.2rem !important; /* Add padding to content */
469
+ background-color: var(--card-background) !important; /* White background */
470
+ border: none !important; /* Remove borders inside accordion */
471
+ box-shadow: none !important;
472
+ border-radius: 0 0 8px 8px !important; /* Round bottom corners */
473
  }
474
 
475
+
476
  /* Markdown text improvements */
477
+ .markdown-text { /* Might need a more specific selector like .gr-markdown */
478
  font-size: 1.05rem !important;
479
  line-height: 1.7 !important;
480
+ color: var(--text-color) !important;
481
+ background-color: transparent !important; /* Ensure markdown bg is transparent */
482
  }
483
 
484
  .markdown-text p {
 
494
  margin-bottom: 0.5rem !important;
495
  }
496
 
497
+ .markdown-text strong, .markdown-text b {
498
  font-weight: 600 !important;
499
+ color: #111 !important; /* Slightly darker for emphasis */
500
+ }
501
+ .markdown-text code { /* Style inline code */
502
+ background-color: #eef0f2;
503
+ padding: 0.2em 0.4em;
504
+ border-radius: 4px;
505
+ font-size: 0.9em;
506
+ color: #3a4a5b;
507
+ }
508
+ .markdown-text a { /* Style links */
509
+ color: var(--primary-color);
510
+ text-decoration: none;
511
+ }
512
+ .markdown-text a:hover {
513
+ text-decoration: underline;
514
  }
515
 
516
+ /* Status indicators (if used in submission tab) */
517
  .status-badge {
518
  display: inline-block;
519
  padding: 0.3rem 0.7rem;
 
522
  font-weight: 500;
523
  text-align: center;
524
  }
525
+ /* Add specific styles for statuses if needed */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
  /* Footer */
528
  .footer {
529
  margin-top: 2rem;
530
+ padding: 1.5rem 1rem;
531
  text-align: center;
532
  font-size: 0.9rem;
533
+ color: #555;
534
  border-top: 1px solid var(--border-color);
535
+ background-color: #e9edf1; /* Light background for footer */
536
  }
537
 
538
+ /* Enhanced leaderboard title area */
539
  .leaderboard-header {
540
  display: flex;
541
+ flex-direction: column; /* Stack elements vertically on small screens */
542
+ align-items: center; /* Center items */
543
  justify-content: space-between;
544
  margin-bottom: 1.5rem;
545
+ padding: 1.5rem;
546
+ background-color: var(--card-background); /* White background */
547
+ border-radius: 12px;
548
+ border: 1px solid var(--border-color);
549
+ box-shadow: 0 4px 12px var(--shadow-color);
550
+ text-align: center; /* Center text */
551
+ }
552
+ @media (min-width: 768px) { /* Apply side-by-side layout on larger screens */
553
+ .leaderboard-header {
554
+ flex-direction: row;
555
+ text-align: left;
556
+ }
557
  }
558
 
559
+
560
  .leaderboard-title {
561
+ font-size: 2.0rem; /* Adjusted size */
562
  font-weight: 700;
563
  color: var(--primary-color);
564
+ margin: 0 0 0.5rem 0; /* Add bottom margin */
565
  display: flex;
566
  align-items: center;
567
+ gap: 0.7rem; /* Increase gap */
568
+ justify-content: center; /* Center on small screens */
569
+ }
570
+ @media (min-width: 768px) {
571
+ .leaderboard-title {
572
+ justify-content: flex-start; /* Align left on large screens */
573
+ font-size: 2.2rem; /* Restore size */
574
+ }
575
  }
576
 
577
  .leaderboard-subtitle {
578
  font-size: 1.1rem;
579
  color: #666;
580
+ margin: 0 0 1rem 0; /* Add bottom margin */
581
+ }
582
+ @media (min-width: 768px) {
583
+ .leaderboard-subtitle { margin-bottom: 0; } /* Remove bottom margin on large screens */
584
  }
585
 
586
  .timestamp {
587
  font-size: 0.85rem;
588
  color: #666;
589
  font-style: italic;
590
+ background-color: #f5f7fa;
591
+ padding: 5px 10px;
592
+ border-radius: 6px;
593
+ margin-top: 0.5rem; /* Add space above timestamp */
594
  }
595
+ @media (min-width: 768px) {
596
+ .timestamp { margin-top: 0; } /* Remove top margin on large screens */
 
 
 
 
 
597
  }
598
 
 
 
 
 
 
 
 
 
 
 
 
 
599
 
600
+ /* Category selector buttons (Already styled via .gr-radio) */
 
 
 
 
 
 
 
 
 
601
 
602
  /* Logo and brand styling */
603
  .logo {
604
+ font-size: 2.5em; /* Keep logo size */
605
  margin-right: 0.5rem;
606
  }
607
 
608
+ /* Style for About section cards */
609
+ .about-card {
610
+ background-color: #f5f7fa; /* Lighter background for these cards */
611
+ padding: 20px;
612
+ border-radius: 12px;
613
+ height: 100%; /* Make cards in a row equal height */
614
+ border: 1px solid var(--border-color);
615
+ display: flex; /* Use flexbox for alignment */
616
+ flex-direction: column; /* Stack content vertically */
617
+ text-align: center; /* Center text */
618
+ box-shadow: 0 2px 6px var(--shadow-color); /* Add subtle shadow */
619
  }
620
+ .about-card h3 {
621
+ text-align: center;
622
+ margin-top: 0;
623
+ margin-bottom: 10px; /* Space below heading */
624
+ color: var(--primary-color);
625
  }
626
+ .about-card p {
627
+ color: var(--text-color);
628
+ font-size: 0.95rem;
629
+ line-height: 1.6;
630
+ flex-grow: 1; /* Allow paragraph to take up space */
631
+ }
632
+ .about-card-icon {
633
+ font-size: 2.5em;
634
+ text-align: center;
635
+ margin-bottom: 15px;
636
+ display: block;
637
+ color: var(--secondary-color); /* Use secondary color for icons */
638
+ }
639
+ /* Ensure citation textbox has good contrast */
640
+ #citation-button textarea {
641
+ background-color: #f5f7fa !important;
642
+ color: var(--text-color) !important;
643
+ border: 1px solid var(--border-color) !important;
644
+ font-family: monospace !important; /* Use monospace for citation */
645
+ font-size: 0.95rem !important;
646
  }
647
  """
648
 
649
+ # Combine with any existing CSS (if custom_css was loaded from src)
650
+ # If custom_css from src exists, append enhanced_css. Otherwise, just use enhanced_css.
651
# Merge stylesheets: when custom_css already holds rules, enhanced_css is
# appended after a newline; when it is empty, enhanced_css is used on its own.
custom_css = "\n".join((custom_css, enhanced_css)) if custom_css else enhanced_css

# --- Gradio App Definition ---
# No theme argument is passed, leaving the merged stylesheet in charge of the look.
demo = gr.Blocks(css=custom_css)
659
 
660
  with demo:
661
  # Enhanced header with timestamp
 
675
  </div>
676
  """)
677
 
678
+ # Introduction moved outside Tabs for permanent visibility
679
+ with gr.Blocks(elem_classes="gr-block"): # Wrap intro in a styled block
680
+ gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
681
 
682
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
683
  with gr.TabItem("πŸ“Š Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
684
  with gr.Column():
685
+ with gr.Blocks(elem_classes="gr-block"): # Wrap leaderboard controls/table
686
+ gr.HTML("""
687
+ <h2 style="display: flex; align-items: center; gap: 10px; margin-bottom: 0.5rem;">
688
+ <span style="font-size: 1.3em;">πŸ“ˆ</span> Model Performance Rankings
689
+ </h2>
690
+ <p class="leaderboard-subtitle" style="margin-top: 0;">Select a category to view specialized performance metrics</p>
691
+ """)
692
+
693
+ # Enhanced category selector
694
+ category_selector = gr.Radio(
695
+ # Use labels from CATEGORIES
696
+ choices=[label for label, value in CATEGORIES],
697
+ label="Select Performance Domain:",
698
+ value=DEFAULT_CATEGORY_LABEL, # Default to the label
699
+ interactive=True,
700
+ elem_classes="gr-radio" # Apply custom radio styling
701
+ )
702
+
703
+ # Visual separator (Optional)
704
+ # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
705
+
706
+ # Enhanced leaderboard table with scrolling
707
+ leaderboard_df_component = gr.Dataframe(
708
+ # Initialize with default category value
709
+ value=update_leaderboard(DEFAULT_CATEGORY_LABEL),
710
+ # Headers will be set dynamically by the update function's output DataFrame
711
+ # Set datatypes for correct rendering (Rank is now number)
712
+ datatype=["number", "html", "str", "str", "html"],
713
+ interactive=False, # IMPORTANT: Keep False to disable UI sorting/editing
714
+ # Set explicit height for scrolling
715
+ height=600,
716
+ # row_count=(10, "dynamic"), # Alternative: show 10 rows, scroll others
717
+ col_count=(5, "fixed"), # We have 5 columns
718
+ wrap=True, # Allow text wrapping in cells
719
+ elem_id="leaderboard-table", # ID for potential CSS targeting
720
+ )
721
+
722
+ # Stats cards (visual enhancement) - Placed after leaderboard table
723
+ with gr.Blocks(elem_classes="gr-block"): # Wrap stats in a styled block
724
+ gr.HTML("<h2>Benchmark Statistics</h2>") # Add title for stats section
725
+ with gr.Row(equal_height=True):
726
+ with gr.Column(scale=1):
727
+ gr.HTML(f"""
728
+ <div class="about-card" style="text-align: center;">
729
+ <div class="about-card-icon">πŸ”</div>
730
+ <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
731
+ <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
732
+ </div>
733
+ """)
734
+ with gr.Column(scale=1):
735
+ gr.HTML(f"""
736
+ <div class="about-card" style="text-align: center;">
737
+ <div class="about-card-icon">🌐</div>
738
+ <div style="font-size: 2em; font-weight: bold; color: #00875a;">{50}</div>
739
+ <div style="font-size: 1.1em; color: #666;">Competitions (Example)</div>
740
+ </div>
741
+ """)
742
+ with gr.Column(scale=1):
743
+ gr.HTML(f"""
744
+ <div class="about-card" style="text-align: center;">
745
+ <div class="about-card-icon">πŸ…</div>
746
+ <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)-1}</div>
747
+ <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
748
+ </div>
749
+ """)
750
+
751
+ # Link the radio button change to the update function
752
+ category_selector.change(
753
+ fn=update_leaderboard,
754
+ inputs=category_selector,
755
+ outputs=leaderboard_df_component
756
+ )
757
+
758
+ with gr.TabItem("πŸ“š About", elem_id="llm-benchmark-tab-about", id=1):
759
+ # Wrap content in a styled block for consistent padding/background
760
+ with gr.Blocks(elem_classes="gr-block"):
761
+ # Enhanced about section header
762
  gr.HTML("""
763
+ <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
764
+ <div style="font-size: 4em;">πŸ§ͺ</div>
765
+ <div>
766
+ <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
767
+ <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
768
+ </div>
769
+ </div>
770
  """)
771
+
772
+ # Use the LLM_BENCHMARKS_TEXT variable
773
+ gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
774
+
775
+ # Add methodology cards for visual enhancement
776
+ gr.HTML("<h2 style='margin-top: 2rem;'>Evaluation Domains</h2>") # Title for cards
777
+ with gr.Row(equal_height=True):
778
+ with gr.Column():
779
+ gr.HTML("""
780
+ <div class="about-card">
781
+ <div class="about-card-icon">πŸ’‘</div>
782
+ <h3>MLE-Lite</h3>
783
+ <p>Evaluates basic ML engineering tasks: data preprocessing, feature engineering, model selection, and basic deployment concepts.</p>
784
+ </div>
785
+ """)
786
+ with gr.Column():
787
+ gr.HTML("""
788
+ <div class="about-card">
789
+ <div class="about-card-icon">πŸ“Š</div>
790
+ <h3>Tabular</h3>
791
+ <p>Tests processing, analysis, and modeling of structured data, including statistical analysis, predictive modeling, and visualization.</p>
 
 
 
 
 
 
 
 
 
 
 
 
792
  </div>
793
  """)
794
+
795
+ with gr.Row(equal_height=True):
796
+ with gr.Column():
797
+ gr.HTML("""
798
+ <div class="about-card">
799
+ <div class="about-card-icon">πŸ“</div>
800
+ <h3>NLP</h3>
801
+ <p>Evaluates natural language processing: text classification, sentiment analysis, entity recognition, text generation, and understanding.</p>
802
  </div>
803
  """)
804
+ with gr.Column():
805
+ gr.HTML("""
806
+ <div class="about-card">
807
+ <div class="about-card-icon">πŸ‘οΈ</div>
808
+ <h3>CV</h3>
809
+ <p>Tests computer vision capabilities: image classification, object detection, image generation, and visual understanding tasks.</p>
810
  </div>
811
  """)
 
 
 
 
 
 
 
812
 
813
+ # Optional: Uncomment if you want to re-enable the Submit tab
814
+ # with gr.TabItem("πŸš€ Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
815
+ # with gr.Blocks(elem_classes="gr-block"): # Wrap in styled block
816
+ # # ... (Your submission form code here) ...
817
+ # gr.Markdown("Submit tab content goes here.") # Placeholder
818
+
819
+
820
+ # Enhanced citation section (outside tabs, inside main demo block)
821
+ with gr.Blocks(elem_classes="gr-block"): # Wrap in styled block
822
+ with gr.Accordion("πŸ“„ Citation", open=False): # Accordion itself doesn't need gr-block
823
  gr.HTML("""
824
+ <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
825
+ <div style="font-size: 2.5em;">πŸ“„</div>
826
  <div>
827
+ <h3 style="margin: 0;">How to Cite This Benchmark</h3>
828
+ <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research.</p>
829
  </div>
830
  </div>
831
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
832
 
833
+ citation_button = gr.Textbox(
834
+ value=CITATION_BUTTON_TEXT,
835
+ label=CITATION_BUTTON_LABEL,
836
+ lines=5, # Adjust lines as needed
837
+ elem_id="citation-button", # Use this ID for CSS
838
+ show_copy_button=True,
839
+ interactive=False # Textbox is not meant for user input here
840
+ )
841
+
842
+ # Footer (outside tabs, inside main demo block)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
843
  gr.HTML("""
844
  <div class="footer">
845
  <p>Β© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
846
+ <p style="margin-top: 5px; display: flex; justify-content: center; flex-wrap: wrap; gap: 20px;">
847
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
848
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
849
  <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
 
852
  """)
853
 
854
  # --- Keep scheduler if relevant ---
855
+ # Note: Scheduler might not work reliably in all Gradio deployment environments (like Spaces free tier)
856
  if __name__ == "__main__":
857
  try:
858
  scheduler = BackgroundScheduler()
859
  if callable(restart_space):
860
+ # Ensure REPO_ID is set correctly for your HF Space
861
  if REPO_ID and REPO_ID != "your/space-id":
862
+ print(f"Scheduling space restart job for {REPO_ID} every 30 minutes.")
863
+ # Restart interval might need adjustment based on environment limits
864
+ scheduler.add_job(restart_space, "interval", seconds=1800)
865
  scheduler.start()
866
  else:
867
  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
 
873
  # --- Launch the app ---
874
if __name__ == "__main__":
    # Runs only when the file is executed as a script, not when it is imported.
    print("Launching Enhanced Gradio App...")
    # Default launch; pass share=True to launch() if a public link is needed
    # while running on a local machine.
    demo.launch()