Jerrycool committed on
Commit
db61dac
·
verified ·
1 Parent(s): edc82f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +275 -313
app.py CHANGED
@@ -1,401 +1,363 @@
1
  # -*- coding: utf-8 -*-
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
- # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
6
 
7
- # --- Make sure these imports work relative to your file structure ---
 
 
 
8
  try:
9
- # Assume these contain the *content* without excessive inline styling
10
  from src.about import (
11
  CITATION_BUTTON_LABEL,
12
  CITATION_BUTTON_TEXT,
13
- EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
14
  INTRODUCTION_TEXT,
15
  LLM_BENCHMARKS_TEXT,
16
- TITLE, # Expected to have an ID like #main-leaderboard-title
17
  )
18
- # Import custom_css if it exists, otherwise it will be defined below
19
  try:
20
- from src.display.css_html_js import custom_css
21
  except ImportError:
22
- print("Warning: src.display.css_html_js not found. Starting with empty custom_css.")
23
- custom_css = "" # Start fresh if not found
24
 
25
- from src.envs import REPO_ID # Keep if needed for restart_space or other functions
26
- from src.submission.submit import add_new_eval # Keep if using the submit tab
27
- print("Successfully imported from src module.")
28
- # Option 2: Placeholder definitions (REMOVE IF USING OPTION 1)
29
  except ImportError:
30
- print("Warning: Using placeholder values because src module imports failed.")
31
- CITATION_BUTTON_LABEL="Citation"
32
- CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
33
- EVALUATION_QUEUE_TEXT="Current evaluation queue:"
34
- # Example placeholders with structure for CSS
35
- TITLE="""<h1 id="main-leaderboard-title" align="center">🏆 MLE-Dojo Benchmark Leaderboard (Placeholder)</h1>"""
36
- INTRODUCTION_TEXT="""
37
- <div class="introduction-section">
38
- <p>Welcome to the MLE-Dojo Benchmark Leaderboard (Placeholder Content).</p>
39
- <p>Edit <code>src/about.py</code> to set your actual title and introduction text.</p>
40
- </div>
41
- """
42
- LLM_BENCHMARKS_TEXT="""
43
- ## About Section (Placeholder)
44
- Information about the benchmarks will go here. Edit <code>src/about.py</code>.
45
- """
46
- custom_css="" # Start with empty CSS
47
- REPO_ID="your/space-id" # Replace with actual ID if needed
48
- def add_new_eval(*args): return "Submission placeholder."
49
- # --- End Placeholder Definitions ---
50
-
51
-
52
- # --- Elo Leaderboard Configuration ---
53
- # (Keep your data definition as is)
 
 
 
 
 
54
  data = [
55
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
56
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
57
- {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
58
- {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
59
- {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
60
- {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
61
- {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
62
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  ]
64
  master_df = pd.DataFrame(data)
 
65
  CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
66
  DEFAULT_CATEGORY = "Overall"
67
  category_to_column = {
68
- "MLE-Lite": "MLE-Lite_Elo", "Tabular": "Tabular_Elo",
69
- "NLP": "NLP_Elo", "CV": "CV_Elo", "Overall": "Overall"
 
 
 
70
  }
71
 
72
- # --- Helper function to update leaderboard ---
73
- def update_leaderboard(category):
74
- """
75
- Selects relevant columns, sorts by the chosen category's Elo score,
76
- adds Rank, formats model name as a link, and returns the DataFrame.
77
- """
78
- score_column = category_to_column.get(category)
79
- if score_column is None or score_column not in master_df.columns:
80
- print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
81
- score_column = category_to_column[DEFAULT_CATEGORY]
82
- if score_column not in master_df.columns:
83
- print(f"Error: Default column '{score_column}' also not found.")
84
- # Return empty df with desired display columns
85
- return pd.DataFrame({
86
- "Rank": [], "Model": [], "Organizer": [], "License": [], "Elo Score": []
87
- })
88
-
89
- cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
90
- df = master_df[cols_to_select].copy()
91
- df.sort_values(by=score_column, ascending=False, inplace=True)
92
- df.reset_index(drop=True, inplace=True)
93
- df.insert(0, 'Rank', df.index + 1)
94
-
95
- # Format Model Name as HTML Hyperlink - use a CSS class for styling
96
- df['Model'] = df.apply(
97
- lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' class='model-link'>{row['model_name']}</a>",
98
- axis=1
99
  )
100
 
101
- # Rename columns for final display
102
- df.rename(columns={score_column: 'Elo Score', 'organizer': 'Organizer', 'license': 'License'}, inplace=True)
103
- final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
104
- df = df[final_columns]
105
- return df
106
-
107
- # --- Mock/Placeholder functions/data for other tabs ---
108
- print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
109
- finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
110
- running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
111
- pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
112
- EVAL_COLS = ["Model", "Status", "Requested", "Started"]
113
- EVAL_TYPES = ["str", "str", "str", "str"]
114
-
115
- # --- Keep restart function if relevant ---
116
- def restart_space():
117
- print(f"Attempting to restart space: {REPO_ID}")
118
- # Replace with your actual space restart mechanism if needed
119
-
120
- # --- Enhanced CSS Definition ---
121
- # Define all styles here. Assumes TITLE has id="main-leaderboard-title"
122
- # and INTRODUCTION_TEXT is wrapped in class="introduction-section" (or rendered by gr.Markdown).
123
-
124
- enhanced_css = """
125
- /* Base and Theme Overrides */
126
- body {
127
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
128
- font-size: 1.3em; /* Base font size */
129
- line-height: 1.6;
130
- background-color: #f8f9fa; /* Light background */
131
- color: #343a40; /* Default text color */
132
- }
133
-
134
- /* Container adjustments for better spacing */
135
- .gradio-container {
136
- max-width: 1200px !important; /* Limit max width */
137
- margin: 0 auto !important; /* Center the container */
138
- padding: 2rem !important; /* Add padding around the whole app */
139
- }
140
-
141
- /* --- Title Styling --- */
142
- /* Targets the h1 tag with the specific ID from src/about.py */
143
- #main-leaderboard-title {
144
- font-size: 3.2em; /* Large title */
145
- font-weight: 700; /* Bolder */
146
- color: #212529; /* Darker color for title */
147
- text-align: center; /* Ensure centering */
148
- margin-bottom: 1.5rem; /* Space below title */
149
- padding-bottom: 0.5rem; /* Space within the element */
150
- border-bottom: 2px solid #dee2e6; /* Subtle underline */
151
- }
152
 
153
- /* --- Introduction Text Styling --- */
154
- /* Targets the wrapper div or the markdown component */
155
- .introduction-section p, .introduction-wrapper .prose p { /* Target paragraphs within the section */
156
- font-family: 'Georgia',
157
- font-size: 1.5em; !important; /* Slightly larger than base */
158
- color: #495057; /* Slightly lighter text color */
159
- margin-bottom: 1rem; /* Space between paragraphs */
160
- max-width: 900px; /* Limit width for readability */
161
- margin-left: auto; /* Center the text block */
162
- margin-right: auto; /* Center the text block */
163
- text-align: center; /* Center align intro text */
164
- }
165
- .introduction-section, .introduction-wrapper {
166
- font-family: 'Georgia',
167
- font-size: 1.5em; !important; /* Slightly larger than base */
168
- margin-bottom: 2.5rem; /* Space below the intro block */
169
- }
170
 
 
 
 
171
 
172
- /* --- General Markdown and Header Styling --- */
173
- .markdown-text h2, .tabitem .prose h2 { /* Target section headers */
174
- font-size: 1.8em;
175
- font-weight: 500;
176
- color: #343a40;
177
- margin-top: 2.5rem; /* More space above sections */
178
- margin-bottom: 1.2rem;
179
- padding-bottom: 0.4rem;
180
- border-bottom: 1px solid #e9ecef;
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  }
182
- .markdown-text p, .tabitem .prose p {
183
- font-size: 1.5em; /* Standard paragraph size */
184
  margin-bottom: 1rem;
185
- color: #495057;
186
  }
187
- .markdown-text a, .tabitem .prose a { /* Style links within markdown */
188
- font-size: 1.3em;
189
- color: #007bff;
190
- text-decoration: none;
191
  }
192
- .markdown-text a:hover, .tabitem .prose a:hover {
 
 
 
 
 
 
 
193
  font-size: 1.3em;
194
- text-decoration: underline;
 
 
195
  }
196
 
197
- /* --- Tab Styling --- */
198
- .tab-buttons button { /* Style tab buttons */
199
- font-size: 1.3em !important;
200
- padding: 5px 10px !important;
201
- font-weight: 500;
202
  }
203
 
204
- /* --- Leaderboard Table Styling --- */
205
- #leaderboard-table {
206
- margin-top: 1.5rem; /* Space above table */
207
- font-size: 1.5em; /* Ensure table font size is consistent */
208
- border: 1px solid #dee2e6;
209
- box-shadow: 0 2px 4px rgba(0,0,0,0.05); /* Subtle shadow */
 
 
210
  }
 
 
211
  #leaderboard-table th {
212
- background-color: #e9ecef; /* Header background */
213
  font-size: 1.3em;
214
- font-weight: 500; /* Header font weight */
215
- padding: 10px 12px; /* Header padding */
216
- text-align: left;
217
- color: #495057;
218
- white-space: nowrap; /* Prevent header text wrapping */
219
  }
220
  #leaderboard-table td {
221
  font-size: 1.1em;
222
- padding: 8px 12px; /* Cell padding */
223
- border-bottom: 1px solid #e9ecef; /* Horizontal lines */
224
- vertical-align: middle; /* Center cell content vertically */
225
  }
226
- #leaderboard-table tr:nth-child(even) td {
227
- font-size: 1.1em;
228
- background-color: #f8f9fa; /* Zebra striping */
229
- }
230
- #leaderboard-table tr:hover td {
231
- font-size: 1.1em;
232
- background-color: #e2e6ea; /* Hover effect */
233
- }
234
- /* Style for the model links within the table */
235
  #leaderboard-table .model-link {
236
- color: #0056b3; /* Slightly darker blue for links */
237
- font-size: 1.1em;
238
  font-weight: 500;
239
  text-decoration: none;
240
  }
241
  #leaderboard-table .model-link:hover {
242
- font-size: 1.1em;
243
  text-decoration: underline;
244
- color: #003d80;
245
- }
246
-
247
- /* --- Radio Button / Category Selector Styling --- */
248
- .gradio-radio label span { /* Target the label text */
249
- font-size: 1.3em !important;
250
- font-weight: 500;
251
- color: #343a40;
252
- }
253
- .gradio-radio fieldset { /* Adjust spacing around radio buttons */
254
- margin-top: 0.5rem;
255
- margin-bottom: 1.5rem;
256
- }
257
- .gradio-radio fieldset label { /* Style individual radio choices */
258
- padding: 8px 12px !important;
259
- }
260
-
261
-
262
- /* --- Accordion Styling --- */
263
- .gradio-accordion > button { /* Accordion header */
264
- font-size: 1.2em !important;
265
- font-weight: 600;
266
- padding: 12px 15px !important;
267
- background-color: #f1f3f5 !important;
268
- border-bottom: 1px solid #dee2e6 !important;
269
- }
270
- .gradio-accordion > div { /* Accordion content area */
271
- padding: 15px !important;
272
- border: 1px solid #dee2e6 !important;
273
- border-top: none !important;
274
  }
275
-
276
- /* --- Textbox/Button Styling (e.g., Citation) --- */
277
- #citation-button textarea {
278
- font-family: 'Courier New', Courier, monospace; /* Monospace for code/citation */
279
- font-size: 0.95em !important;
280
- background-color: #e9ecef;
281
- color: #343a40;
282
- }
283
- #citation-button label span {
284
- font-weight: 600;
285
- }
286
-
287
-
288
- /* ---- INTRODUCTION TEXT ---- */
289
- .introduction-section {
290
- font-size: 1.4rem !important;
291
- line-height: 1.75;
292
- color: #344054;
293
- text-align: center;
294
- max-width: 900px;
295
- margin: 0 auto 3rem auto;
296
- }
297
-
298
- .introduction-section p {
299
- margin-bottom: 1rem;
300
- font-family: Georgia, serif;
301
- font-size: 1.2em;
302
- }
303
-
304
- @media (max-width: 768px) {
305
- .introduction-section {
306
- font-size: 1.2rem !important;
307
- }
308
- }
309
-
310
-
311
  """
312
 
313
- # Combine any existing CSS with the new enhanced CSS
314
- # Prioritize enhanced_css rules by placing it last or using more specific selectors
315
- final_css = custom_css + "\n" + enhanced_css
 
 
 
316
 
317
- # --- Gradio App Definition ---
318
- # Use a theme for base styling and apply custom CSS overrides
319
- demo = gr.Blocks(css=final_css, theme=gr.themes.Soft(
320
- # Optional: Customize theme variables if needed
321
- # primary_hue=gr.themes.colors.blue,
322
- # secondary_hue=gr.themes.colors.gray,
323
- # neutral_hue=gr.themes.colors.cool_gray,
324
- ))
325
 
326
  with demo:
327
- # Render TITLE from src/about.py (expects <h1 id="main-leaderboard-title">...)
328
  gr.HTML(TITLE)
329
 
330
- # Render INTRODUCTION_TEXT from src/about.py
331
- # Add a wrapper class for CSS targeting if the text itself doesn't have one
332
  with gr.Row():
333
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="introduction-wrapper") # Use this class for CSS
334
 
335
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
336
- with gr.TabItem("🏅 MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
 
337
  with gr.Column():
338
- # Use standard Markdown for the section header, CSS will style it
339
  gr.Markdown("## Model Elo Rankings by Category", elem_classes="markdown-text")
 
340
  category_selector = gr.Radio(
341
  choices=CATEGORIES,
342
- label="Select Category:", # Label is styled via CSS
343
  value=DEFAULT_CATEGORY,
344
  interactive=True,
345
- elem_classes="gradio-radio" # Add class for styling
346
  )
 
347
  leaderboard_df_component = gr.Dataframe(
348
  value=update_leaderboard(DEFAULT_CATEGORY),
349
- headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
350
  datatype=["number", "html", "str", "str", "number"],
351
  interactive=False,
352
  row_count=(len(master_df), "fixed"),
353
  col_count=(5, "fixed"),
354
  wrap=True,
355
- elem_id="leaderboard-table" # Used for specific table CSS
356
  )
357
- category_selector.change(
358
- fn=update_leaderboard,
359
- inputs=category_selector,
360
- outputs=leaderboard_df_component
361
- )
362
-
363
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=1):
364
- # Render LLM_BENCHMARKS_TEXT using Markdown, styled by CSS
365
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") # Apply standard markdown styling
366
 
367
- # --- Submit Tab (Keep commented out or uncomment and ensure imports/variables are defined) ---
368
- # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
369
- # # ... (Your submission form code - apply elem_classes="markdown-text" to gr.Markdown) ...
370
- # pass # Placeholder
371
 
 
 
 
372
 
373
- # --- Citation Row (at the bottom, outside Tabs) ---
374
- with gr.Accordion("📙 Citation", open=False, elem_classes="gradio-accordion"): # Add class
375
- citation_button = gr.Textbox(
376
  value=CITATION_BUTTON_TEXT,
377
  label=CITATION_BUTTON_LABEL,
378
- lines=8, # Adjusted lines slightly
379
- elem_id="citation-button", # Used for specific CSS
380
  show_copy_button=True,
381
  )
382
 
383
- # --- Scheduler and Launch ---
 
 
384
  if __name__ == "__main__":
385
  try:
386
- scheduler = BackgroundScheduler()
387
- if callable(restart_space):
388
- if REPO_ID and REPO_ID != "your/space-id":
389
- scheduler.add_job(restart_space, "interval", seconds=1800)
390
- scheduler.start()
391
- print("Scheduler started for space restart.")
392
- else:
393
- print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
394
  else:
395
- print("Warning: restart_space function not available; space restart job not scheduled.")
396
- except Exception as e:
397
- print(f"Failed to initialize or start scheduler: {e}")
398
 
399
- print("Launching Gradio App...")
400
- # demo.queue() # Consider adding queue() for better handling under load
401
- demo.launch()
 
1
  # -*- coding: utf-8 -*-
2
+ """Gradio frontend for the MLE‑Dojo leaderboard.
3
+
4
+ Changes made in this version
5
+ ----------------------------
6
+ 1. **Fixed CSS syntax errors** (missing semicolons, misplaced `!important`).
7
+ 2. **Introduced a single, clean rule‑set** for the introduction block so the
8
+ font size, family and alignment are now reliably applied.
9
+ 3. All new comments are in English for clarity.
10
+ """
11
+
12
  import gradio as gr
13
  import pandas as pd
14
  from apscheduler.schedulers.background import BackgroundScheduler
 
15
 
16
+ # ---------------------------------------------------------------------------
17
+ # Optional imports from the project package. If they fail we fall back to
18
+ # placeholders so the app still launches locally.
19
+ # ---------------------------------------------------------------------------
20
  try:
 
21
  from src.about import (
22
  CITATION_BUTTON_LABEL,
23
  CITATION_BUTTON_TEXT,
24
+ EVALUATION_QUEUE_TEXT, # still referenced in commented-out submit tab
25
  INTRODUCTION_TEXT,
26
  LLM_BENCHMARKS_TEXT,
27
+ TITLE, # contains <h1 id="main-leaderboard-title">
28
  )
29
+
30
  try:
31
+ from src.display.css_html_js import custom_css # extra project CSS
32
  except ImportError:
33
+ print("Warning: src.display.css_html_js not found. Using empty CSS.")
34
+ custom_css = ""
35
 
36
+ from src.envs import REPO_ID
37
+ from src.submission.submit import add_new_eval
 
 
38
  except ImportError:
39
+ # -------- PLACEHOLDERS so the Space can still run --------
40
+ print("Warning: using placeholder values because src module imports failed.")
41
+ CITATION_BUTTON_LABEL = "Citation"
42
+ CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark…"
43
+ EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
44
+ TITLE = (
45
+ "<h1 id=\"main-leaderboard-title\" align=\"center\">🏆 MLE-Dojo "
46
+ "Benchmark Leaderboard (Placeholder)</h1>"
47
+ )
48
+ INTRODUCTION_TEXT = (
49
+ "<div class=\"introduction-section\">"
50
+ "<p>Welcome to the MLE‑Dojo Benchmark Leaderboard (placeholder).</p>"
51
+ "<p>Edit <code>src/about.py</code> to change this text.</p>"
52
+ "</div>"
53
+ )
54
+ LLM_BENCHMARKS_TEXT = (
55
+ "## About Section (placeholder)\nInformation about the benchmarks goes here."
56
+ )
57
+ custom_css = ""
58
+ REPO_ID = "your/space-id"
59
+
60
+ # Dummy function so the callback in the (commented) submit tab still works
61
+ def add_new_eval(*_):
62
+ return "Submission placeholder."
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Leaderboard data (static demo data for now)
66
+ # ---------------------------------------------------------------------------
67
+
68
  data = [
69
+ {
70
+ "model_name": "gpt-4o-mini",
71
+ "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/",
72
+ "organizer": "OpenAI",
73
+ "license": "Proprietary",
74
+ "MLE-Lite_Elo": 753,
75
+ "Tabular_Elo": 839,
76
+ "NLP_Elo": 758,
77
+ "CV_Elo": 754,
78
+ "Overall": 778,
79
+ },
80
+ {
81
+ "model_name": "gpt-4o",
82
+ "url": "https://openai.com/index/hello-gpt-4o/",
83
+ "organizer": "OpenAI",
84
+ "license": "Proprietary",
85
+ "MLE-Lite_Elo": 830,
86
+ "Tabular_Elo": 861,
87
+ "NLP_Elo": 903,
88
+ "CV_Elo": 761,
89
+ "Overall": 841,
90
+ },
91
+ {
92
+ "model_name": "o3-mini",
93
+ "url": "https://openai.com/index/openai-o3-mini/",
94
+ "organizer": "OpenAI",
95
+ "license": "Proprietary",
96
+ "MLE-Lite_Elo": 1108,
97
+ "Tabular_Elo": 1019,
98
+ "NLP_Elo": 1056,
99
+ "CV_Elo": 1207,
100
+ "Overall": 1096,
101
+ },
102
+ {
103
+ "model_name": "deepseek-v3",
104
+ "url": "https://api-docs.deepseek.com/news/news1226",
105
+ "organizer": "DeepSeek",
106
+ "license": "DeepSeek",
107
+ "MLE-Lite_Elo": 1004,
108
+ "Tabular_Elo": 1015,
109
+ "NLP_Elo": 1028,
110
+ "CV_Elo": 1067,
111
+ "Overall": 1023,
112
+ },
113
+ {
114
+ "model_name": "deepseek-r1",
115
+ "url": "https://api-docs.deepseek.com/news/news250120",
116
+ "organizer": "DeepSeek",
117
+ "license": "DeepSeek",
118
+ "MLE-Lite_Elo": 1137,
119
+ "Tabular_Elo": 1053,
120
+ "NLP_Elo": 1103,
121
+ "CV_Elo": 1083,
122
+ "Overall": 1100,
123
+ },
124
+ {
125
+ "model_name": "gemini-2.0-flash",
126
+ "url": "https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash",
127
+ "organizer": "Google",
128
+ "license": "Proprietary",
129
+ "MLE-Lite_Elo": 847,
130
+ "Tabular_Elo": 923,
131
+ "NLP_Elo": 860,
132
+ "CV_Elo": 978,
133
+ "Overall": 895,
134
+ },
135
+ {
136
+ "model_name": "gemini-2.0-pro",
137
+ "url": "https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/",
138
+ "organizer": "Google",
139
+ "license": "Proprietary",
140
+ "MLE-Lite_Elo": 1064,
141
+ "Tabular_Elo": 1139,
142
+ "NLP_Elo": 1028,
143
+ "CV_Elo": 973,
144
+ "Overall": 1054,
145
+ },
146
+ {
147
+ "model_name": "gemini-2.5-pro",
148
+ "url": "https://deepmind.google/technologies/gemini/pro/",
149
+ "organizer": "Google",
150
+ "license": "Proprietary",
151
+ "MLE-Lite_Elo": 1257,
152
+ "Tabular_Elo": 1150,
153
+ "NLP_Elo": 1266,
154
+ "CV_Elo": 1177,
155
+ "Overall": 1214,
156
+ },
157
  ]
158
  master_df = pd.DataFrame(data)
159
+
160
  CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
161
  DEFAULT_CATEGORY = "Overall"
162
  category_to_column = {
163
+ "MLE-Lite": "MLE-Lite_Elo",
164
+ "Tabular": "Tabular_Elo",
165
+ "NLP": "NLP_Elo",
166
+ "CV": "CV_Elo",
167
+ "Overall": "Overall",
168
  }
169
 
170
+ # ---------------------------------------------------------------------------
171
+ # Helper to slice & rank the DataFrame when category radio changes
172
+ # ---------------------------------------------------------------------------
173
+
174
+ def update_leaderboard(category: str) -> pd.DataFrame:
175
+ """Return a DataFrame limited to the selected category and sorted by score."""
176
+ score_column = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])
177
+
178
+ cols = ["model_name", "url", "organizer", "license", score_column]
179
+ df = master_df[cols].copy()
180
+ df = df.sort_values(score_column, ascending=False).reset_index(drop=True)
181
+ df.insert(0, "Rank", df.index + 1)
182
+
183
+ # Convert model name → clickable link (HTML will be rendered in the table)
184
+ df["Model"] = df.apply(
185
+ lambda r: (
186
+ f"<a href='{r.url if pd.notna(r.url) else '#'}' target='_blank' "
187
+ f"class='model-link'>{r.model_name}</a>"
188
+ ),
189
+ axis=1,
 
 
 
 
 
 
 
190
  )
191
 
192
+ df = df.rename(columns={
193
+ score_column: "Elo Score",
194
+ "organizer": "Organizer",
195
+ "license": "License",
196
+ })
197
+ return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
+ # ---------------------------------------------------------------------------
200
+ # Basic placeholder DataFrame for the (currently disabled) evaluation queue
201
+ # ---------------------------------------------------------------------------
202
+ print("Warning: evaluation queue fetching is disabled/mocked.")
203
+ empty_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
+ # ---------------------------------------------------------------------------
206
+ # Helper for HF Spaces restart (optional)
207
+ # ---------------------------------------------------------------------------
208
 
209
+ def restart_space():
210
+ """Restart the current Hugging Face Space (if running in one)."""
211
+ print(f"Attempting to restart space: {REPO_ID}")
212
+ # Insert actual restart logic if needed.
213
+
214
+ # ---------------------------------------------------------------------------
215
+ # CSS: project CSS (custom_css) + enhanced overrides
216
+ # ---------------------------------------------------------------------------
217
+
218
+ # --- CLEAN introduction typography override (this replaces the buggy version) ---
219
+ intro_css = """
220
+ /* --------------------------------------------------
221
+ INTRODUCTION BLOCK (font, size, alignment)
222
+ -------------------------------------------------- */
223
+ .introduction-wrapper, .introduction-section {
224
+ font-family: Georgia, serif;
225
+ font-size: 1.4rem !important; /* ≈22–23 px */
226
+ line-height: 1.75;
227
+ color: #344054;
228
+ text-align: center;
229
+ max-width: 900px;
230
+ margin: 0 auto 3rem auto;
231
  }
232
+ .introduction-wrapper p, .introduction-section p {
 
233
  margin-bottom: 1rem;
 
234
  }
235
+ @media (max-width: 768px) {
236
+ .introduction-wrapper, .introduction-section {
237
+ font-size: 1.2rem !important;
238
+ }
239
  }
240
+ """
241
+
242
+ # --- Existing CSS (base layout, table, etc.) ---
243
+ base_css = """
244
+ /* Base & layout overrides (truncated for brevity) */
245
+ body {
246
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen,
247
+ Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
248
  font-size: 1.3em;
249
+ line-height: 1.6;
250
+ background-color: #f8f9fa;
251
+ color: #343a40;
252
  }
253
 
254
+ .gradio-container {
255
+ max-width: 1200px !important;
256
+ margin: 0 auto !important;
257
+ padding: 2rem !important;
 
258
  }
259
 
260
+ #main-leaderboard-title {
261
+ font-size: 3.2em;
262
+ font-weight: 700;
263
+ color: #212529;
264
+ text-align: center;
265
+ margin-bottom: 1.5rem;
266
+ padding-bottom: 0.5rem;
267
+ border-bottom: 2px solid #dee2e6;
268
  }
269
+
270
+ /* Leaderboard table (only key parts kept) */
271
  #leaderboard-table th {
272
+ background-color: #e9ecef;
273
  font-size: 1.3em;
274
+ font-weight: 500;
 
 
 
 
275
  }
276
  #leaderboard-table td {
277
  font-size: 1.1em;
 
 
 
278
  }
 
 
 
 
 
 
 
 
 
279
  #leaderboard-table .model-link {
280
+ color: #0056b3;
 
281
  font-weight: 500;
282
  text-decoration: none;
283
  }
284
  #leaderboard-table .model-link:hover {
 
285
  text-decoration: underline;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  """
288
 
289
+ # Concatenate user‑defined, base and intro CSS (later rules win at equal specificity, so intro_css has the final say)
290
+ final_css = f"{custom_css}\n{base_css}\n{intro_css}"
291
+
292
+ # ---------------------------------------------------------------------------
293
+ # Build the Gradio UI
294
+ # ---------------------------------------------------------------------------
295
 
296
+ demo = gr.Blocks(css=final_css, theme=gr.themes.Soft())
 
 
 
 
 
 
 
297
 
298
  with demo:
299
+ # Title
300
  gr.HTML(TITLE)
301
 
302
+ # Introduction (Markdown wrapped so CSS can target .introduction-wrapper)
 
303
  with gr.Row():
304
+ gr.Markdown(INTRODUCTION_TEXT, elem_classes="introduction-wrapper")
305
 
306
+ with gr.Tabs(elem_classes="tab-buttons"):
307
+ # ------------------ Leaderboard tab ------------------
308
+ with gr.TabItem("🏅 MLE-Dojo Benchmark", id=0):
309
  with gr.Column():
 
310
  gr.Markdown("## Model Elo Rankings by Category", elem_classes="markdown-text")
311
+
312
  category_selector = gr.Radio(
313
  choices=CATEGORIES,
314
+ label="Select Category:",
315
  value=DEFAULT_CATEGORY,
316
  interactive=True,
317
+ elem_classes="gradio-radio",
318
  )
319
+
320
  leaderboard_df_component = gr.Dataframe(
321
  value=update_leaderboard(DEFAULT_CATEGORY),
322
+ headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
323
  datatype=["number", "html", "str", "str", "number"],
324
  interactive=False,
325
  row_count=(len(master_df), "fixed"),
326
  col_count=(5, "fixed"),
327
  wrap=True,
328
+ elem_id="leaderboard-table",
329
  )
 
 
 
 
 
 
 
 
 
330
 
331
+ category_selector.change(update_leaderboard, category_selector, leaderboard_df_component)
 
 
 
332
 
333
+ # ------------------ About tab ------------------
334
+ with gr.TabItem("📝 About", id=1):
335
+ gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
336
 
337
+ # Citation accordion (bottom of page)
338
+ with gr.Accordion("📙 Citation", open=False, elem_classes="gradio-accordion"):
339
+ gr.Textbox(
340
  value=CITATION_BUTTON_TEXT,
341
  label=CITATION_BUTTON_LABEL,
342
+ lines=8,
343
+ elem_id="citation-button",
344
  show_copy_button=True,
345
  )
346
 
347
+ # ---------------------------------------------------------------------------
348
+ # Scheduler (optional) & launch
349
+ # ---------------------------------------------------------------------------
350
  if __name__ == "__main__":
351
  try:
352
+ if callable(restart_space) and REPO_ID != "your/space-id":
353
+ scheduler = BackgroundScheduler()
354
+ scheduler.add_job(restart_space, "interval", seconds=1800)
355
+ scheduler.start()
356
+ print("Scheduler started for space restart.")
 
 
 
357
  else:
358
+ print("Space restart scheduler not started (no REPO_ID or restart function).")
359
+ except Exception as exc:
360
+ print(f"Scheduler init failed: {exc}")
361
 
362
+ print("Launching Gradio app…")
363
+ demo.launch()