Jerrycool committed
Commit 3dd92ec · verified · 1 parent: 3aab004

Update app.py

Files changed (1)
  1. app.py +224 -278
app.py CHANGED
@@ -1,305 +1,251 @@
  import gradio as gr
  import pandas as pd
  from apscheduler.schedulers.background import BackgroundScheduler
- # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard

- # --- Make sure these imports work relative to your file structure ---
- # Option 1: If src is a directory in the same folder as your script:
  try:
-     from src.about import (
-         CITATION_BUTTON_LABEL,
-         CITATION_BUTTON_TEXT,
-         EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
-         INTRODUCTION_TEXT,
-         LLM_BENCHMARKS_TEXT,
-         TITLE,
-     )
-     from src.display.css_html_js import custom_css # Assuming this exists but might be empty
-     from src.envs import REPO_ID # Keep if needed for restart_space or other functions
-     from src.submission.submit import add_new_eval # Keep if using the submit tab
-     print("Successfully imported from src module.")
- # Option 2: If you don't have these files, define placeholders (REMOVE THIS if using Option 1)
  except ImportError:
-     print("Warning: Using placeholder values because src module imports failed.")
-     CITATION_BUTTON_LABEL="Citation"
-     CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
-     EVALUATION_QUEUE_TEXT="Current evaluation queue:"
-     INTRODUCTION_TEXT="Welcome to the MLE-Dojo Benchmark Leaderboard."
-     LLM_BENCHMARKS_TEXT="Information about the benchmarks..."
-     TITLE="<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
-     custom_css="" # Start with empty CSS if not imported
-     REPO_ID="your/space-id" # Replace with actual ID if needed
-     def add_new_eval(*args): return "Submission placeholder."
- # --- End Placeholder Definitions ---
-
-
- # --- Elo Leaderboard Configuration ---
- # Enhanced data with Rank (placeholder), Organizer, License, and URL
- # !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
- # Verify organizer and license information for accuracy.
  data = [
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
- {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
- {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
- {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
- {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
- {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
  ]
-
- # Create a master DataFrame
- # Note: Columns 'organizer' and 'license' are created in lowercase here.
  master_df = pd.DataFrame(data)

- # Define categories for selection (user-facing)
- CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"] # Overall first
- DEFAULT_CATEGORY = "Overall" # Set a default category
-
- # Map user-facing categories to DataFrame column names
- category_to_column = {
-     "MLE-Lite": "MLE-Lite_Elo",
-     "Tabular": "Tabular_Elo",
-     "NLP": "NLP_Elo",
-     "CV": "CV_Elo",
-     "Overall": "Overall"
  }

- # --- Helper function to update leaderboard ---
- def update_leaderboard(category):
-     """
-     Selects relevant columns, sorts by the chosen category's Elo score,
-     adds Rank, formats model name as a link, and returns the DataFrame.
-     """
-     score_column = category_to_column.get(category)
-     if score_column is None or score_column not in master_df.columns:
-         print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
-         score_column = category_to_column[DEFAULT_CATEGORY]
-         # Check fallback column too
-         if score_column not in master_df.columns:
-             # Return empty df with correct columns if still invalid
-             # Use lowercase keys here consistent with master_df for the empty case
-             print(f"Error: Default column '{score_column}' also not found.")
-             return pd.DataFrame({
-                 "Rank": [],
-                 "Model": [],
-                 "Elo Score": [],
-                 "Organizer": [], # Changed 'organizer' -> 'Organizer' for consistency in empty case
-                 "License": []   # Changed 'license' -> 'License' for consistency in empty case
-             })
-
-     # Select base columns + the score column for sorting
-     # Ensure 'organizer' and 'license' are selected correctly (lowercase)
-     cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
-     df = master_df[cols_to_select].copy()
-
-     # Sort by the selected 'Elo Score' descending
-     df.sort_values(by=score_column, ascending=False, inplace=True)
-
-     # Add Rank based on the sorted order
-     df.reset_index(drop=True, inplace=True)
-     df.insert(0, 'Rank', df.index + 1)
-
-     # Format Model Name as HTML Hyperlink
-     # The resulting column name will be 'Model' (capitalized)
-     df['Model'] = df.apply(
-         lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>",
-         axis=1
-     )
-
-     # Rename the score column to 'Elo Score' for consistent display
-     df.rename(columns={score_column: 'Elo Score'}, inplace=True)
-
-     # Rename 'organizer' and 'license' to match desired display headers
-     df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
-
-     # Select and reorder columns for final display using the ACTUAL column names in df
-     # Use capitalized 'Organizer' and 'License' here because they have been renamed.
-     final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
-     df = df[final_columns]

-     # Note: The DataFrame returned now has columns:
-     # 'Rank', 'Model', 'Organizer', 'License', 'Elo Score'
-     return df

- # --- Mock/Placeholder functions/data for other tabs ---
- # (If the Submit tab is used, ensure these variables are appropriately populated or handled)
- print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
- finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
- running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
- pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
- EVAL_COLS = ["Model", "Status", "Requested", "Started"] # Define for the dataframe headers
- EVAL_TYPES = ["str", "str", "str", "str"] # Define for the dataframe types

- # --- Keep restart function if relevant ---
- def restart_space():
-     # Make sure REPO_ID is correctly defined/imported if this function is used
-     print(f"Attempting to restart space: {REPO_ID}")
-     # Replace with your actual space restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))

- # --- Gradio App Definition ---

- # ***** FONT SIZE INCREASED HERE *****
- # Add CSS rules to make the base font size larger.
- # Adjust the '1.2em' value (e.g., to '1.4em', '16px') to change the size.
- # The !important flag helps override theme defaults.
- # If the imported custom_css already has content, append to it.
- font_size_css = """
- body {
-     font-size: 1.5em !important; /* Increase base font size */
- }
- /* Optional: Target specific elements if needed */
- /*
- #leaderboard-table th, #leaderboard-table td {
-     font-size: 1em !important; /* Adjust table font size relative to new body size */
-     padding: 5px 7px !important; /* Increase padding for better spacing */
  }
- h1, .markdown-text h1 { font-size: 2.2em !important; } /* Make main title larger */
- h2, .markdown-text h2 { font-size: 1.8em !important; } /* Make section titles larger */
- button { font-size: 1.1em !important; padding: 8px 16px !important; } /* Slightly larger buttons */
- .gr-input, .gr-dropdown, .gr-textbox textarea { font-size: 1em !important; } /* Ensure inputs scale too */
- */
  """
- # Append the new CSS to any existing custom_css
- custom_css += font_size_css

- # Add basic table styling if not already present
- if "table {" not in custom_css:
-     custom_css += """
- table { width: 100%; border-collapse: collapse; margin-top: 10px; margin-bottom: 10px; }
- th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; vertical-align: top; } /* Allow wrapping, top align */
- th { background-color: #f2f2f2; font-weight: bold; }
- tr:nth-child(even) { background-color: #f9f9f9; }
- tr:hover { background-color: #e9e9e9; }
- td a { color: #007bff; text-decoration: none; }
- td a:hover { text-decoration: underline; }
- """

- # Use a theme for better default styling
- demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())

  with demo:
-     # Use the TITLE variable imported or defined above
-     gr.HTML(TITLE)
-
-     # Use the INTRODUCTION_TEXT variable imported or defined above
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-             with gr.Column():
-                 gr.Markdown("## Model Elo Rankings by Category")
-                 category_selector = gr.Radio(
-                     choices=CATEGORIES,
-                     label="Select Category:",
-                     value=DEFAULT_CATEGORY,
-                     interactive=True,
-                 )
-                 leaderboard_df_component = gr.Dataframe(
-                     # Initialize with sorted data for the default category
-                     value=update_leaderboard(DEFAULT_CATEGORY),
-                     # Headers for DISPLAY should match the *renamed* columns from update_leaderboard
-                     headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
-                     # Datatype maps to the final df columns: Rank, Model, Organizer, License, Elo Score
-                     datatype=["number", "html", "str", "str", "number"],
-                     interactive=False,
-                     # --- FIX APPLIED: Removed unsupported 'height' argument ---
-                     # row_count determines the number of rows to display
-                     row_count=(len(master_df), "fixed"), # Display all rows
-                     col_count=(5, "fixed"),
-                     wrap=True, # Allow text wrapping in cells
-                     elem_id="leaderboard-table" # CSS hook for custom styling
-                 )
-                 # Link the radio button change to the update function
-                 category_selector.change(
-                     fn=update_leaderboard,
-                     inputs=category_selector,
-                     outputs=leaderboard_df_component
-                 )
-
-         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=1):
-             # Use the LLM_BENCHMARKS_TEXT variable imported or defined above
-             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-         # --- Submit Tab (Commented out as in original request) ---
-         # Make sure EVALUATION_QUEUE_TEXT and add_new_eval are imported/defined if uncommented
-         # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
-         #     with gr.Column():
-         #          with gr.Row():
-         #              gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") # Requires import/definition
-         #          with gr.Column():
-         #              with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
-         #                   finished_eval_table = gr.components.Dataframe(
-         #                       value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-         #                  )
-         #              with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
-         #                   running_eval_table = gr.components.Dataframe(
-         #                       value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-         #                  )
-         #              with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
-         #                  pending_eval_table = gr.components.Dataframe(
-         #                      value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
-         #                  )
-         #     with gr.Row():
-         #          gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-         #     with gr.Row():
-         #          with gr.Column():
-         #              model_name_textbox = gr.Textbox(label="Model name (on Hugging Face Hub)")
-         #              revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
-         #              model_type = gr.Dropdown(choices=["Type A", "Type B", "Type C"], label="Model type", multiselect=False, value=None, interactive=True) # Example choices
-         #          with gr.Column():
-         #              precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
-         #              weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights type", multiselect=False, value="Original", interactive=True)
-         #              base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-         #     submit_button = gr.Button("Submit Eval")
-         #     submission_result = gr.Markdown()
-         #     # Ensure add_new_eval is correctly imported/defined and handles these inputs
-         #     submit_button.click(
-         #          add_new_eval, # Requires import/definition
-         #          [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
-         #          submission_result,
-         #      )
-
-     # --- Citation Row (at the bottom, outside Tabs) ---
-     with gr.Accordion("📙 Citation", open=False):
-         # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables imported or defined above
-         citation_button = gr.Textbox(
-             value=CITATION_BUTTON_TEXT,
-             label=CITATION_BUTTON_LABEL,
-             lines=10, # Adjust lines if needed for new font size
-             elem_id="citation-button",
-             show_copy_button=True,
-         )
-
- # IGNORE_WHEN_COPYING_START
- # content_copy  download
- # Use code with caution.
- # IGNORE_WHEN_COPYING_END
-
- # --- Keep scheduler if relevant ---
- # Only start scheduler if the script is run directly
- if __name__ == "__main__":
-     try:
-         scheduler = BackgroundScheduler()
-         # Add job only if restart_space is callable (i.e., not a placeholder or failed import)
-         if callable(restart_space):
-              # Check if REPO_ID seems valid before scheduling
-              if REPO_ID and REPO_ID != "your/space-id":
-                  scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
-                  scheduler.start()
-              else:
-                  print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
-         else:
-              print("Warning: restart_space function not available; space restart job not scheduled.")
-     except Exception as e:
-         print(f"Failed to initialize or start scheduler: {e}")

- # --- Launch the app ---
- # Ensures the app launches only when the script is run directly
  if __name__ == "__main__":
-     # Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
-     # Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions above.
-     print("Launching Gradio App...")
-     demo.launch()
  import gradio as gr
  import pandas as pd
  from apscheduler.schedulers.background import BackgroundScheduler

+ """
+ MLE‑Dojo Benchmark Leaderboard - Polished Edition
+ -------------------------------------------------
+ This version focuses on premium typography, an elegant color palette, and richer
+ UI controls (including ascending/descending sort) while remaining completely
+ self‑contained.
+
+ *️⃣ HOW TO USE
+ -------------------------------------------------
+ 1. Install deps → `pip install gradio pandas apscheduler`
+ 2. Launch → `python mle_dojo_leaderboard_app.py`
+ 3. Tailor any of the placeholder values (TITLE, INTRODUCTION_TEXT, etc.) to your
+    project or import them from your own `src` package — the try/except block at
+    the top handles either workflow gracefully.
+ """
+
+ # ---------------------------------------------------------------------------
+ # Placeholder fall‑back imports (remove once your own src/ is in PYTHONPATH)
+ # ---------------------------------------------------------------------------
  try:
+     from src.about import (
+         CITATION_BUTTON_LABEL,
+         CITATION_BUTTON_TEXT,
+         EVALUATION_QUEUE_TEXT,
+         INTRODUCTION_TEXT,
+         LLM_BENCHMARKS_TEXT,
+         TITLE,
+     )
+     from src.display.css_html_js import custom_css # optional
+     from src.envs import REPO_ID
+     from src.submission.submit import add_new_eval
+     print("Imported UI copy & helpers from src package.")
  except ImportError:
+     print("⚠️ Falling back to local placeholders - customise as needed.")
+     CITATION_BUTTON_LABEL = "Citation"
+     CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark"
+     EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
+     INTRODUCTION_TEXT = "Welcome to the **MLE‑Dojo Benchmark Leaderboard** — compare LLM agents across real‑world ML engineering tasks."
+     LLM_BENCHMARKS_TEXT = "Further details about tasks, metrics, and evaluation pipelines."
+     TITLE = (
+         "<h1 class='hero-title gradient-text'>\U0001F3C6 MLE‑Dojo Benchmark Leaderboard</h1>"
+         "<p class='subtitle'>Interactive, reproducible &amp; community‑driven ML agent benchmarking</p>"
+     )
+     custom_css = "" # will be extended below
+     REPO_ID = "your/space-id"
+     def add_new_eval(*_):
+         return "Submission placeholder."
+
+ # ---------------------------------------------------------------------------
+ # Data - extend / refresh as new checkpoints are evaluated
+ # ---------------------------------------------------------------------------
  data = [
+     {"model_name": "gpt-4o-mini", "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 753, "Tabular_Elo": 839, "NLP_Elo": 758, "CV_Elo": 754, "Overall": 778},
+     {"model_name": "gpt-4o", "url": "https://openai.com/index/hello-gpt-4o/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 830, "Tabular_Elo": 861, "NLP_Elo": 903, "CV_Elo": 761, "Overall": 841},
+     {"model_name": "o3-mini", "url": "https://openai.com/index/openai-o3-mini/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 1108, "Tabular_Elo": 1019, "NLP_Elo": 1056, "CV_Elo": 1207, "Overall": 1096},
+     {"model_name": "deepseek-v3", "url": "https://api-docs.deepseek.com/news/news1226", "organizer": "DeepSeek", "license": "DeepSeek", "MLE-Lite_Elo": 1004, "Tabular_Elo": 1015, "NLP_Elo": 1028, "CV_Elo": 1067, "Overall": 1023},
+     {"model_name": "deepseek-r1", "url": "https://api-docs.deepseek.com/news/news250120", "organizer": "DeepSeek", "license": "DeepSeek", "MLE-Lite_Elo": 1137, "Tabular_Elo": 1053, "NLP_Elo": 1103, "CV_Elo": 1083, "Overall": 1100},
+     {"model_name": "gemini-2.0-flash", "url": "https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 847, "Tabular_Elo": 923, "NLP_Elo": 860, "CV_Elo": 978, "Overall": 895},
+     {"model_name": "gemini-2.0-pro", "url": "https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 1064, "Tabular_Elo": 1139, "NLP_Elo": 1028, "CV_Elo": 973, "Overall": 1054},
+     {"model_name": "gemini-2.5-pro", "url": "https://deepmind.google/technologies/gemini/pro/", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 1257, "Tabular_Elo": 1150, "NLP_Elo": 1266, "CV_Elo": 1177, "Overall": 1214},
  ]
  master_df = pd.DataFrame(data)

+ # ---------------------------------------------------------------------------
+ # Category helpers
+ # ---------------------------------------------------------------------------
+ CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
+ DEFAULT_CATEGORY = "Overall"
+ CATEGORY_MAP = {
+     "Overall": "Overall",
+     "MLE-Lite": "MLE-Lite_Elo",
+     "Tabular": "Tabular_Elo",
+     "NLP": "NLP_Elo",
+     "CV": "CV_Elo",
  }

+ # ---------------------------------------------------------------------------
+ # Leaderboard Update Routine
+ # ---------------------------------------------------------------------------
+
+ def update_leaderboard(category: str, ascending: bool):
+     """Return a fresh, nicely formatted DataFrame based on user selections."""
+     score_col = CATEGORY_MAP.get(category, CATEGORY_MAP[DEFAULT_CATEGORY])
+
+     df = (
+         master_df[["model_name", "url", "organizer", "license", score_col]].copy()
+         .sort_values(by=score_col, ascending=ascending)
+         .reset_index(drop=True)
+     )
+
+     # Add Rank & hyperlink the model name
+     df.insert(0, "Rank", df.index + 1)
+     df["Model"] = (
+         df.apply(lambda r: f"<a href='{r.url}' target='_blank'>{r.model_name}</a>", axis=1)
+     )
+
+     df.rename(columns={
+         "organizer": "Organizer",
+         "license": "License",
+         score_col: "Elo Score",
+     }, inplace=True)
+
+     return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
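Compared with the previous `update_leaderboard(category)`, the rewritten routine takes an explicit `ascending` flag and re-derives `Rank` from the sorted order on every call. A minimal standalone sanity check of that ranking logic, using only the Overall Elo values from the data table above (assumes pandas is installed; not part of the commit):

import pandas as pd

rows = [
    ("gpt-4o-mini", 778), ("gpt-4o", 841), ("o3-mini", 1096),
    ("deepseek-v3", 1023), ("deepseek-r1", 1100), ("gemini-2.0-flash", 895),
    ("gemini-2.0-pro", 1054), ("gemini-2.5-pro", 1214),
]
df = pd.DataFrame(rows, columns=["model_name", "Overall"])

# Same core steps as update_leaderboard: sort, then derive Rank from position.
ranked = df.sort_values(by="Overall", ascending=False).reset_index(drop=True)
ranked.insert(0, "Rank", ranked.index + 1)

assert ranked.loc[0, "model_name"] == "gemini-2.5-pro"  # highest Overall Elo
assert ranked.loc[0, "Rank"] == 1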
+
+ # ---------------------------------------------------------------------------
+ # Custom CSS — premium typography & subtle surfaces
+ # ---------------------------------------------------------------------------
+ custom_css += """
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
+
+ html, body {
+     font-family: 'Inter', 'Helvetica Neue', Arial, sans-serif !important;
+     font-size: 17px !important; /* slightly larger default */
+     color: #1f2937;
+     background-color: #f9fafb;
+     line-height: 1.55;
+ }
+
+ /* Gradient text utility */
+ .gradient-text {
+     background: linear-gradient(90deg, #0284c7 0%, #6366f1 100%);
+     -webkit-background-clip: text;
+     -webkit-text-fill-color: transparent;
+ }
+
+ /* Markdown tweaks */
+ .markdown-text h2 {
+     font-weight: 600;
+     margin-top: 1.2em;
+ }
+
+ /* Radio buttons & checkboxes */
+ .gr-radio, .gr-checkbox {
+     padding: 0.35em 0.75em;
+     border-radius: 0.5rem;
+     background-color: #ffffff;
+     box-shadow: 0 1px 2px rgba(0,0,0,0.06);
+ }
+
+ /* Data table */
+ #leaderboard-table table {
+     width: 100%;
+     border-collapse: collapse;
+ }
+ #leaderboard-table th {
+     background-color: #e2e8f0;
+     font-weight: 600;
+     text-transform: uppercase;
+     font-size: 0.85rem;
+     letter-spacing: 0.03em;
+     padding: 0.6em;
+ }
+ #leaderboard-table td {
+     padding: 0.55em 0.6em;
+     vertical-align: top;
+ }
+ #leaderboard-table tr:nth-child(even) { background-color: #f8fafc; }
+ #leaderboard-table tr:hover { background-color: #eef2ff; }
+
+ /* Links */
+ a { color: #2563eb; text-decoration: none; }
+ a:hover { text-decoration: underline; }
+
+ /* Accordion style tweak */
+ .gr-accordion .label {
+     font-weight: 600;
+     font-size: 1rem;
  }
  """
 
 

+ # ---------------------------------------------------------------------------
+ # Gradio App ✨
+ # ---------------------------------------------------------------------------
+
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft(
+     primary_hue="indigo",
+     neutral_hue="slate",
+     font=["Inter", "Helvetica Neue", "Arial", "sans-serif"],
+ ))

  with demo:
+     gr.HTML(TITLE)
+     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+     with gr.Tabs():
+         # ---------- Leaderboard Tab ----------
+         with gr.TabItem("🏅 Leaderboard"):
+             gr.Markdown("### Model Elo Rankings by Category")
+             with gr.Row():
+                 category_selector = gr.Radio(
+                     choices=CATEGORIES,
+                     value=DEFAULT_CATEGORY,
+                     label="Category",
+                     interactive=True,
+                 )
+                 order_checkbox = gr.Checkbox(
+                     label="⬆️ Ascending order (lower Elo first)",
+                     value=False,
+                 )
+             leaderboard_table = gr.Dataframe(
+                 value=update_leaderboard(DEFAULT_CATEGORY, False),
+                 headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
+                 datatype=["number", "html", "str", "str", "number"],
+                 row_count=(len(master_df), "fixed"),
+                 col_count=(5, "fixed"),
+                 interactive=False,
+                 elem_id="leaderboard-table",
+             )
+             # wire‑up events
+             category_selector.change(update_leaderboard, [category_selector, order_checkbox], leaderboard_table)
+             order_checkbox.change(update_leaderboard, [category_selector, order_checkbox], leaderboard_table)
+
+         # ---------- About Tab ----------
+         with gr.TabItem("ℹ️ About"):
+             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+         # ---------- (Optional) Submit Tab ----------
+         # You can re‑enable this section when your `add_new_eval()` & REPO_ID are ready.
+         # with gr.TabItem("🚀 Submit"):
+         #     pass
+
+     # ---------- Citation Accordion ----------
+     with gr.Accordion("📖 Citation", open=False):
+         gr.Textbox(
+             value=CITATION_BUTTON_TEXT,
+             label=CITATION_BUTTON_LABEL,
+             lines=10,
+             show_copy_button=True,
+         )
+
+ # ---------------------------------------------------------------------------
+ # Scheduler (optional) — restart the HF Space every 30 min to free memory
+ # ---------------------------------------------------------------------------

+ def restart_space():
+     print(f"🔄 Restarting Space → {REPO_ID}")
+     # Example: `HfApi().restart_space(repo_id=REPO_ID)`

  if __name__ == "__main__":
+     if REPO_ID != "your/space-id":
+         scheduler = BackgroundScheduler()
+         scheduler.add_job(restart_space, "interval", seconds=1800)
+         scheduler.start()
+         print("🗓️ Background scheduler active (30 min restart).")
+
+     print("🚀 Launching Gradio app…")
+     demo.launch()
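As committed, `restart_space` only logs. For a Space that should actually restart itself, a possible concrete implementation is the sketch below; it assumes `huggingface_hub` is installed and that an `HF_TOKEN` with write access to the Space is set in the environment (both assumptions, not part of this commit). `HfApi.restart_space(repo_id=...)` is the real huggingface_hub call the in-file comment refers to.

import os
from huggingface_hub import HfApi

def restart_space():
    print(f"🔄 Restarting Space → {REPO_ID}")
    # Token handling here is illustrative; any valid write token works.
    HfApi(token=os.environ.get("HF_TOKEN")).restart_space(repo_id=REPO_ID)

Wired into the existing BackgroundScheduler job, this would restart the Space every 30 minutes exactly as the scheduler section above intends.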