Jerrycool committed on
Commit
839c9e4
Β·
verified Β·
1 Parent(s): e4014fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -67
app.py CHANGED
@@ -5,29 +5,31 @@ from apscheduler.schedulers.background import BackgroundScheduler
5
 
6
  # --- Make sure these imports work relative to your file structure ---
7
  # Option 1: If src is a directory in the same folder as your script:
8
- from src.about import (
9
- CITATION_BUTTON_LABEL,
10
- CITATION_BUTTON_TEXT,
11
- EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
12
- INTRODUCTION_TEXT,
13
- LLM_BENCHMARKS_TEXT,
14
- TITLE,
15
- )
16
- from src.display.css_html_js import custom_css
17
- from src.envs import REPO_ID # Keep if needed for restart_space or other functions
18
- from src.submission.submit import add_new_eval # Keep if using the submit tab
19
-
 
20
  # Option 2: If you don't have these files, define placeholders (REMOVE THIS if using Option 1)
21
- # print("Warning: Using placeholder values for src module imports.")
22
- # CITATION_BUTTON_LABEL="Citation"
23
- # CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
24
- # EVALUATION_QUEUE_TEXT="Current evaluation queue:"
25
- # INTRODUCTION_TEXT="Welcome to the MLE-Dojo Benchmark Leaderboard."
26
- # LLM_BENCHMARKS_TEXT="Information about the benchmarks..."
27
- # TITLE="<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>"
28
- # custom_css=""
29
- # REPO_ID="your/space-id" # Replace with actual ID if needed
30
- # def add_new_eval(*args): return "Submission placeholder."
 
31
  # --- End Placeholder Definitions ---
32
 
33
 
@@ -36,14 +38,14 @@ from src.submission.submit import add_new_eval # Keep if using the submit tab
36
  # !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
37
  # Verify organizer and license information for accuracy.
38
  data = [
39
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
40
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
41
- {'model_name': 'o3-mini', 'url': 'https://placeholder.url/o3-mini', 'organizer': 'Unknown', 'license': 'Unknown', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
42
- {'model_name': 'deepseek-v3', 'url': 'https://deepseek.com/', 'organizer': 'DeepSeek AI', 'license': 'DeepSeek License', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
43
- {'model_name': 'deepseek-r1', 'url': 'https://deepseek.com/', 'organizer': 'DeepSeek AI', 'license': 'DeepSeek License', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
44
- {'model_name': 'gemini-2.0-flash', 'url': 'https://deepmind.google/technologies/gemini/flash/', 'organizer': 'Google', 'license': 'Proprietary (API)', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
45
- {'model_name': 'gemini-2.0-pro', 'url': 'https://deepmind.google/technologies/gemini/#introduction', 'organizer': 'Google', 'license': 'Proprietary (API)', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
46
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/2-5-pro/', 'organizer': 'Google', 'license': 'Proprietary (API)', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
47
  ]
48
 
49
  # Create a master DataFrame
@@ -73,15 +75,17 @@ def update_leaderboard(category):
73
  if score_column is None or score_column not in master_df.columns:
74
  print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
75
  score_column = category_to_column[DEFAULT_CATEGORY]
76
- if score_column not in master_df.columns: # Check fallback column too
77
- # Return empty df with correct columns if still invalid
78
- # Use lowercase keys here consistent with master_df for the empty case
 
 
79
  return pd.DataFrame({
80
  "Rank": [],
81
  "Model": [],
82
- "organizer": [], # lowercase
83
- "license": [], # lowercase
84
- "Elo Score": []
85
  })
86
 
87
  # Select base columns + the score column for sorting
@@ -106,14 +110,16 @@ def update_leaderboard(category):
106
  # Rename the score column to 'Elo Score' for consistent display
107
  df.rename(columns={score_column: 'Elo Score'}, inplace=True)
108
 
 
 
 
109
  # Select and reorder columns for final display using the ACTUAL column names in df
110
- # Use lowercase 'organizer' and 'license' here because they haven't been renamed.
111
- final_columns = ["Rank", "Model", "organizer", "license", "Elo Score"]
112
  df = df[final_columns]
113
 
114
  # Note: The DataFrame returned now has columns:
115
- # 'Rank', 'Model', 'organizer', 'license', 'Elo Score'
116
-
117
  return df
118
 
119
  # --- Mock/Placeholder functions/data for other tabs ---
@@ -125,23 +131,50 @@ pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "S
125
  EVAL_COLS = ["Model", "Status", "Requested", "Started"] # Define for the dataframe headers
126
  EVAL_TYPES = ["str", "str", "str", "str"] # Define for the dataframe types
127
 
128
-
129
  # --- Keep restart function if relevant ---
130
  def restart_space():
131
  # Make sure REPO_ID is correctly defined/imported if this function is used
132
  print(f"Attempting to restart space: {REPO_ID}")
133
  # Replace with your actual space restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))
134
 
 
135
  # --- Gradio App Definition ---
136
- # Add custom CSS rules here or ensure custom_css is imported correctly
137
- # Example CSS rules you might want in your custom_css:
138
- # table { width: 100%; border-collapse: collapse; }
139
- # th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; } /* Allow wrapping */
140
- # th { background-color: #f2f2f2; font-weight: bold; }
141
- # tr:nth-child(even) { background-color: #f9f9f9; }
142
- # tr:hover { background-color: #e9e9e9; }
143
- # td a { color: #007bff; text-decoration: none; }
144
- # td a:hover { text-decoration: underline; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  # Use a theme for better default styling
147
  demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
@@ -149,6 +182,7 @@ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
149
  with demo:
150
  # Use the TITLE variable imported or defined above
151
  gr.HTML(TITLE)
 
152
  # Use the INTRODUCTION_TEXT variable imported or defined above
153
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
154
 
@@ -165,16 +199,16 @@ with demo:
165
  leaderboard_df_component = gr.Dataframe(
166
  # Initialize with sorted data for the default category
167
  value=update_leaderboard(DEFAULT_CATEGORY),
168
- # Headers for DISPLAY remain capitalized
169
  headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
170
- # Datatype maps to the final df columns: Rank, Model, organizer, license, Elo Score
171
  datatype=["number", "html", "str", "str", "number"],
172
  interactive=False,
173
  # --- FIX APPLIED: Removed unsupported 'height' argument ---
174
  # row_count determines the number of rows to display
175
- row_count=(len(master_df), "fixed"),
176
  col_count=(5, "fixed"),
177
- wrap=True, # Allow text wrapping
178
  elem_id="leaderboard-table" # CSS hook for custom styling
179
  )
180
  # Link the radio button change to the update function
@@ -185,7 +219,7 @@ with demo:
185
  )
186
 
187
  with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-about", id=1):
188
- # Use the LLM_BENCHMARKS_TEXT variable imported or defined above
189
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
190
 
191
  # --- Submit Tab (Commented out as in original request) ---
@@ -227,26 +261,45 @@ with demo:
227
  # submission_result,
228
  # )
229
 
230
-
231
  # --- Citation Row (at the bottom, outside Tabs) ---
232
  with gr.Accordion("πŸ“™ Citation", open=False):
233
- # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables imported or defined above
234
- citation_button = gr.Textbox(
235
- value=CITATION_BUTTON_TEXT,
236
- label=CITATION_BUTTON_LABEL,
237
- lines=10,
238
- elem_id="citation-button",
239
- show_copy_button=True,
240
- )
 
 
 
 
 
241
 
242
  # --- Keep scheduler if relevant ---
243
- # scheduler = BackgroundScheduler()
244
- # scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
245
- # scheduler.start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  # --- Launch the app ---
248
  # Ensures the app launches only when the script is run directly
249
  if __name__ == "__main__":
250
  # Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
251
  # Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions above.
 
252
  demo.launch()
 
5
 
6
  # --- Make sure these imports work relative to your file structure ---
7
  # Option 1: If src is a directory in the same folder as your script:
8
+ try:
9
+ from src.about import (
10
+ CITATION_BUTTON_LABEL,
11
+ CITATION_BUTTON_TEXT,
12
+ EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
13
+ INTRODUCTION_TEXT,
14
+ LLM_BENCHMARKS_TEXT,
15
+ TITLE,
16
+ )
17
+ from src.display.css_html_js import custom_css # Assuming this exists but might be empty
18
+ from src.envs import REPO_ID # Keep if needed for restart_space or other functions
19
+ from src.submission.submit import add_new_eval # Keep if using the submit tab
20
+ print("Successfully imported from src module.")
21
  # Option 2: If you don't have these files, define placeholders (REMOVE THIS if using Option 1)
22
+ except ImportError:
23
+ print("Warning: Using placeholder values because src module imports failed.")
24
+ CITATION_BUTTON_LABEL="Citation"
25
+ CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
26
+ EVALUATION_QUEUE_TEXT="Current evaluation queue:"
27
+ INTRODUCTION_TEXT="Welcome to the MLE-Dojo Benchmark Leaderboard."
28
+ LLM_BENCHMARKS_TEXT="Information about the benchmarks..."
29
+ TITLE="<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>"
30
+ custom_css="" # Start with empty CSS if not imported
31
+ REPO_ID="your/space-id" # Replace with actual ID if needed
32
+ def add_new_eval(*args): return "Submission placeholder."
33
  # --- End Placeholder Definitions ---
34
 
35
 
 
38
  # !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
39
  # Verify organizer and license information for accuracy.
40
  data = [
41
+ {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
42
+ {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
43
+ {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
44
+ {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
45
+ {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
46
+ {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
47
+ {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
48
+ {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
49
  ]
50
 
51
  # Create a master DataFrame
 
75
  if score_column is None or score_column not in master_df.columns:
76
  print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
77
  score_column = category_to_column[DEFAULT_CATEGORY]
78
+ # Check fallback column too
79
+ if score_column not in master_df.columns:
80
+ # Return empty df with correct columns if still invalid
81
+ # Use lowercase keys here consistent with master_df for the empty case
82
+ print(f"Error: Default column '{score_column}' also not found.")
83
  return pd.DataFrame({
84
  "Rank": [],
85
  "Model": [],
86
+ "Elo Score": [],
87
+ "Organizer": [], # Changed 'organizer' -> 'Organizer' for consistency in empty case
88
+ "License": [] # Changed 'license' -> 'License' for consistency in empty case
89
  })
90
 
91
  # Select base columns + the score column for sorting
 
110
  # Rename the score column to 'Elo Score' for consistent display
111
  df.rename(columns={score_column: 'Elo Score'}, inplace=True)
112
 
113
+ # Rename 'organizer' and 'license' to match desired display headers
114
+ df.rename(columns={'organizer': 'Organizer', 'license': 'License'}, inplace=True)
115
+
116
  # Select and reorder columns for final display using the ACTUAL column names in df
117
+ # Use capitalized 'Organizer' and 'License' here because they have been renamed.
118
+ final_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]
119
  df = df[final_columns]
120
 
121
  # Note: The DataFrame returned now has columns:
122
+ # 'Rank', 'Model', 'Organizer', 'License', 'Elo Score'
 
123
  return df
124
 
125
  # --- Mock/Placeholder functions/data for other tabs ---
 
131
  EVAL_COLS = ["Model", "Status", "Requested", "Started"] # Define for the dataframe headers
132
  EVAL_TYPES = ["str", "str", "str", "str"] # Define for the dataframe types
133
 
 
134
  # --- Keep restart function if relevant ---
135
  def restart_space():
136
  # Make sure REPO_ID is correctly defined/imported if this function is used
137
  print(f"Attempting to restart space: {REPO_ID}")
138
  # Replace with your actual space restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))
139
 
140
+
141
  # --- Gradio App Definition ---
142
+
143
+ # ***** FONT SIZE INCREASED HERE *****
144
+ # Add CSS rules to make the base font size larger.
145
+ # Adjust the '1.2em' value (e.g., to '1.4em', '16px') to change the size.
146
+ # The !important flag helps override theme defaults.
147
+ # If the imported custom_css already has content, append to it.
148
+ font_size_css = """
149
+ body {
150
+ font-size: 1.2em !important; /* Increase base font size */
151
+ }
152
+ /* Optional: Target specific elements if needed */
153
+ /*
154
+ #leaderboard-table th, #leaderboard-table td {
155
+ font-size: 1em !important; /* Adjust table font size relative to new body size */
156
+ padding: 10px 14px !important; /* Increase padding for better spacing */
157
+ }
158
+ h1, .markdown-text h1 { font-size: 2.2em !important; } /* Make main title larger */
159
+ h2, .markdown-text h2 { font-size: 1.8em !important; } /* Make section titles larger */
160
+ button { font-size: 1.1em !important; padding: 8px 16px !important; } /* Slightly larger buttons */
161
+ .gr-input, .gr-dropdown, .gr-textbox textarea { font-size: 1em !important; } /* Ensure inputs scale too */
162
+ */
163
+ """
164
+ # Append the new CSS to any existing custom_css
165
+ custom_css += font_size_css
166
+
167
+ # Add basic table styling if not already present
168
+ if "table {" not in custom_css:
169
+ custom_css += """
170
+ table { width: 100%; border-collapse: collapse; margin-top: 10px; margin-bottom: 10px; }
171
+ th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; vertical-align: top; } /* Allow wrapping, top align */
172
+ th { background-color: #f2f2f2; font-weight: bold; }
173
+ tr:nth-child(even) { background-color: #f9f9f9; }
174
+ tr:hover { background-color: #e9e9e9; }
175
+ td a { color: #007bff; text-decoration: none; }
176
+ td a:hover { text-decoration: underline; }
177
+ """
178
 
179
  # Use a theme for better default styling
180
  demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
 
182
  with demo:
183
  # Use the TITLE variable imported or defined above
184
  gr.HTML(TITLE)
185
+
186
  # Use the INTRODUCTION_TEXT variable imported or defined above
187
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
188
 
 
199
  leaderboard_df_component = gr.Dataframe(
200
  # Initialize with sorted data for the default category
201
  value=update_leaderboard(DEFAULT_CATEGORY),
202
+ # Headers for DISPLAY should match the *renamed* columns from update_leaderboard
203
  headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
204
+ # Datatype maps to the final df columns: Rank, Model, Organizer, License, Elo Score
205
  datatype=["number", "html", "str", "str", "number"],
206
  interactive=False,
207
  # --- FIX APPLIED: Removed unsupported 'height' argument ---
208
  # row_count determines the number of rows to display
209
+ row_count=(len(master_df), "fixed"), # Display all rows
210
  col_count=(5, "fixed"),
211
+ wrap=True, # Allow text wrapping in cells
212
  elem_id="leaderboard-table" # CSS hook for custom styling
213
  )
214
  # Link the radio button change to the update function
 
219
  )
220
 
221
  with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-about", id=1):
222
+ # Use the LLM_BENCHMARKS_TEXT variable imported or defined above
223
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
224
 
225
  # --- Submit Tab (Commented out as in original request) ---
 
261
  # submission_result,
262
  # )
263
 
 
264
  # --- Citation Row (at the bottom, outside Tabs) ---
265
  with gr.Accordion("πŸ“™ Citation", open=False):
266
+ # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables imported or defined above
267
+ citation_button = gr.Textbox(
268
+ value=CITATION_BUTTON_TEXT,
269
+ label=CITATION_BUTTON_LABEL,
270
+ lines=10, # Adjust lines if needed for new font size
271
+ elem_id="citation-button",
272
+ show_copy_button=True,
273
+ )
274
+
275
+ # IGNORE_WHEN_COPYING_START
276
+ # content_copy download
277
+ # Use code with caution.
278
+ # IGNORE_WHEN_COPYING_END
279
 
280
  # --- Keep scheduler if relevant ---
281
+ # Only start scheduler if the script is run directly
282
+ if __name__ == "__main__":
283
+ try:
284
+ scheduler = BackgroundScheduler()
285
+ # Add job only if restart_space is callable (i.e., not a placeholder or failed import)
286
+ if callable(restart_space):
287
+ # Check if REPO_ID seems valid before scheduling
288
+ if REPO_ID and REPO_ID != "your/space-id":
289
+ scheduler.add_job(restart_space, "interval", seconds=1800) # Restart every 30 mins
290
+ scheduler.start()
291
+ else:
292
+ print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
293
+ else:
294
+ print("Warning: restart_space function not available; space restart job not scheduled.")
295
+ except Exception as e:
296
+ print(f"Failed to initialize or start scheduler: {e}")
297
+
298
 
299
  # --- Launch the app ---
300
  # Ensures the app launches only when the script is run directly
301
  if __name__ == "__main__":
302
  # Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
303
  # Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions above.
304
+ print("Launching Gradio App...")
305
  demo.launch()