Jerrycool committed on
Commit
6e3d36f
·
verified ·
1 Parent(s): 5d4bad7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +773 -135
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
 
4
 
5
- # --- Placeholder Imports / Definitions ---
 
6
  try:
7
  from src.about import (
8
  CITATION_BUTTON_LABEL,
@@ -10,210 +12,846 @@ try:
10
  EVALUATION_QUEUE_TEXT,
11
  INTRODUCTION_TEXT,
12
  LLM_BENCHMARKS_TEXT,
13
- TITLE, # We will override TITLE below
14
  )
15
  from src.display.css_html_js import custom_css
16
  from src.envs import REPO_ID
17
  from src.submission.submit import add_new_eval
 
 
18
  except ImportError:
 
19
  CITATION_BUTTON_LABEL = "Citation"
20
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
21
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
22
- INTRODUCTION_TEXT = "Welcome to the MLE-Dojo Benchmark Leaderboard."
23
- LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  custom_css = ""
25
  REPO_ID = "your/space-id"
26
  def add_new_eval(*args): return "Submission placeholder."
27
 
28
- # --- Elo Data ---
 
29
  data = [
30
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
31
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
32
- {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
33
- {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
34
- {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
35
- {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
36
- {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
37
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
38
  ]
 
 
 
 
 
 
 
 
 
 
39
  master_df = pd.DataFrame(data)
40
 
41
- CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
 
 
 
 
 
 
 
 
 
 
42
  DEFAULT_CATEGORY = "Overall"
 
 
43
  category_to_column = {
44
- "Overall": "Overall",
45
  "MLE-Lite": "MLE-Lite_Elo",
46
  "Tabular": "Tabular_Elo",
47
  "NLP": "NLP_Elo",
48
  "CV": "CV_Elo",
 
49
  }
50
 
51
- def update_leaderboard(category):
52
- col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])
53
- df = master_df[['model_name','url','organizer','license',col]].copy()
54
- df.sort_values(by=col, ascending=False, inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  df.reset_index(drop=True, inplace=True)
56
- df.insert(0, 'Rank', df.index+1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  df['Model'] = df.apply(
58
- lambda r: f"<a href='{r['url']}' target='_blank'>{r['model_name']}</a>",
 
 
 
 
 
 
59
  axis=1
60
  )
61
- df.rename(columns={col:'Elo Score','organizer':'Organizer','license':'License'}, inplace=True)
62
- return df[['Rank','Model','Organizer','License','Elo Score']]
63
 
64
- # --- Dark Theme + Custom CSS ---
65
- custom_css += """
66
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- body {
69
- font-family: 'Inter', sans-serif;
70
- background-color: #1e1e2f !important;
71
- color: #e0e0f0 !important;
 
 
 
 
 
 
 
 
 
72
  }
73
 
74
- /* Hero Section */
75
- .hero-section {
76
- background: linear-gradient(135deg, #6c63ff, #8f94fb);
77
- color: #fff;
78
- padding: 2rem 1rem;
79
- border-radius: .75rem;
80
- margin-bottom: 1.5rem;
81
- text-align: center;
82
- box-shadow: 0 4px 10px rgba(0,0,0,0.3);
83
  }
84
- .hero-section h1 {
85
- margin: 0;
 
86
  font-size: 2.5rem !important;
87
  font-weight: 700 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  }
89
- .hero-section h2 {
90
- margin: .5rem 0 0 !important;
91
- font-size: 1.25rem !important;
92
- font-weight: 400 !important;
93
- opacity: 0.9;
 
 
 
 
94
  }
95
 
96
- /* Tab Buttons */
97
- .tab-buttons button {
98
- border-radius: 20px !important;
99
- padding: 0.5rem 1rem !important;
100
- margin-right: 0.5rem !important;
101
- background: #3a3a4c !important;
102
- color: #e0e0f0 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  border: none !important;
104
- transition: background 0.3s !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  font-weight: 500 !important;
 
 
 
 
 
 
 
 
 
 
106
  }
107
- .tab-buttons button:hover {
108
- background: #4a4a6f !important;
 
 
 
 
109
  }
110
- .tab-buttons button[aria-selected="true"] {
111
- background: #6c63ff !important;
112
- color: #fff !important;
 
 
 
 
 
 
 
 
 
 
 
113
  }
114
 
115
- /* Category Selector Pills */
116
- #category-selector input[type="radio"] { display: none; }
117
- #category-selector label {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  display: inline-block;
119
- padding: 0.5rem 1rem;
120
- margin-right: 0.5rem;
121
- border-radius: 999px;
122
- background: #3a3a4c;
123
- color: #e0e0f0;
124
- cursor: pointer;
125
- transition: background 0.3s, color 0.3s;
126
  font-weight: 500;
 
127
  }
128
- #category-selector input[type="radio"]:checked + label {
129
- background: #6c63ff;
130
- color: #fff;
 
 
131
  }
132
 
133
- /* Table Styling */
134
- table {
135
- width: 100%;
136
- border: none;
137
- border-radius: .5rem;
138
- overflow: hidden;
139
- box-shadow: 0 2px 4px rgba(0,0,0,0.3);
140
- margin: 1rem 0;
141
  }
142
- th {
143
- background: #6c63ff !important;
144
- color: #fff !important;
 
 
145
  }
146
- td, th {
147
- padding: 0.75rem 1rem;
148
- background: #1e1e2f;
149
- color: #e0e0f0;
 
 
 
 
 
150
  }
151
- tr:nth-child(even) td {
152
- background: #2a2a3c;
 
 
 
 
 
 
 
153
  }
154
- tr:hover td {
155
- background: #3c3b52;
 
 
 
 
 
 
 
156
  }
157
- td a {
158
- color: #9afeff;
159
- text-decoration: none;
 
 
160
  }
161
- td a:hover {
162
- text-decoration: underline;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  }
164
- """
165
 
166
- # --- Override Title with Hero ---
167
- TITLE = """
168
- <div class="hero-section">
169
- <h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>
170
- <h2>Improving LLM Agents for Machine Learning Engineering</h2>
171
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  """
173
 
174
- # --- Build App ---
175
- demo = gr.Blocks(css=custom_css, theme=gr.themes.SoftDark())
 
 
 
176
 
177
  with demo:
178
- gr.HTML(TITLE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
180
 
181
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
182
- with gr.TabItem("πŸ“‹ Leaderboard"):
183
- gr.Markdown("## Model Elo Rankings by Category")
184
- category_selector = gr.Radio(
185
- choices=CATEGORIES,
186
- value=DEFAULT_CATEGORY,
187
- interactive=True,
188
- elem_id="category-selector"
189
- )
190
- leaderboard_df = gr.Dataframe(
191
- value=update_leaderboard(DEFAULT_CATEGORY),
192
- headers=["Rank","Model","Organizer","License","Elo Score"],
193
- datatype=["number","html","str","str","number"],
194
- interactive=False,
195
- row_count=(len(master_df),"fixed"),
196
- col_count=(5,"fixed"),
197
- wrap=True,
198
- elem_id="leaderboard-table"
199
- )
200
- category_selector.change(
201
- fn=update_leaderboard,
202
- inputs=category_selector,
203
- outputs=leaderboard_df
204
- )
205
-
206
- with gr.TabItem("ℹ️ About"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
- with gr.Accordion("πŸ“™ Citation", open=False):
210
- gr.Textbox(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  value=CITATION_BUTTON_TEXT,
212
  label=CITATION_BUTTON_LABEL,
213
  lines=10,
214
  elem_id="citation-button",
215
- show_copy_button=True
216
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
 
218
  if __name__ == "__main__":
219
- demo.launch()
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
+ from datetime import datetime
5
 
6
+ # --- Make sure these imports work relative to your file structure ---
7
+ # Option 1: If src is a directory in the same folder as your script:
8
  try:
9
  from src.about import (
10
  CITATION_BUTTON_LABEL,
 
12
  EVALUATION_QUEUE_TEXT,
13
  INTRODUCTION_TEXT,
14
  LLM_BENCHMARKS_TEXT,
15
+ TITLE,
16
  )
17
  from src.display.css_html_js import custom_css
18
  from src.envs import REPO_ID
19
  from src.submission.submit import add_new_eval
20
+ print("Successfully imported from src module.")
21
+ # Option 2: If you don't have these files, define placeholders
22
  except ImportError:
23
+ print("Warning: Using placeholder values because src module imports failed.")
24
  CITATION_BUTTON_LABEL = "Citation"
25
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
26
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
27
+ INTRODUCTION_TEXT = """
28
+ # Welcome to the MLE-Dojo Benchmark Leaderboard
29
+
30
+ This leaderboard tracks the performance of various AI models across multiple machine learning engineering domains.
31
+ Our comprehensive evaluation system uses ELO ratings to provide a fair comparison between different models.
32
+
33
+ ## How to read this leaderboard
34
+ - Select a domain category to view specialized rankings
35
+ - Higher ELO scores indicate better performance
36
+ - Click on any model name to learn more about it
37
+ """
38
+ LLM_BENCHMARKS_TEXT = """
39
+ # About the MLE-Dojo Benchmark
40
+
41
+ ## Evaluation Methodology
42
+ The MLE-Dojo benchmark evaluates models across various domains including:
43
+
44
+ - **MLE-Lite**: Basic machine learning engineering tasks
45
+ - **Tabular**: Data manipulation, analysis, and modeling with structured data
46
+ - **NLP**: Natural language processing tasks including classification, generation, and understanding
47
+ - **CV**: Computer vision tasks including image classification, object detection, and generation
48
+
49
+ Our evaluation uses a sophisticated ELO rating system that considers the relative performance of models against each other.
50
+
51
+ ## Contact
52
+ For more information or to submit your model, please contact us at [email protected]
53
+ """
54
+ TITLE = "<h1>πŸ† MLE-Dojo Benchmark Leaderboard</h1>"
55
  custom_css = ""
56
  REPO_ID = "your/space-id"
57
  def add_new_eval(*args): return "Submission placeholder."
58
 
59
+ # --- Elo Leaderboard Configuration ---
60
+ # Enhanced data with Rank (placeholder), Organizer, License, and URL
61
  data = [
62
+ {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
63
+ {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
64
+ {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
65
+ {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
66
+ {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
67
+ {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
68
+ {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
69
+ {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
70
  ]
71
+
72
+ # Add organization logos (for visual enhancement)
73
+ org_logos = {
74
+ 'OpenAI': 'πŸ“±', # You can replace these with actual icon URLs in production
75
+ 'DeepSeek': 'πŸ”',
76
+ 'Google': '🌐',
77
+ 'Default': 'πŸ€–'
78
+ }
79
+
80
+ # Create a master DataFrame
81
  master_df = pd.DataFrame(data)
82
 
83
+ # Add last updated timestamp
84
+ last_updated = datetime.now().strftime("%B %d, %Y at %H:%M:%S")
85
+
86
+ # Define categories with fancy icons
87
+ CATEGORIES = [
88
+ ("πŸ† Overall", "Overall"),
89
+ ("πŸ’‘ MLE-Lite", "MLE-Lite"),
90
+ ("πŸ“Š Tabular", "Tabular"),
91
+ ("πŸ“ NLP", "NLP"),
92
+ ("πŸ‘οΈ CV", "CV")
93
+ ]
94
  DEFAULT_CATEGORY = "Overall"
95
+
96
+ # Map user-facing categories to DataFrame column names
97
  category_to_column = {
 
98
  "MLE-Lite": "MLE-Lite_Elo",
99
  "Tabular": "Tabular_Elo",
100
  "NLP": "NLP_Elo",
101
  "CV": "CV_Elo",
102
+ "Overall": "Overall"
103
  }
104
 
105
+ # --- Helper function to update leaderboard ---
106
def _resolve_category(category_label):
    """Return the bare category name carried by a selector value.

    Accepts a ``(display label, value)`` tuple, a display label such as
    ``"<icon> Overall"``, or a bare name such as ``"Overall"``. Anything that
    is not a tuple or a string (for example ``None``) resolves to
    ``DEFAULT_CATEGORY``.
    """
    if isinstance(category_label, tuple):
        return category_label[1]
    if not isinstance(category_label, str):
        return DEFAULT_CATEGORY
    # Display labels are "<icon> <name>": the name is the last space-separated
    # token. A label without spaces is returned unchanged by the same split.
    return category_label.strip().split(" ")[-1]


def _elo_color(score):
    """Return the hex colour of the score tier (>=1000, >=900, >=800, else)."""
    if score >= 1000:
        return "#1a5fb4"
    if score >= 900:
        return "#2ec27e"
    if score >= 800:
        return "#e5a50a"
    return "#ff7800"


def update_leaderboard(category_label):
    """Build the leaderboard table for one category.

    Reads the module-level ``master_df``, ``category_to_column``,
    ``org_logos`` and ``DEFAULT_CATEGORY``.

    Args:
        category_label: The selector value: a ``(label, value)`` tuple, a
            display label ("<icon> <name>"), or a bare category name.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``Model``,
        ``Organization``, ``License`` and ``Elo Score (<category>)``, sorted by
        the category's score in descending order. ``Rank``, ``Model`` and the
        score column hold HTML/text cells. If neither the requested nor the
        default score column exists in ``master_df``, an empty frame with the
        same columns is returned.
    """
    import html  # function-scope import: not part of the file's import block

    category = _resolve_category(category_label)
    score_column = category_to_column.get(category)
    if score_column is None or score_column not in master_df.columns:
        print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
        # Keep the header label in sync with the data that is actually shown.
        category = DEFAULT_CATEGORY
        score_column = category_to_column[DEFAULT_CATEGORY]

    display_columns = ["Rank", "Model", "Organization", "License", f"Elo Score ({category})"]
    if score_column not in master_df.columns:
        print(f"Error: Default column '{score_column}' also not found.")
        return pd.DataFrame(columns=display_columns)

    ranked = (
        master_df[["model_name", "url", "organizer", "license", score_column]]
        .sort_values(by=score_column, ascending=False)
        .reset_index(drop=True)
    )

    # Medal prefixes for the podium (U+1F947 first, U+1F948 second,
    # U+1F949 third place medal); written as escapes so the source file's
    # encoding cannot corrupt them.
    medals = {0: "\U0001F947 1", 1: "\U0001F948 2", 2: "\U0001F949 3"}
    ranks = [medals.get(position, f"{position + 1}") for position in range(len(ranked))]

    def model_cell(organizer, url, model_name):
        # Escape the text and the attribute value so a name or URL containing
        # quotes or angle brackets cannot break the cell markup.
        logo = org_logos.get(organizer, org_logos["Default"])
        href = html.escape(str(url)) if pd.notna(url) else "#"
        return (
            '<div style="display: flex; align-items: center;">'
            f'<span style="font-size: 1.5em; margin-right: 10px;">{logo}</span>'
            f"<a href='{href}' target='_blank' "
            "style='color: #0066cc; text-decoration: none; font-weight: 500; font-size: 1.05em;'>"
            f"{html.escape(str(model_name))}"
            "</a>"
            "</div>"
        )

    def elo_cell(score):
        color = _elo_color(score)
        # Bar length maps 700 -> 0% and 1400 -> 100%, clamped to [5, 100].
        width = min(100, max(5, (score - 700) / 7))
        return (
            '<div style="display: flex; align-items: center;">'
            f'<span style="font-weight: bold; color: {color}">{score}</span>'
            '<div style="margin-left: 10px; height: 12px; width: 60px; '
            'background-color: #eaeaea; border-radius: 6px; overflow: hidden;">'
            f'<div style="height: 100%; width: {width}%; background-color: {color};"></div>'
            "</div>"
            "</div>"
        )

    model_cells = [
        model_cell(organizer, url, model_name)
        for organizer, url, model_name in zip(
            ranked["organizer"], ranked["url"], ranked["model_name"]
        )
    ]
    elo_cells = [elo_cell(score) for score in ranked[score_column]]

    return pd.DataFrame(
        {
            "Rank": ranks,
            "Model": model_cells,
            "Organization": ranked["organizer"].tolist(),
            "License": ranked["license"].tolist(),
            display_columns[-1]: elo_cells,
        },
        columns=display_columns,
    )
190
+
191
+ # --- Mock/Placeholder functions/data for other tabs ---
192
+ print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
193
+ finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
194
+ running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
195
+ pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
196
+ EVAL_COLS = ["Model", "Status", "Requested", "Started"]
197
+ EVAL_TYPES = ["str", "str", "str", "str"]
198
+
199
+ # --- Keep restart function if relevant ---
200
def restart_space():
    """Print a notice that a restart of the Space named by ``REPO_ID`` is attempted.

    Placeholder: only the message is printed; no restart call is made here.
    """
    notice = f"Attempting to restart space: {REPO_ID}"
    print(notice)
    # Replace with your actual space restart mechanism if needed
203
 
204
+ # --- Enhanced CSS for beauty and readability ---
205
+ enhanced_css = """
206
+ /* Base styling */
207
+ :root {
208
+ --primary-color: #1a5fb4;
209
+ --secondary-color: #2ec27e;
210
+ --accent-color: #e5a50a;
211
+ --warning-color: #ff7800;
212
+ --text-color: #333333;
213
+ --background-color: #ffffff;
214
+ --card-background: #f9f9f9;
215
+ --border-color: #e0e0e0;
216
+ --shadow-color: rgba(0, 0, 0, 0.1);
217
  }
218
 
219
+ /* Typography */
220
+ body, .gradio-container {
221
+ font-family: 'Inter', 'Segoe UI', Roboto, -apple-system, BlinkMacSystemFont, system-ui, sans-serif !important;
222
+ font-size: 16px !important;
223
+ line-height: 1.6 !important;
224
+ color: var(--text-color) !important;
225
+ background-color: var(--background-color) !important;
 
 
226
  }
227
+
228
+ /* Headings */
229
+ h1 {
230
  font-size: 2.5rem !important;
231
  font-weight: 700 !important;
232
+ margin-bottom: 1.5rem !important;
233
+ color: var(--primary-color) !important;
234
+ text-align: center !important;
235
+ letter-spacing: -0.02em !important;
236
+ line-height: 1.2 !important;
237
+ }
238
+
239
+ h2 {
240
+ font-size: 1.8rem !important;
241
+ font-weight: 600 !important;
242
+ margin-top: 1.5rem !important;
243
+ margin-bottom: 1rem !important;
244
+ color: var(--primary-color) !important;
245
+ letter-spacing: -0.01em !important;
246
+ }
247
+
248
+ h3 {
249
+ font-size: 1.4rem !important;
250
+ font-weight: 600 !important;
251
+ margin-top: 1.2rem !important;
252
+ margin-bottom: 0.8rem !important;
253
+ color: var(--text-color) !important;
254
+ }
255
+
256
+ /* Tabs styling */
257
+ .tabs {
258
+ margin-top: 1rem !important;
259
+ border-radius: 12px !important;
260
+ overflow: hidden !important;
261
+ box-shadow: 0 4px 12px var(--shadow-color) !important;
262
+ }
263
+
264
+ .tab-nav button {
265
+ font-size: 1.1rem !important;
266
+ font-weight: 500 !important;
267
+ padding: 0.8rem 1.5rem !important;
268
+ border-radius: 0 !important;
269
+ transition: all 0.2s ease !important;
270
+ }
271
+
272
+ .tab-nav button.selected {
273
+ background-color: var(--primary-color) !important;
274
+ color: white !important;
275
+ font-weight: 600 !important;
276
+ }
277
+
278
+ /* Card styling */
279
+ .gradio-container .gr-box, .gradio-container .gr-panel {
280
+ border-radius: 12px !important;
281
+ border: 1px solid var(--border-color) !important;
282
+ box-shadow: 0 4px 12px var(--shadow-color) !important;
283
+ overflow: hidden !important;
284
+ }
285
+
286
+ /* Table styling */
287
+ table {
288
+ width: 100% !important;
289
+ border-collapse: separate !important;
290
+ border-spacing: 0 !important;
291
+ margin: 1.5rem 0 !important;
292
+ border-radius: 8px !important;
293
+ overflow: hidden !important;
294
+ box-shadow: 0 4px 12px var(--shadow-color) !important;
295
  }
296
+
297
+ th {
298
+ background-color: #f0f5ff !important;
299
+ color: var(--primary-color) !important;
300
+ font-weight: 600 !important;
301
+ padding: 1rem !important;
302
+ font-size: 1.1rem !important;
303
+ text-align: left !important;
304
+ border-bottom: 2px solid var(--primary-color) !important;
305
  }
306
 
307
+ td {
308
+ padding: 1rem !important;
309
+ border-bottom: 1px solid var(--border-color) !important;
310
+ font-size: 1rem !important;
311
+ vertical-align: middle !important;
312
+ }
313
+
314
+ tr:nth-child(even) {
315
+ background-color: #f8fafd !important;
316
+ }
317
+
318
+ tr:hover {
319
+ background-color: #edf2fb !important;
320
+ }
321
+
322
+ tr:first-child td {
323
+ border-top: none !important;
324
+ }
325
+
326
+ /* Button styling */
327
+ button.primary, .gr-button.primary {
328
+ background-color: var(--primary-color) !important;
329
+ color: white !important;
330
+ font-weight: 500 !important;
331
+ padding: 0.8rem 1.5rem !important;
332
+ border-radius: 8px !important;
333
  border: none !important;
334
+ cursor: pointer !important;
335
+ transition: all 0.2s ease !important;
336
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1) !important;
337
+ }
338
+
339
+ button.primary:hover, .gr-button.primary:hover {
340
+ background-color: #0b4a9e !important;
341
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
342
+ transform: translateY(-1px) !important;
343
+ }
344
+
345
+ /* Radio buttons */
346
+ .gr-radio {
347
+ display: flex !important;
348
+ flex-wrap: wrap !important;
349
+ gap: 10px !important;
350
+ margin: 1rem 0 !important;
351
+ }
352
+
353
+ .gr-radio label {
354
+ background-color: #f5f7fa !important;
355
+ border: 1px solid var(--border-color) !important;
356
+ border-radius: 8px !important;
357
+ padding: 0.7rem 1.2rem !important;
358
+ font-size: 1rem !important;
359
  font-weight: 500 !important;
360
+ cursor: pointer !important;
361
+ transition: all 0.2s ease !important;
362
+ display: flex !important;
363
+ align-items: center !important;
364
+ gap: 8px !important;
365
+ }
366
+
367
+ .gr-radio label:hover {
368
+ background-color: #eaeef3 !important;
369
+ border-color: #c0c9d6 !important;
370
  }
371
+
372
+ .gr-radio label.selected {
373
+ background-color: #e0e9f7 !important;
374
+ border-color: var(--primary-color) !important;
375
+ color: var(--primary-color) !important;
376
+ font-weight: 600 !important;
377
  }
378
+
379
+ /* Input fields */
380
+ input, textarea, select {
381
+ font-size: 1rem !important;
382
+ padding: 0.8rem !important;
383
+ border-radius: 8px !important;
384
+ border: 1px solid var(--border-color) !important;
385
+ transition: all 0.2s ease !important;
386
+ }
387
+
388
+ input:focus, textarea:focus, select:focus {
389
+ border-color: var(--primary-color) !important;
390
+ box-shadow: 0 0 0 2px rgba(26, 95, 180, 0.2) !important;
391
+ outline: none !important;
392
  }
393
 
394
+ /* Accordion styling */
395
+ .gr-accordion {
396
+ border-radius: 8px !important;
397
+ overflow: hidden !important;
398
+ margin: 1rem 0 !important;
399
+ border: 1px solid var(--border-color) !important;
400
+ }
401
+
402
+ .gr-accordion-header {
403
+ padding: 1rem !important;
404
+ background-color: #f5f7fa !important;
405
+ font-weight: 600 !important;
406
+ font-size: 1.1rem !important;
407
+ color: var(--text-color) !important;
408
+ }
409
+
410
+ .gr-accordion-content {
411
+ padding: 1rem !important;
412
+ background-color: white !important;
413
+ }
414
+
415
+ /* Markdown text improvements */
416
+ .markdown-text {
417
+ font-size: 1.05rem !important;
418
+ line-height: 1.7 !important;
419
+ }
420
+
421
+ .markdown-text p {
422
+ margin-bottom: 1rem !important;
423
+ }
424
+
425
+ .markdown-text ul, .markdown-text ol {
426
+ margin-left: 1.5rem !important;
427
+ margin-bottom: 1rem !important;
428
+ }
429
+
430
+ .markdown-text li {
431
+ margin-bottom: 0.5rem !important;
432
+ }
433
+
434
+ .markdown-text strong {
435
+ font-weight: 600 !important;
436
+ color: #333 !important;
437
+ }
438
+
439
+ /* Status indicators */
440
+ .status-badge {
441
  display: inline-block;
442
+ padding: 0.3rem 0.7rem;
443
+ border-radius: 99px;
444
+ font-size: 0.85rem;
 
 
 
 
445
  font-weight: 500;
446
+ text-align: center;
447
  }
448
+
449
+ .status-pending {
450
+ background-color: #fff8e0;
451
+ color: #b58a00;
452
+ border: 1px solid #ffd74d;
453
  }
454
 
455
+ .status-running {
456
+ background-color: #e0f2ff;
457
+ color: #0066cc;
458
+ border: 1px solid #66b3ff;
 
 
 
 
459
  }
460
+
461
+ .status-completed {
462
+ background-color: #e6f7ef;
463
+ color: #00875a;
464
+ border: 1px solid #57d9a3;
465
  }
466
+
467
+ /* Footer */
468
+ .footer {
469
+ margin-top: 2rem;
470
+ padding: 1rem;
471
+ text-align: center;
472
+ font-size: 0.9rem;
473
+ color: #666;
474
+ border-top: 1px solid var(--border-color);
475
  }
476
+
477
+ /* Enhanced leaderboard title */
478
+ .leaderboard-header {
479
+ display: flex;
480
+ align-items: center;
481
+ justify-content: space-between;
482
+ margin-bottom: 1.5rem;
483
+ padding-bottom: 1rem;
484
+ border-bottom: 2px solid var(--border-color);
485
  }
486
+
487
+ .leaderboard-title {
488
+ font-size: 2.2rem;
489
+ font-weight: 700;
490
+ color: var(--primary-color);
491
+ margin: 0;
492
+ display: flex;
493
+ align-items: center;
494
+ gap: 0.5rem;
495
  }
496
+
497
+ .leaderboard-subtitle {
498
+ font-size: 1.1rem;
499
+ color: #666;
500
+ margin-top: 0.5rem;
501
  }
502
+
503
+ .timestamp {
504
+ font-size: 0.85rem;
505
+ color: #666;
506
+ font-style: italic;
507
+ }
508
+
509
+ /* Category selector buttons */
510
+ .category-buttons {
511
+ display: flex;
512
+ flex-wrap: wrap;
513
+ gap: 10px;
514
+ margin-bottom: 1.5rem;
515
+ }
516
+
517
+ .category-button {
518
+ padding: 0.7rem 1.2rem;
519
+ background-color: #f0f5ff;
520
+ border: 1px solid #d0e0ff;
521
+ border-radius: 8px;
522
+ font-weight: 500;
523
+ cursor: pointer;
524
+ transition: all 0.2s ease;
525
+ display: flex;
526
+ align-items: center;
527
+ gap: 8px;
528
+ }
529
+
530
+ .category-button:hover {
531
+ background-color: #e0ebff;
532
+ border-color: #b0d0ff;
533
+ }
534
+
535
+ .category-button.active {
536
+ background-color: var(--primary-color);
537
+ color: white;
538
+ border-color: var(--primary-color);
539
  }
 
540
 
541
+ /* Logo and brand styling */
542
+ .logo {
543
+ font-size: 2.5em;
544
+ margin-right: 0.5rem;
545
+ }
546
+
547
+ /* Medal styling for top ranks */
548
+ .rank-1 {
549
+ color: #ffd700;
550
+ font-weight: bold;
551
+ }
552
+
553
+ .rank-2 {
554
+ color: #c0c0c0;
555
+ font-weight: bold;
556
+ }
557
+
558
+ .rank-3 {
559
+ color: #cd7f32;
560
+ font-weight: bold;
561
+ }
562
  """
563
 
564
+ # Combine with any existing CSS
565
+ custom_css = enhanced_css + custom_css
566
+
567
+ # --- Gradio App Definition ---
568
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
569
 
570
  with demo:
571
+ # Enhanced header with timestamp
572
+ gr.HTML(f"""
573
+ <div class="leaderboard-header">
574
+ <div>
575
+ <div class="leaderboard-title">
576
+ <span class="logo">πŸ†</span> MLE-Dojo Benchmark Leaderboard
577
+ </div>
578
+ <div class="leaderboard-subtitle">
579
+ Comprehensive evaluation of AI models across multiple domains
580
+ </div>
581
+ </div>
582
+ <div class="timestamp">
583
+ Last updated: {last_updated}
584
+ </div>
585
+ </div>
586
+ """)
587
+
588
+ # Introduction with enhanced styling
589
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
590
 
591
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
592
+ with gr.TabItem("πŸ“Š Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
593
+ with gr.Column():
594
+ gr.HTML("""
595
+ <h2 style="display: flex; align-items: center; gap: 10px;">
596
+ <span style="font-size: 1.3em;">πŸ“ˆ</span> Model Performance Rankings
597
+ </h2>
598
+ <p class="leaderboard-subtitle">Select a category to view specialized performance metrics</p>
599
+ """)
600
+
601
+ # Enhanced category selector
602
+ category_selector = gr.Radio(
603
+ choices=[x[0] for x in CATEGORIES],
604
+ label="Select Performance Domain:",
605
+ value="πŸ† Overall",
606
+ interactive=True,
607
+ elem_classes="fancy-radio"
608
+ )
609
+
610
+ # Visual separator
611
+ gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
612
+
613
+ # Enhanced leaderboard table
614
+ leaderboard_df_component = gr.Dataframe(
615
+ value=update_leaderboard(DEFAULT_CATEGORY),
616
+ headers=["Rank", "Model", "Organization", "License", f"Elo Score ({DEFAULT_CATEGORY})"],
617
+ datatype=["html", "html", "str", "str", "html"],
618
+ interactive=False,
619
+ row_count=(len(master_df), "fixed"),
620
+ col_count=(5, "fixed"),
621
+ wrap=True,
622
+ elem_id="leaderboard-table",
623
+ )
624
+
625
+ # Stats cards (visual enhancement)
626
+ with gr.Row():
627
+ with gr.Column(scale=1):
628
+ gr.HTML(f"""
629
+ <div style="background-color: #f0f5ff; padding: 20px; border-radius: 12px; text-align: center;">
630
+ <div style="font-size: 2em;">πŸ”</div>
631
+ <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
632
+ <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
633
+ </div>
634
+ """)
635
+ with gr.Column(scale=1):
636
+ gr.HTML(f"""
637
+ <div style="background-color: #e6f7ef; padding: 20px; border-radius: 12px; text-align: center;">
638
+ <div style="font-size: 2em;">🌐</div>
639
+ <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
640
+ <div style="font-size: 1.1em; color: #666;">Organizations</div>
641
+ </div>
642
+ """)
643
+ with gr.Column(scale=1):
644
+ gr.HTML(f"""
645
+ <div style="background-color: #fff8e0; padding: 20px; border-radius: 12px; text-align: center;">
646
+ <div style="font-size: 2em;">🏅</div>
647
+ <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
648
+ <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
649
+ </div>
650
+ """)
651
+
652
+ # Link the radio button change to the update function
653
+ category_selector.change(
654
+ fn=update_leaderboard,
655
+ inputs=category_selector,
656
+ outputs=leaderboard_df_component
657
+ )
658
+
659
+ with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
660
+ # Enhanced about section
661
+ gr.HTML("""
662
+ <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
663
+ <div style="font-size: 4em;">🧪</div>
664
+ <div>
665
+ <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
666
+ <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
667
+ </div>
668
+ </div>
669
+ """)
670
+
671
+ # Use the LLM_BENCHMARKS_TEXT variable
672
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
673
+
674
+ # Add methodology cards for visual enhancement
675
+ with gr.Row():
676
+ with gr.Column():
677
+ gr.HTML("""
678
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
679
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">💡</div>
680
+ <h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
681
+ <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
682
+ data preprocessing, feature engineering, model selection, and basic deployment.</p>
683
+ </div>
684
+ """)
685
+ with gr.Column():
686
+ gr.HTML("""
687
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
688
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📊</div>
689
+ <h3 style="text-align: center; margin-top: 0;">Tabular</h3>
690
+ <p>Tests a model's ability to process, analyze and model structured data, including
691
+ statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
692
+ </div>
693
+ """)
694
+
695
+ with gr.Row():
696
+ with gr.Column():
697
+ gr.HTML("""
698
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
699
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📝</div>
700
+ <h3 style="text-align: center; margin-top: 0;">NLP</h3>
701
+ <p>Evaluates natural language processing capabilities including text classification,
702
+ sentiment analysis, entity recognition, text generation, and language understanding.</p>
703
+ </div>
704
+ """)
705
+ with gr.Column():
706
+ gr.HTML("""
707
+ <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
708
+ <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">👁️</div>
709
+ <h3 style="text-align: center; margin-top: 0;">CV</h3>
710
+ <p>Tests computer vision capabilities including image classification, object detection,
711
+ image generation, and visual understanding tasks across various domains.</p>
712
+ </div>
713
+ """)
714
 
715
+ # Optional: Uncomment if you want to re-enable the Submit tab
716
+ # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
717
+ # with gr.Column():
718
+ # gr.HTML("""
719
+ # <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
720
+ # <div style="font-size: 4em;">🚀</div>
721
+ # <div>
722
+ # <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
723
+ # <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
724
+ # </div>
725
+ # </div>
726
+ # """)
727
+ #
728
+ # with gr.Row():
729
+ # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
730
+ #
731
+ # with gr.Column():
732
+ # with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
733
+ # finished_eval_table = gr.components.Dataframe(
734
+ # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
735
+ # )
736
+ # with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
737
+ # running_eval_table = gr.components.Dataframe(
738
+ # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
739
+ # )
740
+ # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
741
+ # pending_eval_table = gr.components.Dataframe(
742
+ # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
743
+ # )
744
+ #
745
+ # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
746
+ #
747
+ # gr.HTML("""
748
+ # <h2 style="display: flex; align-items: center; gap: 10px;">
749
+ # <span style="font-size: 1.3em;">📝</span> Model Submission Form
750
+ # </h2>
751
+ # """)
752
+ #
753
+ # with gr.Row():
754
+ # with gr.Column():
755
+ # model_name_textbox = gr.Textbox(
756
+ # label="Model Name (on Hugging Face Hub)",
757
+ # placeholder="Enter your model name...",
758
+ # elem_classes="enhanced-input"
759
+ # )
760
+ # revision_name_textbox = gr.Textbox(
761
+ # label="Revision / Commit Hash",
762
+ # placeholder="main",
763
+ # elem_classes="enhanced-input"
764
+ # )
765
+ # model_type = gr.Dropdown(
766
+ # choices=["Type A", "Type B", "Type C"],
767
+ # label="Model Type",
768
+ # multiselect=False,
769
+ # value=None,
770
+ # interactive=True,
771
+ # elem_classes="enhanced-dropdown"
772
+ # )
773
+ # with gr.Column():
774
+ # precision = gr.Dropdown(
775
+ # choices=["float16", "bfloat16", "float32", "int8", "auto"],
776
+ # label="Precision",
777
+ # multiselect=False,
778
+ # value="auto",
779
+ # interactive=True,
780
+ # elem_classes="enhanced-dropdown"
781
+ # )
782
+ # weight_type = gr.Dropdown(
783
+ # choices=["Original", "Adapter", "Delta"],
784
+ # label="Weights Type",
785
+ # multiselect=False,
786
+ # value="Original",
787
+ # interactive=True,
788
+ # elem_classes="enhanced-dropdown"
789
+ # )
790
+ # base_model_name_textbox = gr.Textbox(
791
+ # label="Base Model (for delta or adapter weights)",
792
+ # placeholder="Only needed for adapter/delta weights",
793
+ # elem_classes="enhanced-input"
794
+ # )
795
+ #
796
+ # submit_button = gr.Button(
797
+ # "Submit for Evaluation",
798
+ # elem_classes="primary-button"
799
+ # )
800
+ # submission_result = gr.Markdown()
801
+ # submit_button.click(
802
+ # add_new_eval,
803
+ # [model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type],
804
+ # submission_result,
805
+ # )
806
+
807
+ # Enhanced citation section
808
+ with gr.Accordion("📄 Citation", open=False, elem_classes="citation-accordion"):
809
+ gr.HTML("""
810
+ <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
811
+ <div style="font-size: 2.5em;">📄</div>
812
+ <div>
813
+ <h3 style="margin: 0;">How to Cite This Benchmark</h3>
814
+ <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
815
+ </div>
816
+ </div>
817
+ """)
818
+
819
+ citation_button = gr.Textbox(
820
  value=CITATION_BUTTON_TEXT,
821
  label=CITATION_BUTTON_LABEL,
822
  lines=10,
823
  elem_id="citation-button",
824
+ show_copy_button=True,
825
  )
826
+
827
+ # Footer
828
+ gr.HTML("""
829
+ <div class="footer">
830
+ <p>© 2025 MLE-Dojo Benchmark. All rights reserved.</p>
831
+ <p style="margin-top: 5px; display: flex; justify-content: center; gap: 20px;">
832
+ <a href="#" style="color: #1a5fb4; text-decoration: none;">Privacy Policy</a>
833
+ <a href="#" style="color: #1a5fb4; text-decoration: none;">Terms of Service</a>
834
+ <a href="#" style="color: #1a5fb4; text-decoration: none;">Contact Us</a>
835
+ </p>
836
+ </div>
837
+ """)
838
+
839
+ # --- Keep scheduler if relevant ---
840
# --- Entry point: optionally schedule periodic Space restarts, then launch ---
RESTART_INTERVAL_SECONDS = 1800  # restart the Space every 30 minutes


def schedule_space_restart(restart_fn, repo_id, interval_seconds=RESTART_INTERVAL_SECONDS):
    """Start a background job that periodically calls ``restart_fn``.

    The job is only scheduled when ``restart_fn`` is callable and ``repo_id``
    is set to something other than the "your/space-id" placeholder. Scheduling
    is best-effort: any failure is reported on stdout and never prevents the
    app from launching.

    Args:
        restart_fn: Callable invoked on every interval, or ``None`` when no
            restart function is available.
        repo_id: Identifier of the Space; empty values and the placeholder
            disable scheduling.
        interval_seconds: Seconds between two consecutive calls.

    Returns:
        The started scheduler, or ``None`` when nothing was scheduled.
    """
    if not callable(restart_fn):
        print("Warning: restart_space function not available; space restart job not scheduled.")
        return None
    if not repo_id or repo_id == "your/space-id":
        print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
        return None

    try:
        # Build the scheduler only once we know a job will actually be added.
        scheduler = BackgroundScheduler()
        scheduler.add_job(restart_fn, "interval", seconds=interval_seconds)
        scheduler.start()
    except Exception as e:  # top-level boundary: report and keep launching
        print(f"Failed to initialize or start scheduler: {e}")
        return None
    return scheduler


if __name__ == "__main__":
    # `restart_space` may not be defined at all (e.g. when the optional project
    # imports failed). Looking it up through globals() avoids a NameError that
    # would otherwise be reported as a misleading scheduler failure.
    scheduler = schedule_space_restart(globals().get("restart_space"), REPO_ID)

    print("Launching Enhanced Gradio App...")
    demo.launch()