Jerrycool committed on
Commit
1117820
·
verified ·
1 Parent(s): 5134a55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -108
app.py CHANGED
@@ -1,8 +1,14 @@
 
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
 
5
- # --- Placeholder Imports / Definitions ---
6
  try:
7
  from src.about import (
8
  CITATION_BUTTON_LABEL,
@@ -10,14 +16,12 @@ try:
10
  EVALUATION_QUEUE_TEXT,
11
  INTRODUCTION_TEXT,
12
  LLM_BENCHMARKS_TEXT,
13
- TITLE, # We will override TITLE below for styling
14
  )
15
  from src.display.css_html_js import custom_css
16
  from src.envs import REPO_ID
17
  from src.submission.submit import add_new_eval
18
- print("Successfully imported from src module.")
19
  except ImportError:
20
- print("Warning: Using placeholder values because src module imports failed.")
21
  CITATION_BUTTON_LABEL = "Citation"
22
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
23
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
@@ -25,225 +29,247 @@ except ImportError:
25
  LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
26
  custom_css = ""
27
  REPO_ID = "your/space-id"
28
- def add_new_eval(*args): return "Submission placeholder."
29
 
30
- # --- Elo Data ---
 
 
 
31
  data = [
32
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
33
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
34
- {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
35
- {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
36
- {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
37
- {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
38
- {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
39
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  ]
41
  master_df = pd.DataFrame(data)
42
 
 
43
  CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
44
  DEFAULT_CATEGORY = "Overall"
45
  category_to_column = {
46
  "Overall": "Overall",
47
- "MLE-Lite": "MLE-Lite_Elo",
48
  "Tabular": "Tabular_Elo",
49
  "NLP": "NLP_Elo",
50
  "CV": "CV_Elo",
51
  }
52
 
53
- def update_leaderboard(category):
 
54
  col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])
55
- df = master_df[['model_name','url','organizer','license',col]].copy()
56
  df.sort_values(by=col, ascending=False, inplace=True)
57
  df.reset_index(drop=True, inplace=True)
58
- df.insert(0, 'Rank', df.index+1)
59
- df['Model'] = df.apply(
60
- lambda r: f"<a href='{r['url']}' target='_blank'>{r['model_name']}</a>",
61
- axis=1
 
 
 
62
  )
63
- df.rename(columns={col:'Elo Score','organizer':'Organizer','license':'License'}, inplace=True)
64
- return df[['Rank','Model','Organizer','License','Elo Score']]
65
 
66
- # --- Dark Mode Styling ---
67
- font_size_css = """
68
- body { font-size: 1em !important; }
69
- """
70
- custom_css += font_size_css
71
 
 
72
  dark_css = """
 
73
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
74
- @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');
75
-
76
  body {
77
  font-family: 'Inter', sans-serif;
78
  background-color: #121212;
79
  color: #e0e0e0;
 
80
  }
81
 
82
- /* Hero Section */
83
  .hero-section {
84
  background: linear-gradient(135deg, #333, #222);
85
  color: #e0e0e0;
86
- padding: 2rem 1rem;
87
  border-radius: .75rem;
88
  margin-bottom: 1.5rem;
89
  text-align: center;
90
- box-shadow: 0 4px 10px rgba(0,0,0,0.5);
91
  }
92
  .hero-section h1 {
93
  margin: 0;
94
- font-size: 2.5rem !important;
95
- font-weight: 700 !important;
 
 
 
 
 
 
 
96
  }
97
  .hero-section h2 {
98
- margin: .5rem 0 0 !important;
99
- font-size: 1.25rem !important;
100
- font-weight: 400 !important;
101
- opacity: 0.8;
102
  }
103
 
104
- /* Tab Buttons */
105
  .tab-buttons button {
106
  border-radius: 20px !important;
107
- padding: 0.5rem 1rem !important;
108
- margin-right: 0.5rem !important;
109
  background: #1e1e1e !important;
110
  color: #e0e0e0 !important;
111
  border: none !important;
112
- transition: background 0.3s !important;
113
  font-weight: 500 !important;
 
114
  }
115
- .tab-buttons button:hover {
116
- background: #2c2c2c !important;
117
- }
118
  .tab-buttons button[aria-selected="true"] {
119
  background: #444 !important;
120
  color: #fff !important;
121
  }
122
 
123
- /* Category Selector Pills */
124
- #category-selector input[type="radio"] { display: none; }
125
  #category-selector label {
126
  display: inline-block;
127
- padding: 0.5rem 1rem;
128
- margin-right: 0.5rem;
129
  border-radius: 999px;
130
- background: #1e1e1e;
131
  cursor: pointer;
132
- transition: background 0.3s, color 0.3s;
133
- font-weight: 500;
 
134
  color: #e0e0e0;
135
  }
136
  #category-selector input[type="radio"]:checked + label {
137
- background: #444;
138
  color: #fff;
139
  }
140
 
141
- /* Table Styling */
 
 
 
 
142
  .dataframe-container table {
143
  width: 100%;
 
144
  border: none;
145
- box-shadow: 0 2px 4px rgba(0,0,0,0.5);
146
- border-radius: 0.5rem;
147
- overflow: hidden;
148
  }
149
- .dataframe-container table th {
150
- background: #2c2c2c;
151
  color: #e0e0e0;
 
 
 
152
  }
153
- .dataframe-container table tr:nth-child(odd) {
154
- background-color: #1e1e1e !important;
155
- }
156
- .dataframe-container table tr:nth-child(even) {
157
- background-color: #252525 !important;
158
  }
159
- .dataframe-container table td, .dataframe-container table th {
160
- padding: 0.75rem 1rem;
161
- color: #e0e0e0;
162
- }
163
- .dataframe-container table td a {
164
  color: #8ab4f8;
165
  text-decoration: none;
166
  }
167
- .dataframe-container table td a:hover {
168
  color: #a3c9ff;
169
  text-decoration: underline;
170
  }
171
-
172
- /* Enable scrollbar */
173
- #leaderboard-table .dataframe-container {
174
- max-height: 400px !important;
175
- overflow-y: auto !important;
176
- }
177
  """
 
178
  custom_css += dark_css
179
 
180
- # --- Override Title ---
181
  TITLE = """
182
- <div class=\"hero-section\">
183
- <h1><i class=\"fas fa-trophy\"></i> MLE-Dojo Benchmark Leaderboard</h1>
184
  <h2>Improving LLM Agents for Machine Learning Engineering</h2>
185
  </div>
186
  """
187
 
188
- # --- Build App ---
189
- # Use Dark theme for Gradio
190
  demo = gr.Blocks(css=custom_css, theme=gr.themes.Base())
191
 
192
  with demo:
193
- # Inject FontAwesome JS/CSS explicitly
194
- gr.HTML("""
195
- <link rel=\"stylesheet\"
196
- href=\"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css\"
197
- crossorigin=\"anonymous\" referrerpolicy=\"no-referrer\"/>
198
- <script src=\"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/js/all.min.js\"
199
- crossorigin=\"anonymous\" referrerpolicy=\"no-referrer\"></script>
200
- """
201
  )
202
 
203
- # Header & Intro
204
  gr.HTML(TITLE)
205
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
206
 
207
- # Tabs
208
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
209
- with gr.TabItem("<i class='fas fa-list'></i> Leaderboard"):
210
- gr.Markdown("## Model Elo Rankings by Category")
 
211
  category_selector = gr.Radio(
212
  choices=CATEGORIES,
213
- label="Select Category:",
214
  value=DEFAULT_CATEGORY,
215
  interactive=True,
216
- elem_id="category-selector"
 
217
  )
218
  leaderboard_df = gr.Dataframe(
219
  value=update_leaderboard(DEFAULT_CATEGORY),
220
- headers=["Rank","Model","Organizer","License","Elo Score"],
221
- datatype=["number","html","str","str","number"],
222
  interactive=False,
223
- row_count=(len(master_df),"fixed"),
224
- col_count=(5,"fixed"),
225
  wrap=True,
226
- elem_id="leaderboard-table"
227
  )
228
  category_selector.change(
229
  fn=update_leaderboard,
230
  inputs=category_selector,
231
- outputs=leaderboard_df
232
  )
233
 
234
- with gr.TabItem("<i class='fas fa-info-circle'></i> About"):
 
235
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
236
 
237
- # Citation Accordion
238
  with gr.Accordion("📙 Citation", open=False):
239
  gr.Textbox(
240
  value=CITATION_BUTTON_TEXT,
241
  label=CITATION_BUTTON_LABEL,
242
  lines=10,
243
  elem_id="citation-button",
244
- show_copy_button=True
245
  )
246
 
247
  if __name__ == "__main__":
248
- print("Launching Gradio App in Dark Mode...")
249
- demo.launch()
 
1
+ """
2
+ app.py — MLE-Dojo Dark-Theme Leaderboard
3
+ ---------------------------------------
4
+ Run: python app.py
5
+ """
6
+
7
  import gradio as gr
8
  import pandas as pd
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
 
11
+ # ---------- Placeholder / Fallback Imports ----------
12
  try:
13
  from src.about import (
14
  CITATION_BUTTON_LABEL,
 
16
  EVALUATION_QUEUE_TEXT,
17
  INTRODUCTION_TEXT,
18
  LLM_BENCHMARKS_TEXT,
19
+ TITLE, # 将被覆盖
20
  )
21
  from src.display.css_html_js import custom_css
22
  from src.envs import REPO_ID
23
  from src.submission.submit import add_new_eval
 
24
  except ImportError:
 
25
  CITATION_BUTTON_LABEL = "Citation"
26
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
27
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
 
29
  LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
30
  custom_css = ""
31
  REPO_ID = "your/space-id"
 
32
 
33
+ def add_new_eval(*args):
34
+ return "Submission placeholder."
35
+
36
+ # ---------- Elo Data ----------
37
  data = [
38
+ dict(model_name="gpt-4o-mini", url="https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/",
39
+ organizer="OpenAI", license="Proprietary", MLE_Lite_Elo=753, Tabular_Elo=839,
40
+ NLP_Elo=758, CV_Elo=754, Overall=778),
41
+ dict(model_name="gpt-4o", url="https://openai.com/index/hello-gpt-4o/",
42
+ organizer="OpenAI", license="Proprietary", MLE_Lite_Elo=830, Tabular_Elo=861,
43
+ NLP_Elo=903, CV_Elo=761, Overall=841),
44
+ dict(model_name="o3-mini", url="https://openai.com/index/openai-o3-mini/",
45
+ organizer="OpenAI", license="Proprietary", MLE_Lite_Elo=1108, Tabular_Elo=1019,
46
+ NLP_Elo=1056, CV_Elo=1207, Overall=1096),
47
+ dict(model_name="deepseek-v3", url="https://api-docs.deepseek.com/news/news1226",
48
+ organizer="DeepSeek", license="DeepSeek", MLE_Lite_Elo=1004, Tabular_Elo=1015,
49
+ NLP_Elo=1028, CV_Elo=1067, Overall=1023),
50
+ dict(model_name="deepseek-r1", url="https://api-docs.deepseek.com/news/news250120",
51
+ organizer="DeepSeek", license="DeepSeek", MLE_Lite_Elo=1137, Tabular_Elo=1053,
52
+ NLP_Elo=1103, CV_Elo=1083, Overall=1100),
53
+ dict(model_name="gemini-2.0-flash", url="https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash",
54
+ organizer="Google", license="Proprietary", MLE_Lite_Elo=847, Tabular_Elo=923,
55
+ NLP_Elo=860, CV_Elo=978, Overall=895),
56
+ dict(model_name="gemini-2.0-pro", url="https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/",
57
+ organizer="Google", license="Proprietary", MLE_Lite_Elo=1064, Tabular_Elo=1139,
58
+ NLP_Elo=1028, CV_Elo=973, Overall=1054),
59
+ dict(model_name="gemini-2.5-pro", url="https://deepmind.google/technologies/gemini/pro/",
60
+ organizer="Google", license="Proprietary", MLE_Lite_Elo=1257, Tabular_Elo=1150,
61
+ NLP_Elo=1266, CV_Elo=1177, Overall=1214),
62
  ]
63
  master_df = pd.DataFrame(data)
64
 
65
+ # ---------- Category Logic ----------
66
  CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
67
  DEFAULT_CATEGORY = "Overall"
68
  category_to_column = {
69
  "Overall": "Overall",
70
+ "MLE-Lite": "MLE_Lite_Elo",
71
  "Tabular": "Tabular_Elo",
72
  "NLP": "NLP_Elo",
73
  "CV": "CV_Elo",
74
  }
75
 
76
+
77
+ def update_leaderboard(category: str) -> pd.DataFrame:
78
  col = category_to_column.get(category, category_to_column[DEFAULT_CATEGORY])
79
+ df = master_df[["model_name", "url", "organizer", "license", col]].copy()
80
  df.sort_values(by=col, ascending=False, inplace=True)
81
  df.reset_index(drop=True, inplace=True)
82
+ df.insert(0, "Rank", df.index + 1)
83
+ df["Model"] = df.apply(
84
+ lambda r: f"<a href='{r['url']}' target='_blank'>{r['model_name']}</a>", axis=1
85
+ )
86
+ df.rename(
87
+ columns={col: "Elo Score", "organizer": "Organizer", "license": "License"},
88
+ inplace=True,
89
  )
90
+ return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
 
91
 
 
 
 
 
 
92
 
93
+ # ---------- Dark-Theme CSS ----------
94
  dark_css = """
95
+ /* ---- Google Font & Font Awesome ---- */
96
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
 
 
97
  body {
98
  font-family: 'Inter', sans-serif;
99
  background-color: #121212;
100
  color: #e0e0e0;
101
+ font-size: 15px;
102
  }
103
 
104
+ /* ---- Hero Section ---- */
105
  .hero-section {
106
  background: linear-gradient(135deg, #333, #222);
107
  color: #e0e0e0;
108
+ padding: 1.75rem 1rem;
109
  border-radius: .75rem;
110
  margin-bottom: 1.5rem;
111
  text-align: center;
112
+ box-shadow: 0 4px 12px rgba(0,0,0,0.55);
113
  }
114
  .hero-section h1 {
115
  margin: 0;
116
+ font-size: 2.2rem;
117
+ font-weight: 700;
118
+ display: inline-flex;
119
+ align-items: center;
120
+ gap: .5rem;
121
+ }
122
+ .hero-section h1 i { /* 奖杯与文字同行 & 对齐 */
123
+ margin: 0;
124
+ font-size: 1em;
125
  }
126
  .hero-section h2 {
127
+ margin: .6rem 0 0;
128
+ font-size: 1.15rem;
129
+ font-weight: 400;
130
+ opacity: .8;
131
  }
132
 
133
+ /* ---- Tabs ---- */
134
  .tab-buttons button {
135
  border-radius: 20px !important;
136
+ padding: .55rem 1.15rem !important;
137
+ margin-right: .6rem !important;
138
  background: #1e1e1e !important;
139
  color: #e0e0e0 !important;
140
  border: none !important;
141
+ font-size: .95rem !important;
142
  font-weight: 500 !important;
143
+ transition: background .28s;
144
  }
145
+ .tab-buttons button:hover { background: #2c2c2c !important; }
 
 
146
  .tab-buttons button[aria-selected="true"] {
147
  background: #444 !important;
148
  color: #fff !important;
149
  }
150
 
151
+ /* ---- Category Selector ---- */
 
152
  #category-selector label {
153
  display: inline-block;
154
+ padding: .55rem 1.2rem;
155
+ margin-right: .5rem;
156
  border-radius: 999px;
157
+ background: #1d1d1d;
158
  cursor: pointer;
159
+ transition: background .28s, color .28s;
160
+ font-weight: 600;
161
+ font-size: .95rem;
162
  color: #e0e0e0;
163
  }
164
  #category-selector input[type="radio"]:checked + label {
165
+ background: #3d3d3d;
166
  color: #fff;
167
  }
168
 
169
+ /* ---- Dataframe / Leaderboard ---- */
170
+ .dataframe-container {
171
+ max-height: 420px;
172
+ overflow-y: auto;
173
+ }
174
  .dataframe-container table {
175
  width: 100%;
176
+ border-collapse: collapse;
177
  border: none;
178
+ box-shadow: 0 2px 6px rgba(0,0,0,.55);
179
+ border-radius: .55rem;
 
180
  }
181
+ .dataframe-container thead th {
182
+ background: #272727;
183
  color: #e0e0e0;
184
+ font-weight: 600;
185
+ padding: .85rem 1rem;
186
+ font-size: .9rem;
187
  }
188
+ .dataframe-container tbody tr:nth-child(odd) { background: #1c1c1c; }
189
+ .dataframe-container tbody tr:nth-child(even) { background: #222; }
190
+ .dataframe-container td, .dataframe-container th {
191
+ padding: .8rem 1rem;
192
+ font-size: .88rem;
193
  }
194
+ .dataframe-container td a {
 
 
 
 
195
  color: #8ab4f8;
196
  text-decoration: none;
197
  }
198
+ .dataframe-container td a:hover {
199
  color: #a3c9ff;
200
  text-decoration: underline;
201
  }
 
 
 
 
 
 
202
  """
203
+
204
  custom_css += dark_css
205
 
206
+ # ---------- Override Title ----------
207
  TITLE = """
208
+ <div class="hero-section">
209
+ <h1><i class="fas fa-trophy"></i>MLE-Dojo Benchmark Leaderboard</h1>
210
  <h2>Improving LLM Agents for Machine Learning Engineering</h2>
211
  </div>
212
  """
213
 
214
+ # ---------- Build Gradio App ----------
 
215
  demo = gr.Blocks(css=custom_css, theme=gr.themes.Base())
216
 
217
  with demo:
218
+ # 注入 Font Awesome(保证奖杯可用)
219
+ gr.HTML(
220
+ """
221
+ <link rel="stylesheet"
222
+ href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
223
+ crossorigin="anonymous" referrerpolicy="no-referrer"/>
224
+ """
 
225
  )
226
 
227
+ # -------- Header & Intro --------
228
  gr.HTML(TITLE)
229
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
230
 
231
+ # -------- Tabs --------
232
+ with gr.Tabs(elem_classes="tab-buttons"):
233
+ # --- Leaderboard Tab ---
234
+ with gr.TabItem("📊 Leaderboard"):
235
+ gr.Markdown("### Model Elo Rankings by Category")
236
  category_selector = gr.Radio(
237
  choices=CATEGORIES,
 
238
  value=DEFAULT_CATEGORY,
239
  interactive=True,
240
+ elem_id="category-selector",
241
+ label="Select Category:",
242
  )
243
  leaderboard_df = gr.Dataframe(
244
  value=update_leaderboard(DEFAULT_CATEGORY),
245
+ headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
246
+ datatype=["number", "html", "str", "str", "number"],
247
  interactive=False,
248
+ row_count=(len(master_df), "fixed"),
249
+ col_count=(5, "fixed"),
250
  wrap=True,
251
+ elem_id="leaderboard-table",
252
  )
253
  category_selector.change(
254
  fn=update_leaderboard,
255
  inputs=category_selector,
256
+ outputs=leaderboard_df,
257
  )
258
 
259
+ # --- About Tab ---
260
+ with gr.TabItem("ℹ️ About"):
261
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
262
 
263
+ # -------- Citation --------
264
  with gr.Accordion("📙 Citation", open=False):
265
  gr.Textbox(
266
  value=CITATION_BUTTON_TEXT,
267
  label=CITATION_BUTTON_LABEL,
268
  lines=10,
269
  elem_id="citation-button",
270
+ show_copy_button=True,
271
  )
272
 
273
  if __name__ == "__main__":
274
+ print("Launching Gradio App in Dark Mode")
275
+ demo.launch()