Jerrycool committed on
Commit
7153753
·
verified ·
1 Parent(s): 00fb337

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -38
app.py CHANGED
@@ -10,12 +10,14 @@ try:
10
  EVALUATION_QUEUE_TEXT,
11
  INTRODUCTION_TEXT,
12
  LLM_BENCHMARKS_TEXT,
13
- TITLE, # Will override below
14
  )
15
  from src.display.css_html_js import custom_css
16
  from src.envs import REPO_ID
17
  from src.submission.submit import add_new_eval
 
18
  except ImportError:
 
19
  CITATION_BUTTON_LABEL = "Citation"
20
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
21
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
@@ -27,14 +29,14 @@ except ImportError:
27
 
28
  # --- Elo Data ---
29
  data = [
30
- {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
31
- {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
32
- {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
33
- {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
34
- {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
35
  {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
36
- {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
37
- {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
38
  ]
39
  master_df = pd.DataFrame(data)
40
 
@@ -61,14 +63,22 @@ def update_leaderboard(category):
61
  df.rename(columns={col:'Elo Score','organizer':'Organizer','license':'License'}, inplace=True)
62
  return df[['Rank','Model','Organizer','License','Elo Score']]
63
 
64
- # --- Dark Theme + Custom CSS ---
65
- custom_css += """
 
 
 
 
 
 
 
66
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
 
67
 
68
  body {
69
  font-family: 'Inter', sans-serif;
70
- background-color: #1e1e2f !important;
71
- color: #e0e0f0 !important;
72
  }
73
 
74
  /* Hero Section */
@@ -79,7 +89,7 @@ body {
79
  border-radius: .75rem;
80
  margin-bottom: 1.5rem;
81
  text-align: center;
82
- box-shadow: 0 4px 10px rgba(0,0,0,0.3);
83
  }
84
  .hero-section h1 {
85
  margin: 0;
@@ -98,14 +108,14 @@ body {
98
  border-radius: 20px !important;
99
  padding: 0.5rem 1rem !important;
100
  margin-right: 0.5rem !important;
101
- background: #3a3a4c !important;
102
- color: #e0e0f0 !important;
103
  border: none !important;
104
  transition: background 0.3s !important;
105
  font-weight: 500 !important;
106
  }
107
  .tab-buttons button:hover {
108
- background: #4a4a6f !important;
109
  }
110
  .tab-buttons button[aria-selected="true"] {
111
  background: #6c63ff !important;
@@ -119,8 +129,7 @@ body {
119
  padding: 0.5rem 1rem;
120
  margin-right: 0.5rem;
121
  border-radius: 999px;
122
- background: #3a3a4c;
123
- color: #e0e0f0;
124
  cursor: pointer;
125
  transition: background 0.3s, color 0.3s;
126
  font-weight: 500;
@@ -134,55 +143,52 @@ body {
134
  table {
135
  width: 100%;
136
  border: none;
137
- border-radius: .5rem;
 
138
  overflow: hidden;
139
- box-shadow: 0 2px 4px rgba(0,0,0,0.3);
140
  margin: 1rem 0;
141
  }
142
  th {
143
- background: #6c63ff !important;
144
- color: #fff !important;
145
  }
146
  td, th {
147
  padding: 0.75rem 1rem;
148
- background: #1e1e2f;
149
- color: #e0e0f0;
150
- }
151
- tr:nth-child(even) td {
152
- background: #2a2a3c;
153
- }
154
- tr:hover td {
155
- background: #3c3b52;
156
  }
 
 
157
  td a {
158
- color: #9afeff;
159
  text-decoration: none;
160
  }
161
  td a:hover {
 
162
  text-decoration: underline;
163
  }
164
  """
 
165
 
166
- # --- Override Title with Hero ---
167
  TITLE = """
168
  <div class="hero-section">
169
- <h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>
170
  <h2>Improving LLM Agents for Machine Learning Engineering</h2>
171
  </div>
172
  """
173
 
174
- # --- Build App with valid Dark theme ---
175
- # demo = gr.Blocks(css=custom_css, theme=gr.themes.Dark())
176
 
177
  with demo:
178
  gr.HTML(TITLE)
179
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
180
 
181
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
182
- with gr.TabItem("📋 Leaderboard"):
183
  gr.Markdown("## Model Elo Rankings by Category")
184
  category_selector = gr.Radio(
185
  choices=CATEGORIES,
 
186
  value=DEFAULT_CATEGORY,
187
  interactive=True,
188
  elem_id="category-selector"
@@ -203,7 +209,7 @@ with demo:
203
  outputs=leaderboard_df
204
  )
205
 
206
- with gr.TabItem("ℹ️ About"):
207
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
208
 
209
  with gr.Accordion("📙 Citation", open=False):
@@ -217,4 +223,7 @@ with demo:
217
 
218
  if __name__ == "__main__":
219
  print("Launching Gradio App...")
220
- demo.launch()
 
 
 
 
10
  EVALUATION_QUEUE_TEXT,
11
  INTRODUCTION_TEXT,
12
  LLM_BENCHMARKS_TEXT,
13
+ TITLE, # We will override TITLE below for styling
14
  )
15
  from src.display.css_html_js import custom_css
16
  from src.envs import REPO_ID
17
  from src.submission.submit import add_new_eval
18
+ print("Successfully imported from src module.")
19
  except ImportError:
20
+ print("Warning: Using placeholder values because src module imports failed.")
21
  CITATION_BUTTON_LABEL = "Citation"
22
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
23
  EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
 
29
 
30
  # --- Elo Data ---
31
  data = [
32
+ {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
33
+ {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
34
+ {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
35
+ {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
36
+ {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek','license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
37
  {'model_name': 'gemini-2.0-flash','url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
38
+ {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
39
+ {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
40
  ]
41
  master_df = pd.DataFrame(data)
42
 
 
63
  df.rename(columns={col:'Elo Score','organizer':'Organizer','license':'License'}, inplace=True)
64
  return df[['Rank','Model','Organizer','License','Elo Score']]
65
 
66
+ # --- Advanced Styling ---
67
+ # Base font-size bump (if any)
68
+ font_size_css = """
69
+ body { font-size: 1em !important; }
70
+ """
71
+ custom_css += font_size_css
72
+
73
+ # Import fonts & icons + component styling
74
+ advanced_css = """
75
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
76
+ @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');
77
 
78
  body {
79
  font-family: 'Inter', sans-serif;
80
+ background-color: #f8f9fa;
81
+ color: #333;
82
  }
83
 
84
  /* Hero Section */
 
89
  border-radius: .75rem;
90
  margin-bottom: 1.5rem;
91
  text-align: center;
92
+ box-shadow: 0 4px 10px rgba(0,0,0,0.1);
93
  }
94
  .hero-section h1 {
95
  margin: 0;
 
108
  border-radius: 20px !important;
109
  padding: 0.5rem 1rem !important;
110
  margin-right: 0.5rem !important;
111
+ background: #e0e0e0 !important;
112
+ color: #333 !important;
113
  border: none !important;
114
  transition: background 0.3s !important;
115
  font-weight: 500 !important;
116
  }
117
  .tab-buttons button:hover {
118
+ background: #d0d0d0 !important;
119
  }
120
  .tab-buttons button[aria-selected="true"] {
121
  background: #6c63ff !important;
 
129
  padding: 0.5rem 1rem;
130
  margin-right: 0.5rem;
131
  border-radius: 999px;
132
+ background: #e0e0e0;
 
133
  cursor: pointer;
134
  transition: background 0.3s, color 0.3s;
135
  font-weight: 500;
 
143
  table {
144
  width: 100%;
145
  border: none;
146
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
147
+ border-radius: 0.5rem;
148
  overflow: hidden;
 
149
  margin: 1rem 0;
150
  }
151
  th {
152
+ background: #6c63ff;
153
+ color: #fff;
154
  }
155
  td, th {
156
  padding: 0.75rem 1rem;
 
 
 
 
 
 
 
 
157
  }
158
+ tr:nth-child(even) { background: #f7f5ff; }
159
+ tr:hover { background: #edeaff; }
160
  td a {
161
+ color: #6c63ff;
162
  text-decoration: none;
163
  }
164
  td a:hover {
165
+ color: #534bbe;
166
  text-decoration: underline;
167
  }
168
  """
169
+ custom_css += advanced_css
170
 
171
+ # --- Override Title ---
172
  TITLE = """
173
  <div class="hero-section">
174
+ <h1><i class="fas fa-trophy"></i> MLE-Dojo Benchmark Leaderboard</h1>
175
  <h2>Improving LLM Agents for Machine Learning Engineering</h2>
176
  </div>
177
  """
178
 
179
+ # --- Build App ---
180
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
181
 
182
  with demo:
183
  gr.HTML(TITLE)
184
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
185
 
186
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
187
+ with gr.TabItem("<i class='fas fa-list'></i> Leaderboard"):
188
  gr.Markdown("## Model Elo Rankings by Category")
189
  category_selector = gr.Radio(
190
  choices=CATEGORIES,
191
+ label="Select Category:",
192
  value=DEFAULT_CATEGORY,
193
  interactive=True,
194
  elem_id="category-selector"
 
209
  outputs=leaderboard_df
210
  )
211
 
212
+ with gr.TabItem("<i class='fas fa-info-circle'></i> About"):
213
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
214
 
215
  with gr.Accordion("📙 Citation", open=False):
 
223
 
224
  if __name__ == "__main__":
225
  print("Launching Gradio App...")
226
+ try:
227
+ demo.launch()
228
+ except Exception as e:
229
+ print(f"Error launching app: {e}")