Jerrycool committed on
Commit
c13e962
·
verified ·
1 Parent(s): 3dd92ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -141
app.py CHANGED
@@ -3,55 +3,49 @@ import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
 
5
  """
6
- MLE‑Dojo Benchmark Leaderboard — Polished Edition
7
- -------------------------------------------------
8
- This version focuses on premium typography, elegant color palette, and richer
9
- UI controls (including ascending/descending sort) while remaining completely
10
- self‑contained.
11
-
12
- *️⃣ HOW TO USE
13
- -------------------------------------------------
14
- 1. Install deps → `pip install gradio pandas apscheduler`
15
- 2. Launch → `python mle_dojo_leaderboard_app.py`
16
- 3. Tailor any of the placeholder values (TITLE, INTRODUCTION_TEXT, etc.) to your
17
- project or import them from your own `src` package — the try/except block at
18
- the top handles either workflow gracefully.
19
  """
20
 
21
  # ---------------------------------------------------------------------------
22
- # Placeholder fall‑back imports (remove once your own src/ is in PYTHONPATH)
23
  # ---------------------------------------------------------------------------
24
  try:
25
  from src.about import (
26
  CITATION_BUTTON_LABEL,
27
  CITATION_BUTTON_TEXT,
28
- EVALUATION_QUEUE_TEXT,
29
  INTRODUCTION_TEXT,
30
  LLM_BENCHMARKS_TEXT,
31
  TITLE,
32
  )
33
- from src.display.css_html_js import custom_css # optional
34
  from src.envs import REPO_ID
35
  from src.submission.submit import add_new_eval
36
- print("✅ Imported UI copy & helpers from src package.")
37
  except ImportError:
38
- print("⚠️ Falling back to local placeholders — customise as needed.")
39
  CITATION_BUTTON_LABEL = "Citation"
40
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark…"
41
- EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
42
- INTRODUCTION_TEXT = "Welcome to the **MLE‑Dojo Benchmark Leaderboard** compare LLM agents across real‑world ML engineering tasks."
43
- LLM_BENCHMARKS_TEXT = "Further details about tasks, metrics, and evaluation pipelines."
44
  TITLE = (
45
- "<h1 class='hero-title gradient-text'>\U0001F3C6 MLE‑Dojo Benchmark Leaderboard</h1>"
46
- "<p class='subtitle'>Interactive, reproducible &amp; community‑driven ML agent benchmarking</p>"
47
  )
48
- custom_css = "" # will be extended below
49
  REPO_ID = "your/space-id"
50
  def add_new_eval(*_):
51
  return "Submission placeholder."
52
 
53
  # ---------------------------------------------------------------------------
54
- # Data — extend / refresh as new checkpoints are evaluated
55
  # ---------------------------------------------------------------------------
56
  data = [
57
  {"model_name": "gpt-4o-mini", "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 753, "Tabular_Elo": 839, "NLP_Elo": 758, "CV_Elo": 754, "Overall": 778},
@@ -66,7 +60,7 @@ data = [
66
  master_df = pd.DataFrame(data)
67
 
68
  # ---------------------------------------------------------------------------
69
- # Category helpers
70
  # ---------------------------------------------------------------------------
71
  CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
72
  DEFAULT_CATEGORY = "Overall"
@@ -78,130 +72,75 @@ CATEGORY_MAP = {
78
  "CV": "CV_Elo",
79
  }
80
 
81
- # ---------------------------------------------------------------------------
82
- # Leaderboard Update Routine
83
- # ---------------------------------------------------------------------------
84
-
85
  def update_leaderboard(category: str, ascending: bool):
86
- """Return a fresh, nicely formatted DataFrame based on user selections."""
87
- score_col = CATEGORY_MAP.get(category, CATEGORY_MAP[DEFAULT_CATEGORY])
88
-
89
  df = (
90
- master_df[["model_name", "url", "organizer", "license", score_col]].copy()
91
- .sort_values(by=score_col, ascending=ascending)
92
  .reset_index(drop=True)
93
  )
94
-
95
- # Add Rank & hyperlink the model name
96
  df.insert(0, "Rank", df.index + 1)
97
- df["Model"] = (
98
- df.apply(lambda r: f"<a href='{r.url}' target='_blank'>{r.model_name}</a>", axis=1)
99
- )
100
-
101
- df.rename(columns={
102
- "organizer": "Organizer",
103
- "license": "License",
104
- score_col: "Elo Score",
105
- }, inplace=True)
106
-
107
  return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
108
 
109
  # ---------------------------------------------------------------------------
110
- # Custom CSS — premium typography & subtle surfaces
111
  # ---------------------------------------------------------------------------
112
  custom_css += """
 
113
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
114
-
115
- html, body {
116
- font-family: 'Inter', 'Helvetica Neue', Arial, sans-serif !important;
117
- font-size: 17px !important; /* slightly larger default */
118
- color: #1f2937;
119
- background-color: #f9fafb;
120
  line-height: 1.55;
 
 
121
  }
122
 
123
- /* Gradient text utility */
124
  .gradient-text {
125
- background: linear-gradient(90deg, #0284c7 0%, #6366f1 100%);
126
- -webkit-background-clip: text;
127
- -webkit-text-fill-color: transparent;
128
  }
129
 
130
- /* Markdown tweaks */
131
- .markdown-text h2 {
132
- font-weight: 600;
133
- margin-top: 1.2em;
134
- }
135
 
136
- /* Radio buttons & checkboxes */
137
- .gr-radio, .gr-checkbox {
138
- padding: 0.35em 0.75em;
139
- border-radius: 0.5rem;
140
- background-color: #ffffff;
141
- box-shadow: 0 1px 2px rgba(0,0,0,0.06);
142
- }
143
 
144
- /* Data table */
145
- #leaderboard-table table {
146
- width: 100%;
147
- border-collapse: collapse;
148
- }
149
- #leaderboard-table th {
150
- background-color: #e2e8f0;
151
- font-weight: 600;
152
- text-transform: uppercase;
153
- font-size: 0.85rem;
154
- letter-spacing: 0.03em;
155
- padding: 0.6em;
156
- }
157
- #leaderboard-table td {
158
- padding: 0.55em 0.6em;
159
- vertical-align: top;
160
- }
161
- #leaderboard-table tr:nth-child(even) { background-color: #f8fafc; }
162
- #leaderboard-table tr:hover { background-color: #eef2ff; }
163
 
164
  /* Links */
165
- a { color: #2563eb; text-decoration: none; }
166
- a:hover { text-decoration: underline; }
167
 
168
- /* Accordion style tweak */
169
- .gr-accordion .label {
170
- font-weight: 600;
171
- font-size: 1rem;
172
- }
173
  """
174
 
175
  # ---------------------------------------------------------------------------
176
- # Gradio App
177
  # ---------------------------------------------------------------------------
 
178
 
179
- demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft(
180
- primary_hue="indigo",
181
- neutral_hue="slate",
182
- font=["Inter", "Helvetica Neue", "Arial", "sans-serif"],
183
- ))
184
-
185
- with demo:
186
  gr.HTML(TITLE)
187
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
188
 
189
  with gr.Tabs():
190
- # ---------- Leaderboard Tab ----------
191
- with gr.TabItem("🏅 Leaderboard"):
192
- gr.Markdown("### Model Elo Rankings by Category")
193
  with gr.Row():
194
- category_selector = gr.Radio(
195
- choices=CATEGORIES,
196
- value=DEFAULT_CATEGORY,
197
- label="Category",
198
- interactive=True,
199
- )
200
- order_checkbox = gr.Checkbox(
201
- label="⬆️ Ascending order (lower Elo first)",
202
- value=False,
203
- )
204
- leaderboard_table = gr.Dataframe(
205
  value=update_leaderboard(DEFAULT_CATEGORY, False),
206
  headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
207
  datatype=["number", "html", "str", "str", "number"],
@@ -210,42 +149,26 @@ with demo:
210
  interactive=False,
211
  elem_id="leaderboard-table",
212
  )
213
- # wire‑up events
214
- category_selector.change(update_leaderboard, [category_selector, order_checkbox], leaderboard_table)
215
- order_checkbox.change(update_leaderboard, [category_selector, order_checkbox], leaderboard_table)
216
 
217
- # ---------- About Tab ----------
218
  with gr.TabItem("ℹ️ About"):
219
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
220
 
221
- # ---------- (Optional) Submit Tab ----------
222
- # You can re‑enable this section when your `add_new_eval()` & REPO_ID are ready.
223
- # with gr.TabItem("🚀 Submit"):
224
- # pass
225
-
226
- # ---------- Citation Accordion ----------
227
  with gr.Accordion("📖 Citation", open=False):
228
- gr.Textbox(
229
- value=CITATION_BUTTON_TEXT,
230
- label=CITATION_BUTTON_LABEL,
231
- lines=10,
232
- show_copy_button=True,
233
- )
234
 
235
  # ---------------------------------------------------------------------------
236
- # Scheduler (optional) — restart the HF Space every 30 min to free memory
237
  # ---------------------------------------------------------------------------
238
 
239
  def restart_space():
240
- print(f"🔄 Restarting Space  {REPO_ID}")
241
- # Example: `HfApi().restart_space(repo_id=REPO_ID)`
242
 
243
  if __name__ == "__main__":
244
  if REPO_ID != "your/space-id":
245
  scheduler = BackgroundScheduler()
246
  scheduler.add_job(restart_space, "interval", seconds=1800)
247
  scheduler.start()
248
- print("🗓️ Background scheduler active (30 min restart).")
249
-
250
- print("🚀 Launching Gradio app…")
251
- demo.launch()
 
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
 
5
  """
6
+ MLE‑Dojo Benchmark Leaderboard — Dark Elegance Edition
7
+ =====================================================
8
+ A refined, low‑glare UI with larger table text, richer surface layering, and a
9
+ subtle neon accent that pops on dark slate backgrounds.
10
+
11
+ HOW TO
12
+ ------
13
+ * `pip install gradio pandas apscheduler`
14
+ * `python app.py`
15
+ * Replace placeholder copy (TITLE …) with your own or keep the try/except.
 
 
 
16
  """
17
 
18
  # ---------------------------------------------------------------------------
19
+ # Import app copy (falls back to placeholders if src/ is absent)
20
  # ---------------------------------------------------------------------------
21
  try:
22
  from src.about import (
23
  CITATION_BUTTON_LABEL,
24
  CITATION_BUTTON_TEXT,
 
25
  INTRODUCTION_TEXT,
26
  LLM_BENCHMARKS_TEXT,
27
  TITLE,
28
  )
29
+ from src.display.css_html_js import custom_css # optional extra rules
30
  from src.envs import REPO_ID
31
  from src.submission.submit import add_new_eval
 
32
  except ImportError:
33
+ # ── Placeholders ───────────────────────────────────────────────────────────
34
  CITATION_BUTTON_LABEL = "Citation"
35
  CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark…"
36
+ INTRODUCTION_TEXT = "Welcome to the **MLE‑Dojo Benchmark Leaderboard** — compare LLM agents across realistic ML engineering tasks."
37
+ LLM_BENCHMARKS_TEXT = "Further details about tasks, metrics and evaluation pipelines."
 
38
  TITLE = (
39
+ "<h1 class='hero-title gradient-text'>🏆 MLE‑Dojo Benchmark Leaderboard</h1>"
40
+ "<p class='subtitle'>Interactive, reproducible &amp; community‑driven MLagent benchmarking</p>"
41
  )
42
+ custom_css = ""
43
  REPO_ID = "your/space-id"
44
  def add_new_eval(*_):
45
  return "Submission placeholder."
46
 
47
  # ---------------------------------------------------------------------------
48
+ # Data
49
  # ---------------------------------------------------------------------------
50
  data = [
51
  {"model_name": "gpt-4o-mini", "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 753, "Tabular_Elo": 839, "NLP_Elo": 758, "CV_Elo": 754, "Overall": 778},
 
60
  master_df = pd.DataFrame(data)
61
 
62
  # ---------------------------------------------------------------------------
63
+ # Helpers
64
  # ---------------------------------------------------------------------------
65
  CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
66
  DEFAULT_CATEGORY = "Overall"
 
72
  "CV": "CV_Elo",
73
  }
74
 
 
 
 
 
75
  def update_leaderboard(category: str, ascending: bool):
76
+ col = CATEGORY_MAP.get(category, CATEGORY_MAP[DEFAULT_CATEGORY])
 
 
77
  df = (
78
+ master_df[["model_name", "url", "organizer", "license", col]]
79
+ .sort_values(by=col, ascending=ascending)
80
  .reset_index(drop=True)
81
  )
 
 
82
  df.insert(0, "Rank", df.index + 1)
83
+ df["Model"] = df.apply(lambda r: f"<a href='{r.url}' target='_blank'>{r.model_name}</a>", axis=1)
84
+ df.rename(columns={"organizer": "Organizer", "license": "License", col: "Elo Score"}, inplace=True)
 
 
 
 
 
 
 
 
85
  return df[["Rank", "Model", "Organizer", "License", "Elo Score"]]
86
 
87
  # ---------------------------------------------------------------------------
88
+ # Dark‑mode CSS & Larger Table Fonts
89
  # ---------------------------------------------------------------------------
90
  custom_css += """
91
+ /* ————— Core Typography ————— */
92
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
93
+ html,body {
94
+ font-family: 'Inter', sans-serif !important;
95
+ font-size: 18px !important;
 
 
 
96
  line-height: 1.55;
97
+ color: #e2e8f0;
98
+ background:#0f172a;
99
  }
100
 
101
+ /* Accent gradient for titles */
102
  .gradient-text {
103
+ background:linear-gradient(90deg,#6366f1 0%,#06b6d4 100%);
104
+ -webkit-background-clip:text; -webkit-text-fill-color:transparent;
 
105
  }
106
 
107
+ /* Markdown headings */
108
+ .markdown-text h2{font-weight:600;margin-top:1.3em;color:#f1f5f9;}
 
 
 
109
 
110
+ /* Radio & checkbox containers */
111
+ .gr-radio, .gr-checkbox{background:#1e293b;border-radius:8px;padding:6px 12px;box-shadow:0 1px 3px rgba(0,0,0,.4);}
112
+ .gr-radio input:checked+label, .gr-checkbox input:checked+label{color:#38bdf8;}
 
 
 
 
113
 
114
+ /* Table Styling */
115
+ #leaderboard-table table{width:100%;border-collapse:collapse;background:#1e293b;border-radius:8px;overflow:hidden;}
116
+ #leaderboard-table th{background:#334155;font-size:0.9rem;font-weight:600;padding:0.7em;color:#f1f5f9;text-transform:uppercase;letter-spacing:.04em;}
117
+ #leaderboard-table td{padding:0.6em;font-size:1.05rem;border-top:1px solid #334155;}
118
+ #leaderboard-table tr:nth-child(even){background:#1c2431;}
119
+ #leaderboard-table tr:hover{background:#475569;}
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  /* Links */
122
+ a{color:#38bdf8;} a:hover{text-decoration:underline;}
 
123
 
124
+ /* Accordion */
125
+ .gr-accordion .label{font-weight:600;font-size:1rem;color:#f1f5f9;}
 
 
 
126
  """
127
 
128
  # ---------------------------------------------------------------------------
129
+ # Gradio App
130
  # ---------------------------------------------------------------------------
131
+ app = gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="sky", neutral_hue="slate", font=["Inter",]))
132
 
133
+ with app:
 
 
 
 
 
 
134
  gr.HTML(TITLE)
135
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
136
 
137
  with gr.Tabs():
138
+ with gr.TabItem("🏅 Leaderboard"):
139
+ gr.Markdown("### Model Elo Rankings by Category", elem_classes="markdown-text")
 
140
  with gr.Row():
141
+ category_radio = gr.Radio(CATEGORIES, value=DEFAULT_CATEGORY, label="Category")
142
+ asc_check = gr.Checkbox(label="⬆️ Ascending order", value=False)
143
+ board = gr.Dataframe(
 
 
 
 
 
 
 
 
144
  value=update_leaderboard(DEFAULT_CATEGORY, False),
145
  headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
146
  datatype=["number", "html", "str", "str", "number"],
 
149
  interactive=False,
150
  elem_id="leaderboard-table",
151
  )
152
+ category_radio.change(update_leaderboard, [category_radio, asc_check], board)
153
+ asc_check.change(update_leaderboard, [category_radio, asc_check], board)
 
154
 
 
155
  with gr.TabItem("ℹ️ About"):
156
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
157
 
 
 
 
 
 
 
158
  with gr.Accordion("📖 Citation", open=False):
159
+ gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=10, show_copy_button=True)
 
 
 
 
 
160
 
161
  # ---------------------------------------------------------------------------
162
+ # Optional scheduler (placeholder for periodic HF Space restarts; restart_space only prints a message)
163
  # ---------------------------------------------------------------------------
164
 
165
  def restart_space():
166
+ print(f"Restarting space {REPO_ID}")
 
167
 
168
  if __name__ == "__main__":
169
  if REPO_ID != "your/space-id":
170
  scheduler = BackgroundScheduler()
171
  scheduler.add_job(restart_space, "interval", seconds=1800)
172
  scheduler.start()
173
+ print("Launching app…")
174
+ app.launch()