galb-dai committed on
Commit
06f2a09
·
1 Parent(s): d7db717
Files changed (2) hide show
  1. app.py +99 -29
  2. src/display/css_html_js.py +23 -0
app.py CHANGED
@@ -2,7 +2,7 @@
2
 
3
  import gradio as gr
4
  import pandas as pd
5
- import plotly.graph_objects as go # NEW: for interactive chart
6
  from apscheduler.schedulers.background import BackgroundScheduler
7
  from gradio_leaderboard import Leaderboard, SelectColumns
8
  from huggingface_hub import whoami
@@ -233,42 +233,96 @@ STATIC_RESULTS = {
233
  },
234
  }
235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  def build_accuracy_figure(tier: str):
238
- """Create interactive bar chart with hover showing 'solved / total'."""
239
- results = STATIC_RESULTS.get(tier, {})
240
  total = TIER_TOTALS[tier]
241
- x = MODELS_ORDER
242
- y = [results[m] for m in x]
243
- hover = [f"{m}<br><b>{v}/{total}</b> problems solved" for m, v in zip(x, y)]
244
-
245
- fig = go.Figure(
246
- data=[
247
- go.Bar(
248
- x=x,
249
- y=y,
250
- text=[f"{v}/{total}" for v in y],
251
- textposition="auto",
252
- hovertext=hover,
253
- hoverinfo="text",
254
- marker_line_width=0.5,
 
 
 
 
 
 
255
  )
256
- ]
257
- )
 
 
 
 
258
  fig.update_layout(
259
  template="plotly_white",
260
- margin=dict(l=30, r=20, t=10, b=40),
261
- yaxis=dict(title="# Problems Solved", range=[0, total], dtick=max(5, total // 10)),
262
- xaxis=dict(title=None),
263
  height=420,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  )
265
  return fig
266
 
267
 
268
- # Precompute initial figure (Warmup)
269
  _initial_accuracy_fig = build_accuracy_figure("Warmup")
270
 
271
-
272
  # Force light theme even if HF user prefers dark
273
  blocks = gr.Blocks(
274
  css=custom_css,
@@ -278,14 +332,13 @@ blocks = gr.Blocks(
278
  with blocks:
279
 
280
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
281
- # === NEW LANDING TAB (first) ===
282
- with gr.TabItem("Model Accuracy on FormulaOne", id=0, elem_id="landing-accuracy-tab"):
283
  gr.Markdown(
284
  "The chart below summarizes static (non-live) results for model performance on FormulaOne.",
285
  elem_classes="markdown-text",
286
  )
287
 
288
- # Selector aligned to the top-right (see CSS)
289
  with gr.Row(elem_id="f1-tier-select-row"):
290
  tier_selector = gr.Radio(
291
  choices=list(TIER_TOTALS.keys()),
@@ -295,15 +348,32 @@ with blocks:
295
  elem_id="f1-tier-select",
296
  )
297
 
298
- accuracy_plot = gr.Plot(value=_initial_accuracy_fig)
299
 
300
- # Wire selector → plot
301
  tier_selector.change(
302
  lambda t: build_accuracy_figure(t),
303
  inputs=tier_selector,
304
  outputs=accuracy_plot,
305
  )
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  # Existing "What is FormulaOne" tab
308
  with gr.TabItem("What is FormulaOne", id=1, elem_id="what-is-tab"):
309
 
 
2
 
3
  import gradio as gr
4
  import pandas as pd
5
+ import plotly.graph_objects as go
6
  from apscheduler.schedulers.background import BackgroundScheduler
7
  from gradio_leaderboard import Leaderboard, SelectColumns
8
  from huggingface_hub import whoami
 
233
  },
234
  }
235
 
236
+ MODEL_RELEASES = {
237
+ "GPT-5": "2025-08-07",
238
+ "Gemini 2.5 Pro": "2025-03-25",
239
+ "Grok 4": "2025-07-09",
240
+ "Claude Opus 4": "2025-05-22",
241
+ "o3 Pro": "2025-06-10",
242
+ }
243
+
244
+ TIER_TOTALS = {"Warmup": 100, "Tier 1": 100, "Tier 2": 20}
245
+ MODELS_ORDER = ["GPT-5", "Gemini 2.5 Pro", "Grok 4", "Claude Opus 4", "o3 Pro"]
246
+
247
+ ACCURACY_PCT = {
248
+ "Warmup": {
249
+ "GPT-5": 38,
250
+ "Gemini 2.5 Pro": 35,
251
+ "Grok 4": 28,
252
+ "Claude Opus 4": 32,
253
+ "o3 Pro": 30,
254
+ },
255
+ "Tier 1": {
256
+ "GPT-5": 3,
257
+ "Gemini 2.5 Pro": 2,
258
+ "Grok 4": 1,
259
+ "Claude Opus 4": 2,
260
+ "o3 Pro": 2,
261
+ },
262
+ "Tier 2": {
263
+ "GPT-5": 0,
264
+ "Gemini 2.5 Pro": 0,
265
+ "Grok 4": 0,
266
+ "Claude Opus 4": 0,
267
+ "o3 Pro": 0,
268
+ },
269
+ }
270
+
271
 
272
  def build_accuracy_figure(tier: str):
273
+ """Interactive scatter: x = release date, y = accuracy (%). Hover shows solved/total."""
 
274
  total = TIER_TOTALS[tier]
275
+ fig = go.Figure()
276
+
277
+ for model in MODELS_ORDER:
278
+ date_str = MODEL_RELEASES[model]
279
+ y = ACCURACY_PCT[tier][model]
280
+ solved = round(y * total / 100)
281
+ fig.add_trace(
282
+ go.Scatter(
283
+ x=[date_str],
284
+ y=[y],
285
+ mode="markers",
286
+ name=model,
287
+ marker=dict(size=12, line=dict(width=1)),
288
+ hovertemplate=(
289
+ f"<b>{model}</b><br>"
290
+ "Release: %{x|%b %d, %Y}<br>"
291
+ "Accuracy: %{y:.1f}%<br>"
292
+ f"Solved: {solved}/{total}"
293
+ "<extra></extra>"
294
+ ),
295
  )
296
+ )
297
+
298
+ # Comfortable y-range (dynamic ceiling for readability)
299
+ max_y = max(ACCURACY_PCT[tier].values()) or 1
300
+ upper = max(1, math.ceil(max_y * 1.25))
301
+
302
  fig.update_layout(
303
  template="plotly_white",
 
 
 
304
  height=420,
305
+ margin=dict(l=30, r=120, t=10, b=40), # extra right room for legend
306
+ xaxis=dict(
307
+ title=None,
308
+ type="date",
309
+ tickformat="%b %Y",
310
+ showgrid=True,
311
+ ),
312
+ yaxis=dict(
313
+ title="Accuracy (%)",
314
+ range=[0, upper],
315
+ dtick=max(1, upper // 5),
316
+ showgrid=True,
317
+ ),
318
+ legend=dict(title="Models", orientation="v", y=1, x=1.02, yanchor="top"),
319
+ hovermode="closest",
320
  )
321
  return fig
322
 
323
 
 
324
  _initial_accuracy_fig = build_accuracy_figure("Warmup")
325
 
 
326
  # Force light theme even if HF user prefers dark
327
  blocks = gr.Blocks(
328
  css=custom_css,
 
332
  with blocks:
333
 
334
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
335
+ with gr.TabItem("FormulaOne", id=0, elem_id="landing-accuracy-tab"):
 
336
  gr.Markdown(
337
  "The chart below summarizes static (non-live) results for model performance on FormulaOne.",
338
  elem_classes="markdown-text",
339
  )
340
 
341
+ # Pill-style selector aligned to the top-right
342
  with gr.Row(elem_id="f1-tier-select-row"):
343
  tier_selector = gr.Radio(
344
  choices=list(TIER_TOTALS.keys()),
 
348
  elem_id="f1-tier-select",
349
  )
350
 
351
+ accuracy_plot = gr.Plot(value=_initial_accuracy_fig, elem_id="f1-accuracy-plot")
352
 
 
353
  tier_selector.change(
354
  lambda t: build_accuracy_figure(t),
355
  inputs=tier_selector,
356
  outputs=accuracy_plot,
357
  )
358
 
359
+ # Footnote (sampling + prompt details)
360
+ gr.Markdown(
361
+ """
362
+ <div class="f1-container">
363
+ <p class="f1-p" style="font-size:0.95rem;color:var(--f1-subtle);">
364
+ <em>Footnote.</em> All models were sampled with their highest available reasoning settings and a generous token budget.
365
+ We also used a diverse few-shot prompt that is highly supportive for these problems, covering many of the subtle
366
+ details inherent in the tasks (state design, invariants, and bag transformations).
367
+ </p>
368
+ </div>
369
+ """,
370
+ elem_classes="markdown-text",
371
+ )
372
+
373
+ # "Learn more" link to the explainer tab
374
+ gr.Markdown(
375
+ '<div class="f1-container"><p><a class="f1-a" href="#what-is-tab">Learn more about FormulaOne.</a></p></div>'
376
+ )
377
  # Existing "What is FormulaOne" tab
378
  with gr.TabItem("What is FormulaOne", id=1, elem_id="what-is-tab"):
379
 
src/display/css_html_js.py CHANGED
@@ -21,6 +21,29 @@ custom_css = """
21
  /* NEW: landing tab width + tier selector alignment */
22
  #landing-accuracy-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
23
  #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  /* Text */
26
  .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }
 
21
  /* NEW: landing tab width + tier selector alignment */
22
  #landing-accuracy-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
23
  #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
24
+ #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
25
+ #f1-tier-select .wrap {
26
+ display: inline-flex;
27
+ gap: 6px;
28
+ padding: 4px;
29
+ background: #ffffff;
30
+ border: 1px solid var(--f1-border);
31
+ border-radius: 999px;
32
+ }
33
+ #f1-tier-select input[type="radio"] { display: none; }
34
+ #f1-tier-select label {
35
+ border: none;
36
+ border-radius: 999px;
37
+ padding: 6px 12px;
38
+ background: transparent;
39
+ cursor: pointer;
40
+ }
41
+ #f1-tier-select input[type="radio"]:checked + span {
42
+ background: #eef2ff; /* subtle non-white for selected pill */
43
+ border-radius: 999px;
44
+ padding: 6px 12px;
45
+ box-shadow: 0 1px 2px rgba(0,0,0,0.04);
46
+ }
47
 
48
  /* Text */
49
  .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }