galb-dai committed on
Commit
fdc7723
·
1 Parent(s): 967ec13
Files changed (3) hide show
  1. app.py +3 -4
  2. src/about.py +3 -5
  3. src/display/css_html_js.py +37 -25
app.py CHANGED
@@ -214,9 +214,8 @@ with blocks:
214
  # Top content
215
  gr.HTML(WHAT_IS_F1_HTML_TOP)
216
 
217
- # Examples (kept inside a centered, 800px container)
218
  with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
219
- # centered pill title
220
  gr.HTML(
221
  '<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
222
  )
@@ -259,7 +258,7 @@ with blocks:
259
  choices=["Warmup", "Tier 1", "Tier 2"],
260
  value="Warmup",
261
  label=None,
262
- show_label=False, # hide caption
263
  elem_id="f1-example-radio",
264
  )
265
  tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
@@ -269,7 +268,7 @@ with blocks:
269
 
270
  # Embed video via Gradio so it renders reliably
271
  gr.Video(
272
- value="/file=assets/DominatingSetAnimation.mp4",
273
  autoplay=True,
274
  loop=True,
275
  show_label=False,
 
214
  # Top content
215
  gr.HTML(WHAT_IS_F1_HTML_TOP)
216
 
217
+ # Examples (kept inside a centered container; content itself 730px wide)
218
  with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
 
219
  gr.HTML(
220
  '<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
221
  )
 
258
  choices=["Warmup", "Tier 1", "Tier 2"],
259
  value="Warmup",
260
  label=None,
261
+ show_label=False,
262
  elem_id="f1-example-radio",
263
  )
264
  tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
 
268
 
269
  # Embed video via Gradio so it renders reliably
270
  gr.Video(
271
+ "assets/DominatingSetAnimation.mp4",
272
  autoplay=True,
273
  loop=True,
274
  show_label=False,
src/about.py CHANGED
@@ -1,7 +1,6 @@
1
  # The paper's URL for linking
2
  PAPER_URL = "https://arxiv.org/abs/2507.13337"
3
 
4
- # Top chunk — same as before (div grid table), unchanged except for minor structure.
5
  WHAT_IS_F1_HTML_TOP = f"""
6
  <div class="f1-container">
7
  <header class="text-center mb-12">
@@ -15,7 +14,6 @@ WHAT_IS_F1_HTML_TOP = f"""
15
 
16
  <p class="mb-4 f1-p"><strong>FormulaOne</strong> consists of 220 novel dynamic programming problems over graphs. The problems are organised into three categories, ranging from moderate difficulty and all the way up to research-level.</p>
17
 
18
- <!-- Clean, centered "table" using a single grid -->
19
  <div class="f1-grid-wrap" role="region" aria-label="FormulaOne categories">
20
  <div class="f1-grid-table" role="table">
21
  <div class="f1-grid-row f1-grid-head" role="row">
@@ -44,7 +42,6 @@ WHAT_IS_F1_HTML_TOP = f"""
44
  </div>
45
  """
46
 
47
- # Bottom content split around the video so we can embed it with gr.Video in app.py
48
  WHAT_IS_F1_HTML_BOTTOM_TOP = """
49
  <div class="f1-container">
50
  <section>
@@ -86,12 +83,12 @@ WHAT_IS_F1_HTML_BOTTOM_TAIL = """
86
  <h2 class="f1-h2">Model Accuracy</h2>
87
  <p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
88
  <figure class="f1-figure">
89
- <img src="/file=assets/warmup_performance.png" alt="Plot showing model performance on FormulaOne-Warmup" class="max-w-full md:max-w-2xl mx_auto rounded-lg shadow-md">
90
  <figcaption class="f1-figcaption">Performance of frontier models on the FormulaOne-Warmup dataset.</figcaption>
91
  </figure>
92
  <p class="mb-4 f1-p">However, as the reasoning depth increases in <strong>Tier 1</strong>, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
93
  <figure class="f1-figure">
94
- <img src="/file=assets/tier1_performance.png" alt="Plot showing model performance on Tier 1" class="max-w-full md:max-w-2xl mx_auto rounded-lg shadow-md">
95
  <figcaption class="f1-figcaption">Performance of frontier reasoning models on the FormulaOne dataset.</figcaption>
96
  </figure>
97
  <p class="f1-p">This trend culminates in <strong>Tier 2</strong>, where the difficulty is characteristic of exploratory research problems. On this set of 20 problems, no current frontier model solves even a single one. This result starkly illustrates the gap that remains between high performance on existing benchmarks and the deep algorithmic reasoning required for truly complex problems.</p>
@@ -99,6 +96,7 @@ WHAT_IS_F1_HTML_BOTTOM_TAIL = """
99
  </div>
100
  """
101
 
 
102
  EVALUATION_QUEUE_TEXT = """
103
  ## Submitting to the FormulaOne Leaderboard
104
 
 
1
  # The paper's URL for linking
2
  PAPER_URL = "https://arxiv.org/abs/2507.13337"
3
 
 
4
  WHAT_IS_F1_HTML_TOP = f"""
5
  <div class="f1-container">
6
  <header class="text-center mb-12">
 
14
 
15
  <p class="mb-4 f1-p"><strong>FormulaOne</strong> consists of 220 novel dynamic programming problems over graphs. The problems are organised into three categories, ranging from moderate difficulty and all the way up to research-level.</p>
16
 
 
17
  <div class="f1-grid-wrap" role="region" aria-label="FormulaOne categories">
18
  <div class="f1-grid-table" role="table">
19
  <div class="f1-grid-row f1-grid-head" role="row">
 
42
  </div>
43
  """
44
 
 
45
  WHAT_IS_F1_HTML_BOTTOM_TOP = """
46
  <div class="f1-container">
47
  <section>
 
83
  <h2 class="f1-h2">Model Accuracy</h2>
84
  <p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
85
  <figure class="f1-figure">
86
+ <img src="/file=assets/warmup_performance.png" alt="Plot showing model performance on FormulaOne-Warmup" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
87
  <figcaption class="f1-figcaption">Performance of frontier models on the FormulaOne-Warmup dataset.</figcaption>
88
  </figure>
89
  <p class="mb-4 f1-p">However, as the reasoning depth increases in <strong>Tier 1</strong>, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
90
  <figure class="f1-figure">
91
+ <img src="/file=assets/tier1_performance.png" alt="Plot showing model performance on Tier 1" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
92
  <figcaption class="f1-figcaption">Performance of frontier reasoning models on the FormulaOne dataset.</figcaption>
93
  </figure>
94
  <p class="f1-p">This trend culminates in <strong>Tier 2</strong>, where the difficulty is characteristic of exploratory research problems. On this set of 20 problems, no current frontier model solves even a single one. This result starkly illustrates the gap that remains between high performance on existing benchmarks and the deep algorithmic reasoning required for truly complex problems.</p>
 
96
  </div>
97
  """
98
 
99
+
100
  EVALUATION_QUEUE_TEXT = """
101
  ## Submitting to the FormulaOne Leaderboard
102
 
src/display/css_html_js.py CHANGED
@@ -6,7 +6,7 @@ custom_css = """
6
  --f1-bg: #ffffff;
7
  --f1-bg-muted: #f9fafb;
8
 
9
- /* Disable Gradio block chrome that was bleeding into the Examples switcher */
10
  --block-border-color: transparent !important;
11
  --background-fill-primary: transparent !important;
12
  }
@@ -15,7 +15,9 @@ custom_css = """
15
  .f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
16
  .markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
17
  #what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
18
- #f1-examples { max-width: 800px; margin: 0 auto; }
 
 
19
 
20
  /* Body text */
21
  .f1-p, .f1-li {
@@ -45,75 +47,81 @@ custom_css = """
45
 
46
  /* Captions, problem name */
47
  .f1-figcaption { margin-top: 0.5rem; font-size: 0.875rem; color: #111827; text-align: center; }
48
- .f1-problem-name { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; font-weight: 600; text-align: center; }
49
 
50
- /* ===== Clean "table" (equal column widths per column; compact first two, flexible third) ===== */
 
 
 
 
 
 
 
 
 
51
  .f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
52
  .f1-grid-table {
53
- display: inline-grid; /* center + shrink-to-fit */
54
  grid-template-columns: max-content max-content minmax(360px, auto);
55
- border: 1px solid var(--f1-border); /* full border */
56
  background: var(--f1-bg);
57
  border-radius: 8px;
58
  overflow: hidden;
59
  }
60
- .f1-grid-row { display: contents; } /* rows share the same columns */
61
  .f1-grid-cell {
62
  padding: 8px 12px;
63
  text-align: left;
64
  border-left: 1px solid var(--f1-border);
65
  border-top: 1px solid var(--f1-border);
66
  }
67
- .f1-grid-cell:nth-child(3n+1) { border-left: none; } /* first column cells: no left border */
68
- .f1-grid-head .f1-grid-cell {
69
- font-weight: 600;
70
- text-align: center;
71
- border-top: none; /* top edge of the grid */
72
- }
73
 
74
  /* ===== Examples block ===== */
75
  #f1-examples {
76
- background: var(--f1-bg-muted); /* #f9fafb */
77
  border: 1px solid var(--f1-border);
78
  border-radius: 10px;
79
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
80
  margin-bottom: 12px;
81
  }
 
 
82
  .f1-tabs-body { padding-top: 12px; text-align: center; }
83
  .f1-examples-chip {
84
  display: inline-block;
85
- background: #e5e7eb; /* slightly darker than #f9fafb */
86
  color: #111827;
87
  padding: 6px 12px;
88
  border-radius: 999px;
89
  font-weight: 700;
90
  }
91
 
92
- /* Problem content: consistent background + padding + spacing above the tabs */
93
  #f1-examples .f1-problem-markdown .markdown {
94
  background: var(--f1-bg-muted);
95
  border: 1px solid var(--f1-border);
96
  border-radius: 8px;
97
  padding: 18px;
98
- margin: 10px 14px 12px 14px; /* bottom margin creates padding before tabs */
99
  }
100
 
101
- /* Pills (Radio) — hide bullets, light gray inactive, darker selected */
102
- #f1-example-radio { border-top: 1px solid var(--f1-border); padding: 8px 10px 10px 10px; margin: 0 8px 8px; }
103
  #f1-example-radio input[type="radio"] { display: none; }
104
- #f1-example-radio .wrap { display: flex; gap: 8px; flex-wrap: wrap; justify-content: flex-start; }
105
  #f1-example-radio label {
106
  border: 1px solid var(--f1-border);
107
  border-radius: 999px;
108
- padding: 6px 12px;
109
  cursor: pointer;
110
- background: #f3f4f6; /* inactive pills */
111
  }
112
  #f1-example-radio input[type="radio"]:checked + span {
113
- background: #e5e7eb; /* active pill */
114
  border-color: var(--f1-border);
115
  border-radius: 999px;
116
- padding: 6px 12px;
117
  }
118
 
119
  /* Gradio video styling (centered, rounded, subtle shadow) */
@@ -125,12 +133,16 @@ custom_css = """
125
  #formulaone-leaderboard-tab-table .row, #formulaone-leaderboard-tab-table .column { width: 100% !important; max-width: 100% !important; }
126
  #formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
127
 
128
- /* Login button: force light */
129
  #hf-login-btn, #hf-login-btn button, button[data-testid="login-button"], [data-testid="login-button"] button, div[data-testid="login-button"] > button {
130
  background: #ffffff !important;
131
  color: #1f2937 !important;
132
  border: 1px solid var(--f1-border) !important;
133
  box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.04) !important;
 
 
 
 
134
  }
135
  #hf-login-btn:hover, #hf-login-btn button:hover, button[data-testid="login-button"]:hover, [data-testid="login-button"] button:hover, div[data-testid="login-button"] > button:hover { background: #f9fafb !important; }
136
  """
 
6
  --f1-bg: #ffffff;
7
  --f1-bg-muted: #f9fafb;
8
 
9
+ /* Disable Gradio chrome that was bleeding into the examples block */
10
  --block-border-color: transparent !important;
11
  --background-fill-primary: transparent !important;
12
  }
 
15
  .f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
16
  .markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
17
  #what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
18
+
19
+ /* Examples wrapper stays centered, content width 730px */
20
+ #f1-examples { max-width: 730px; margin: 0 auto; }
21
 
22
  /* Body text */
23
  .f1-p, .f1-li {
 
47
 
48
  /* Captions, problem name */
49
  .f1-figcaption { margin-top: 0.5rem; font-size: 0.875rem; color: #111827; text-align: center; }
 
50
 
51
+ /* Problem name: guarantee center on first render; force bg color for <code> */
52
+ #f1-examples .f1-problem-markdown .markdown p.f1-problem-name { text-align: center !important; margin: 0 0 8px 0; }
53
+ #f1-examples .f1-problem-markdown .markdown p.f1-problem-name code {
54
+ display: inline-block;
55
+ background: #f9fafb !important; /* requested override; bypasses --code-background-fill */
56
+ padding: 2px 8px;
57
+ border-radius: 6px;
58
+ }
59
+
60
+ /* ===== Clean "table" (column-consistent widths; compact first two, flexible third) ===== */
61
  .f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
62
  .f1-grid-table {
63
+ display: inline-grid;
64
  grid-template-columns: max-content max-content minmax(360px, auto);
65
+ border: 1px solid var(--f1-border);
66
  background: var(--f1-bg);
67
  border-radius: 8px;
68
  overflow: hidden;
69
  }
70
+ .f1-grid-row { display: contents; }
71
  .f1-grid-cell {
72
  padding: 8px 12px;
73
  text-align: left;
74
  border-left: 1px solid var(--f1-border);
75
  border-top: 1px solid var(--f1-border);
76
  }
77
+ .f1-grid-cell:nth-child(3n+1) { border-left: none; }
78
+ .f1-grid-head .f1-grid-cell { font-weight: 600; text-align: center; border-top: none; }
 
 
 
 
79
 
80
  /* ===== Examples block ===== */
81
  #f1-examples {
82
+ background: var(--f1-bg-muted); /* #f9fafb */
83
  border: 1px solid var(--f1-border);
84
  border-radius: 10px;
85
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
86
  margin-bottom: 12px;
87
  }
88
+ #f1-examples .form { background: transparent !important; } /* no background on inner .form */
89
+
90
  .f1-tabs-body { padding-top: 12px; text-align: center; }
91
  .f1-examples-chip {
92
  display: inline-block;
93
+ background: #e5e7eb; /* slightly darker than #f9fafb */
94
  color: #111827;
95
  padding: 6px 12px;
96
  border-radius: 999px;
97
  font-weight: 700;
98
  }
99
 
100
+ /* Problem content: consistent background + padding; modest space before tabs */
101
  #f1-examples .f1-problem-markdown .markdown {
102
  background: var(--f1-bg-muted);
103
  border: 1px solid var(--f1-border);
104
  border-radius: 8px;
105
  padding: 18px;
106
+ margin: 10px 14px 10px 14px; /* slightly reduced bottom margin */
107
  }
108
 
109
+ /* Pills (Radio) — hide bullets; light gray inactive; darker selected; minimal bottom padding */
110
+ #f1-example-radio { border-top: 1px solid var(--f1-border); padding: 8px 10px 4px 10px; margin: 0 8px 4px; }
111
  #f1-example-radio input[type="radio"] { display: none; }
112
+ #f1-example-radio .wrap { display: flex; gap: 6px; flex-wrap: wrap; justify-content: flex-start; }
113
  #f1-example-radio label {
114
  border: 1px solid var(--f1-border);
115
  border-radius: 999px;
116
+ padding: 6px 10px;
117
  cursor: pointer;
118
+ background: #f3f4f6; /* inactive pills */
119
  }
120
  #f1-example-radio input[type="radio"]:checked + span {
121
+ background: #e5e7eb; /* active pill */
122
  border-color: var(--f1-border);
123
  border-radius: 999px;
124
+ padding: 6px 10px;
125
  }
126
 
127
  /* Gradio video styling (centered, rounded, subtle shadow) */
 
133
  #formulaone-leaderboard-tab-table .row, #formulaone-leaderboard-tab-table .column { width: 100% !important; max-width: 100% !important; }
134
  #formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
135
 
136
+ /* Login button: force light + width cap at 800 and center */
137
  #hf-login-btn, #hf-login-btn button, button[data-testid="login-button"], [data-testid="login-button"] button, div[data-testid="login-button"] > button {
138
  background: #ffffff !important;
139
  color: #1f2937 !important;
140
  border: 1px solid var(--f1-border) !important;
141
  box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.04) !important;
142
+ max-width: 800px !important;
143
+ width: 100% !important;
144
+ margin-left: auto !important;
145
+ margin-right: auto !important;
146
  }
147
  #hf-login-btn:hover, #hf-login-btn button:hover, button[data-testid="login-button"]:hover, [data-testid="login-button"] button:hover, div[data-testid="login-button"] > button:hover { background: #f9fafb !important; }
148
  """