galb-dai committed on
Commit
967ec13
·
1 Parent(s): e30b579
Files changed (3) hide show
  1. app.py +23 -5
  2. src/about.py +9 -12
  3. src/display/css_html_js.py +27 -43
app.py CHANGED
@@ -6,12 +6,13 @@ from apscheduler.schedulers.background import BackgroundScheduler
6
  from gradio_leaderboard import Leaderboard, SelectColumns
7
  from huggingface_hub import whoami
8
 
9
- # NOTE: split WHAT_IS_F1_HTML into top/bottom so we can insert a Gradio-based tabbed element between them.
10
  from src.about import (
11
  CITATION_BUTTON_LABEL,
12
  CITATION_BUTTON_TEXT,
13
  EVALUATION_QUEUE_TEXT,
14
- WHAT_IS_F1_HTML_BOTTOM,
 
15
  WHAT_IS_F1_HTML_TOP,
16
  )
17
  from src.datamodel.data import F1Data
@@ -263,10 +264,27 @@ with blocks:
263
  )
264
  tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
265
 
266
- # Bottom content
267
- gr.HTML(WHAT_IS_F1_HTML_BOTTOM)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
- # Rename tab to "Leaderboard"
270
  with gr.TabItem("Leaderboard", elem_id="formulaone-leaderboard-tab-table", id=1):
271
  gr.Markdown(
272
  """
 
6
  from gradio_leaderboard import Leaderboard, SelectColumns
7
  from huggingface_hub import whoami
8
 
9
+ # NOTE: split WHAT_IS_F1_HTML into top/bottom so we can insert a Gradio-based tabbed element & video between them.
10
  from src.about import (
11
  CITATION_BUTTON_LABEL,
12
  CITATION_BUTTON_TEXT,
13
  EVALUATION_QUEUE_TEXT,
14
+ WHAT_IS_F1_HTML_BOTTOM_TAIL,
15
+ WHAT_IS_F1_HTML_BOTTOM_TOP,
16
  WHAT_IS_F1_HTML_TOP,
17
  )
18
  from src.datamodel.data import F1Data
 
264
  )
265
  tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
266
 
267
+ # Bottom content (part 1 up to where the video goes)
268
+ gr.HTML(WHAT_IS_F1_HTML_BOTTOM_TOP)
269
+
270
+ # Embed video via Gradio so it renders reliably
271
+ gr.Video(
272
+ value="/file=assets/DominatingSetAnimation.mp4",
273
+ autoplay=True,
274
+ loop=True,
275
+ show_label=False,
276
+ interactive=False,
277
+ elem_classes=["f1-video"],
278
+ )
279
+ # The caption (centered, dark; styled in CSS)
280
+ gr.HTML(
281
+ '<div class="f1-figcaption">Animation showing the design of a compressed dynamic programming state-space.</div>'
282
+ )
283
+
284
+ # Remaining content
285
+ gr.HTML(WHAT_IS_F1_HTML_BOTTOM_TAIL)
286
 
287
+ # Rename tab to "Leaderboard" and keep it at 800px max-width
288
  with gr.TabItem("Leaderboard", elem_id="formulaone-leaderboard-tab-table", id=1):
289
  gr.Markdown(
290
  """
src/about.py CHANGED
@@ -1,7 +1,7 @@
1
  # The paper's URL for linking
2
  PAPER_URL = "https://arxiv.org/abs/2507.13337"
3
 
4
- # Top chunk — self-contained (div grid table).
5
  WHAT_IS_F1_HTML_TOP = f"""
6
  <div class="f1-container">
7
  <header class="text-center mb-12">
@@ -44,8 +44,8 @@ WHAT_IS_F1_HTML_TOP = f"""
44
  </div>
45
  """
46
 
47
- # Bottom chunk width-constrained; em-dashes; corrected video; captions centered/dark; wording tweak.
48
- WHAT_IS_F1_HTML_BOTTOM = """
49
  <div class="f1-container">
50
  <section>
51
  <p class="mb-4 f1-p">The latter category is incredibly demanding, requiring resolution of many points of uncertainty, and involving an array of reasoning steps, including topological and geometric insight, knowledge of mathematical domains such as extremal graph theory and logic, combinatorial considerations, precise implementation, and more.</p>
@@ -64,13 +64,10 @@ WHAT_IS_F1_HTML_BOTTOM = """
64
  <figcaption class="f1-figcaption">An illustration of local modifications to bags: Introduce, Forget, and Join.</figcaption>
65
  </figure>
66
  <p class="mb-4 f1-p">An algorithm can then traverse this tree of bags, solving the problem piece by piece using dynamic programming. This process involves designing a “state” that summarises all necessary information about the partial solution within a bag, and then defining how this state transforms as vertices are introduced, forgotten, or bags are merged.</p>
67
- <figure class="f1-figure">
68
- <video class="w-full max-w-2xl mx-auto rounded-lg shadow-lg" autoplay loop muted playsinline>
69
- <source src="/file=assets/DominatingSetAnimation.mp4" type="video/mp4">
70
- Your browser does not support the video tag.
71
- </video>
72
- <figcaption class="f1-figcaption">Animation showing the design of a compressed dynamic programming state-space.</figcaption>
73
- </figure>
74
  <p class="f1-p">The deceptive simplicity of the problem statements belies the <strong>extraordinary difficulty</strong> of discovering the correct dynamic programming solution. This process is riddled with subtle combinatorial and logical pitfalls, demanding a profound understanding of the problem’s underlying structure. For a detailed walkthrough of the fifteen interdependent reasoning steps required to solve a single hard problem &mdash; <code>Maximal-Cluster-Graph</code> &mdash; <a href="https://arxiv.org/pdf/2507.13337#appendix.A" target="_blank" rel="noopener noreferrer" class="f1-a">see the appendix of our paper</a>.</p>
75
  </section>
76
 
@@ -89,12 +86,12 @@ WHAT_IS_F1_HTML_BOTTOM = """
89
  <h2 class="f1-h2">Model Accuracy</h2>
90
  <p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
91
  <figure class="f1-figure">
92
- <img src="/file=assets/warmup_performance.png" alt="Plot showing model performance on FormulaOne-Warmup" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
93
  <figcaption class="f1-figcaption">Performance of frontier models on the FormulaOne-Warmup dataset.</figcaption>
94
  </figure>
95
  <p class="mb-4 f1-p">However, as the reasoning depth increases in <strong>Tier 1</strong>, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
96
  <figure class="f1-figure">
97
- <img src="/file=assets/tier1_performance.png" alt="Plot showing model performance on Tier 1" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
98
  <figcaption class="f1-figcaption">Performance of frontier reasoning models on the FormulaOne dataset.</figcaption>
99
  </figure>
100
  <p class="f1-p">This trend culminates in <strong>Tier 2</strong>, where the difficulty is characteristic of exploratory research problems. On this set of 20 problems, no current frontier model solves even a single one. This result starkly illustrates the gap that remains between high performance on existing benchmarks and the deep algorithmic reasoning required for truly complex problems.</p>
 
1
  # The paper's URL for linking
2
  PAPER_URL = "https://arxiv.org/abs/2507.13337"
3
 
4
+ # Top chunk — same as before (div grid table), unchanged except for minor structure.
5
  WHAT_IS_F1_HTML_TOP = f"""
6
  <div class="f1-container">
7
  <header class="text-center mb-12">
 
44
  </div>
45
  """
46
 
47
+ # Bottom content split around the video so we can embed it with gr.Video in app.py
48
+ WHAT_IS_F1_HTML_BOTTOM_TOP = """
49
  <div class="f1-container">
50
  <section>
51
  <p class="mb-4 f1-p">The latter category is incredibly demanding, requiring resolution of many points of uncertainty, and involving an array of reasoning steps, including topological and geometric insight, knowledge of mathematical domains such as extremal graph theory and logic, combinatorial considerations, precise implementation, and more.</p>
 
64
  <figcaption class="f1-figcaption">An illustration of local modifications to bags: Introduce, Forget, and Join.</figcaption>
65
  </figure>
66
  <p class="mb-4 f1-p">An algorithm can then traverse this tree of bags, solving the problem piece by piece using dynamic programming. This process involves designing a “state” that summarises all necessary information about the partial solution within a bag, and then defining how this state transforms as vertices are introduced, forgotten, or bags are merged.</p>
67
+ <!-- VIDEO INSERTED HERE VIA gr.Video IN app.py -->
68
+ """
69
+
70
+ WHAT_IS_F1_HTML_BOTTOM_TAIL = """
 
 
 
71
  <p class="f1-p">The deceptive simplicity of the problem statements belies the <strong>extraordinary difficulty</strong> of discovering the correct dynamic programming solution. This process is riddled with subtle combinatorial and logical pitfalls, demanding a profound understanding of the problem’s underlying structure. For a detailed walkthrough of the fifteen interdependent reasoning steps required to solve a single hard problem &mdash; <code>Maximal-Cluster-Graph</code> &mdash; <a href="https://arxiv.org/pdf/2507.13337#appendix.A" target="_blank" rel="noopener noreferrer" class="f1-a">see the appendix of our paper</a>.</p>
72
  </section>
73
 
 
86
  <h2 class="f1-h2">Model Accuracy</h2>
87
  <p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
88
  <figure class="f1-figure">
89
+ <img src="/file=assets/warmup_performance.png" alt="Plot showing model performance on FormulaOne-Warmup" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
90
  <figcaption class="f1-figcaption">Performance of frontier models on the FormulaOne-Warmup dataset.</figcaption>
91
  </figure>
92
  <p class="mb-4 f1-p">However, as the reasoning depth increases in <strong>Tier 1</strong>, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
93
  <figure class="f1-figure">
94
+ <img src="/file=assets/tier1_performance.png" alt="Plot showing model performance on Tier 1" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
95
  <figcaption class="f1-figcaption">Performance of frontier reasoning models on the FormulaOne dataset.</figcaption>
96
  </figure>
97
  <p class="f1-p">This trend culminates in <strong>Tier 2</strong>, where the difficulty is characteristic of exploratory research problems. On this set of 20 problems, no current frontier model solves even a single one. This result starkly illustrates the gap that remains between high performance on existing benchmarks and the deep algorithmic reasoning required for truly complex problems.</p>
src/display/css_html_js.py CHANGED
@@ -5,6 +5,10 @@ custom_css = """
5
  --f1-border: #e5e7eb;
6
  --f1-bg: #ffffff;
7
  --f1-bg-muted: #f9fafb;
 
 
 
 
8
  }
9
 
10
  /* Readable width everywhere */
@@ -39,38 +43,37 @@ custom_css = """
39
  .f1-a { color: #2563eb; text-decoration: none; font-weight: 500; }
40
  .f1-a:hover { text-decoration: underline; }
41
 
42
- /* Blockquote, captions, problem name */
43
- .f1-blockquote { border-left: 4px solid #d1d5db; padding-left: 1rem; font-style: italic; color: #4b5563; }
44
- .f1-figcaption { margin-top: 0.5rem; font-size: 0.875rem; color: #111827; text-align: center; } /* centered + very dark */
45
  .f1-problem-name { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; font-weight: 600; text-align: center; }
46
 
47
- /* ===== Clean "table" using a single grid (equal column widths across rows) ===== */
48
  .f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
49
  .f1-grid-table {
50
- display: grid; /* one grid for all rows */
51
- grid-template-columns: 1fr 1fr 1fr; /* equal column widths */
52
- border: 1px solid var(--f1-border); /* full border */
53
  background: var(--f1-bg);
54
  border-radius: 8px;
55
  overflow: hidden;
56
  }
57
- .f1-grid-row { display: contents; } /* rows don’t reset column widths */
58
  .f1-grid-cell {
59
- padding: 10px 14px;
60
  text-align: left;
61
  border-left: 1px solid var(--f1-border);
62
  border-top: 1px solid var(--f1-border);
63
  }
64
- .f1-grid-cell:nth-child(3n+1) { border-left: none; } /* first col cells: no left border */
65
  .f1-grid-head .f1-grid-cell {
66
  font-weight: 600;
67
  text-align: center;
 
68
  }
69
- .f1-grid-head .f1-grid-cell { border-top: none; } /* top border only on body rows */
70
 
71
- /* ===== Examples card: background, centered pill heading, pretty pills ===== */
72
  #f1-examples {
73
- background: var(--f1-bg-muted); /* #f9fafb */
74
  border: 1px solid var(--f1-border);
75
  border-radius: 10px;
76
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
@@ -79,23 +82,23 @@ custom_css = """
79
  .f1-tabs-body { padding-top: 12px; text-align: center; }
80
  .f1-examples-chip {
81
  display: inline-block;
82
- background: #e5e7eb; /* slightly darker gray pill */
83
  color: #111827;
84
  padding: 6px 12px;
85
  border-radius: 999px;
86
  font-weight: 700;
87
  }
88
 
89
- /* Problem content: consistent background + padding */
90
  #f1-examples .f1-problem-markdown .markdown {
91
  background: var(--f1-bg-muted);
92
  border: 1px solid var(--f1-border);
93
  border-radius: 8px;
94
  padding: 18px;
95
- margin: 10px 14px 8px 14px;
96
  }
97
 
98
- /* Bottom "tabs" using Radio -> show only pills (hide inputs) */
99
  #f1-example-radio { border-top: 1px solid var(--f1-border); padding: 8px 10px 10px 10px; margin: 0 8px 8px; }
100
  #f1-example-radio input[type="radio"] { display: none; }
101
  #f1-example-radio .wrap { display: flex; gap: 8px; flex-wrap: wrap; justify-content: flex-start; }
@@ -104,17 +107,20 @@ custom_css = """
104
  border-radius: 999px;
105
  padding: 6px 12px;
106
  cursor: pointer;
107
- background: #f3f4f6; /* light gray for inactive pills */
108
  }
109
  #f1-example-radio input[type="radio"]:checked + span {
110
- background: #e5e7eb; /* selected pill slightly darker */
111
  border-color: var(--f1-border);
112
  border-radius: 999px;
113
  padding: 6px 12px;
114
  }
115
 
116
- /* Leaderboard: center the whole tab and apply requested nesting/min-width rule with .column/.row */
117
- #formulaone-leaderboard-tab-table { max-width: 1200px; margin-left: auto; margin-right: auto; }
 
 
 
118
  #formulaone-leaderboard-tab-table .column .row .column { min-width: 80% !important; } /* exact chain rule */
119
  #formulaone-leaderboard-tab-table .row, #formulaone-leaderboard-tab-table .column { width: 100% !important; max-width: 100% !important; }
120
  #formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
@@ -127,28 +133,6 @@ custom_css = """
127
  box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.04) !important;
128
  }
129
  #hf-login-btn:hover, #hf-login-btn button:hover, button[data-testid="login-button"]:hover, [data-testid="login-button"] button:hover, div[data-testid="login-button"] > button:hover { background: #f9fafb !important; }
130
-
131
- /* Misc retained */
132
- #models-to-add-text { font-size: 18px !important; }
133
- #citation-button span { font-size: 16px !important; }
134
- #citation-button textarea { font-size: 16px !important; }
135
- #citation-button > label > button { margin: 6px; transform: scale(1.3); }
136
- #leaderboard-table { margin-top: 15px }
137
- #leaderboard-table-lite { margin-top: 15px }
138
- #search-bar-table-box > div:first-child { background: none; border: none; }
139
- #search-bar { padding: 0px; }
140
- #leaderboard-table td:nth-child(2), #leaderboard-table th:nth-child(2) { max-width: 400px; overflow: auto; white-space: nowrap; }
141
- #scale-logo { border-style: none !important; box-shadow: none; display: block; margin-left: auto; margin-right: auto; max-width: 600px; }
142
- #scale-logo .download { display: none; }
143
- #filter_type{ border: 0; padding-left: 0; padding-top: 0; }
144
- #filter_type label { display: flex; }
145
- #filter_type label > span{ margin-top: var(--spacing-lg); margin-right: 0.5em; }
146
- #filter_type label > .wrap{ width: 103px; }
147
- #filter_type label > .wrap .wrap-inner{ padding: 2px; }
148
- #filter_type label > .wrap .wrap-inner input{ width: 1px }
149
- #filter-columns-type{ border:0; padding:0.5; }
150
- #filter-columns-size{ border:0; padding:0.5; }
151
- #box-filter > .form{ border: 0 }
152
  """
153
 
154
  get_window_url_params = """
 
5
  --f1-border: #e5e7eb;
6
  --f1-bg: #ffffff;
7
  --f1-bg-muted: #f9fafb;
8
+
9
+ /* Disable Gradio block chrome that was bleeding into the Examples switcher */
10
+ --block-border-color: transparent !important;
11
+ --background-fill-primary: transparent !important;
12
  }
13
 
14
  /* Readable width everywhere */
 
43
  .f1-a { color: #2563eb; text-decoration: none; font-weight: 500; }
44
  .f1-a:hover { text-decoration: underline; }
45
 
46
+ /* Captions, problem name */
47
+ .f1-figcaption { margin-top: 0.5rem; font-size: 0.875rem; color: #111827; text-align: center; }
 
48
  .f1-problem-name { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; font-weight: 600; text-align: center; }
49
 
50
+ /* ===== Clean "table" (equal column widths per column; compact first two, flexible third) ===== */
51
  .f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
52
  .f1-grid-table {
53
+ display: inline-grid; /* center + shrink-to-fit */
54
+ grid-template-columns: max-content max-content minmax(360px, auto);
55
+ border: 1px solid var(--f1-border); /* full border */
56
  background: var(--f1-bg);
57
  border-radius: 8px;
58
  overflow: hidden;
59
  }
60
+ .f1-grid-row { display: contents; } /* rows share the same columns */
61
  .f1-grid-cell {
62
+ padding: 8px 12px;
63
  text-align: left;
64
  border-left: 1px solid var(--f1-border);
65
  border-top: 1px solid var(--f1-border);
66
  }
67
+ .f1-grid-cell:nth-child(3n+1) { border-left: none; } /* first column cells: no left border */
68
  .f1-grid-head .f1-grid-cell {
69
  font-weight: 600;
70
  text-align: center;
71
+ border-top: none; /* top edge of the grid */
72
  }
 
73
 
74
+ /* ===== Examples block ===== */
75
  #f1-examples {
76
+ background: var(--f1-bg-muted); /* #f9fafb */
77
  border: 1px solid var(--f1-border);
78
  border-radius: 10px;
79
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
 
82
  .f1-tabs-body { padding-top: 12px; text-align: center; }
83
  .f1-examples-chip {
84
  display: inline-block;
85
+ background: #e5e7eb; /* slightly darker than #f9fafb */
86
  color: #111827;
87
  padding: 6px 12px;
88
  border-radius: 999px;
89
  font-weight: 700;
90
  }
91
 
92
+ /* Problem content: consistent background + padding + spacing above the tabs */
93
  #f1-examples .f1-problem-markdown .markdown {
94
  background: var(--f1-bg-muted);
95
  border: 1px solid var(--f1-border);
96
  border-radius: 8px;
97
  padding: 18px;
98
+ margin: 10px 14px 12px 14px; /* bottom margin creates padding before tabs */
99
  }
100
 
101
+ /* Pills (Radio) hide bullets, light gray inactive, darker selected */
102
  #f1-example-radio { border-top: 1px solid var(--f1-border); padding: 8px 10px 10px 10px; margin: 0 8px 8px; }
103
  #f1-example-radio input[type="radio"] { display: none; }
104
  #f1-example-radio .wrap { display: flex; gap: 8px; flex-wrap: wrap; justify-content: flex-start; }
 
107
  border-radius: 999px;
108
  padding: 6px 12px;
109
  cursor: pointer;
110
+ background: #f3f4f6; /* inactive pills */
111
  }
112
  #f1-example-radio input[type="radio"]:checked + span {
113
+ background: #e5e7eb; /* active pill */
114
  border-color: var(--f1-border);
115
  border-radius: 999px;
116
  padding: 6px 12px;
117
  }
118
 
119
+ /* Gradio video styling (centered, rounded, subtle shadow) */
120
+ .f1-video video { width: 100%; max-width: 42rem; display: block; margin: 0 auto; border-radius: 12px; box-shadow: 0 2px 8px rgba(0,0,0,0.08); }
121
+
122
+ /* Leaderboard: cap width at 800px and center; keep your chain rule */
123
+ #formulaone-leaderboard-tab-table { max-width: 800px; margin-left: auto; margin-right: auto; }
124
  #formulaone-leaderboard-tab-table .column .row .column { min-width: 80% !important; } /* exact chain rule */
125
  #formulaone-leaderboard-tab-table .row, #formulaone-leaderboard-tab-table .column { width: 100% !important; max-width: 100% !important; }
126
  #formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
 
133
  box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.04) !important;
134
  }
135
  #hf-login-btn:hover, #hf-login-btn button:hover, button[data-testid="login-button"]:hover, [data-testid="login-button"] button:hover, div[data-testid="login-button"] > button:hover { background: #f9fafb !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  """
137
 
138
  get_window_url_params = """