galb-dai committed on
Commit
ad81b69
·
1 Parent(s): 1293986
Files changed (3) hide show
  1. app.py +15 -6
  2. src/about.py +9 -6
  3. src/display/css_html_js.py +9 -8
app.py CHANGED
@@ -10,10 +10,15 @@ from huggingface_hub import whoami
10
  from src.about import WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL # tail after Tier1 fig
11
  from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
12
  from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
13
- from src.about import WHAT_IS_F1_HTML_BOTTOM_A # up to before first figure
14
- from src.about import WHAT_IS_F1_HTML_BOTTOM_B # after first figure, before video
15
  from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
16
- from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, WHAT_IS_F1_HTML_TOP
 
 
 
 
 
17
  from src.datamodel.data import F1Data
18
  from src.display.css_html_js import custom_css
19
  from src.display.formatting import styled_error
@@ -213,7 +218,11 @@ with blocks:
213
  # Top content and categories table
214
  gr.HTML(WHAT_IS_F1_HTML_TOP)
215
 
216
- # Examples (centered; inner width 730px via CSS)
 
 
 
 
217
  with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
218
  gr.HTML(
219
  '<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
@@ -262,8 +271,8 @@ with blocks:
262
  )
263
  tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
264
 
265
- # Bottom content pieces interleaved with real Gradio media
266
- gr.HTML(WHAT_IS_F1_HTML_BOTTOM_A)
267
 
268
  # Figure 1: bag_modifications.png (use gr.Image)
269
  gr.Image(
 
10
  from src.about import WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL # tail after Tier1 fig
11
  from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
12
  from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
13
+ from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
14
+ from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
15
  from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
16
+ from src.about import ( # ⬅️ split to insert the tabs right after the heading
17
+ CITATION_BUTTON_LABEL,
18
+ CITATION_BUTTON_TEXT,
19
+ EVALUATION_QUEUE_TEXT,
20
+ WHAT_IS_F1_HTML_TOP,
21
+ )
22
  from src.datamodel.data import F1Data
23
  from src.display.css_html_js import custom_css
24
  from src.display.formatting import styled_error
 
218
  # Top content and categories table
219
  gr.HTML(WHAT_IS_F1_HTML_TOP)
220
 
221
+ # ---- Bottom content pieces interleaved with real Gradio media ----
222
+ # Up to and including the "An Infinite Well" heading
223
+ gr.HTML(WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS)
224
+
225
+ # ===== Examples (now right after the “Infinite Well” heading; inner width 710px via CSS) =====
226
  with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
227
  gr.HTML(
228
  '<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
 
271
  )
272
  tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
273
 
274
+ # Continue the text after the heading (before the first figure)
275
+ gr.HTML(WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS)
276
 
277
  # Figure 1: bag_modifications.png (use gr.Image)
278
  gr.Image(
src/about.py CHANGED
@@ -45,8 +45,8 @@ WHAT_IS_F1_HTML_TOP = f"""
45
 
46
  # Bottom is split so we can insert real Gradio media (images/video) from app.py.
47
 
48
- # Up to before the first figure (bag_modifications.png)
49
- WHAT_IS_F1_HTML_BOTTOM_A = """
50
  <div class="f1-container">
51
  <section>
52
  <p class="mb-4 f1-p">The latter category is incredibly demanding, requiring resolution of many points of uncertainty, and involving an array of reasoning steps, including topological and geometric insight, knowledge of mathematical domains such as extremal graph theory and logic, combinatorial considerations, precise implementation, and more.</p>
@@ -55,6 +55,10 @@ WHAT_IS_F1_HTML_BOTTOM_A = """
55
 
56
  <section>
57
  <h2 class="f1-h2">An “Infinite Well” of Problems</h2>
 
 
 
 
58
  <p class="mb-4 f1-p">While the problems are often natural to state, their solutions are far from obvious. The solvability of this vast class of problems is guaranteed by an algorithmic <strong>meta-theorem</strong> due to <a href="https://en.wikipedia.org/wiki/Courcelle%27s_theorem" target="_blank" rel="noopener noreferrer" class="f1-a">Courcelle</a>, which broadly states:</p>
59
  <blockquote class="my-6 f1-blockquote">
60
  “For every sufficiently tree-like graph, any problem definable in an expressive formal logic — Monadic Second-Order (MSO) logic — can be solved by a dynamic programming algorithm that operates in time linear in the order of the graph.”
@@ -76,7 +80,7 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
76
 
77
  <section id="evaluation">
78
  <h2 class="f1-h2">Evaluation</h2>
79
- <p class="mb-4 f1-p">To give models the best possible chance of success, we provide a generous few-shot prompt that covers a broad array of the ideas and techniques involved in solving these problems. All models were evaluated using their highest available reasoning settings and with the maximum context length permitted.</p>
80
  <p class="mb-4 f1-p">Each submitted solution is subjected to a rigorous and automated <a href="https://arxiv.org/pdf/2507.13337#section.4" target="_blank" rel="noopener noreferrer" class="f1-a">test suite</a> that measures three key aspects of its validity:</p>
81
  <ul class="list-disc list-inside space-y-2 mb-6">
82
  <li class="f1-li"><strong>Correctness:</strong> The output of the submitted algorithm must be correct on all graphs.</li>
@@ -84,14 +88,13 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
84
  <li class="f1-li"><strong>Efficiency:</strong> The solution must be truly <a href="https://en.wikipedia.org/wiki/Parameterized_complexity" target="_blank" rel="noopener noreferrer" class="f1-a">fixed-parameter linear</a>.</li>
85
  </ul>
86
  <p class="mb-4 f1-p">To support research and encourage community contributions, the <code>FormulaOne-Warmup</code> dataset is released as a public resource for training and fine-tuning models. The complete test suite for all 100 Warmup problems is available, alongside a standalone evaluation environment, in our <a href="https://github.com/double-ai/formulaone-dataset/tree/main" target="_blank" rel="noopener noreferrer" class="f1-a">GitHub repository</a>.</p>
87
- <p class="f1-p">To maintain the integrity of the core benchmark, only a minimal subset of tests is released for the Tier 1 and Tier 2 problems. Any solutions submitted for evaluation against our benchmark are evaluated against the comprehensive withheld test-suite.</p>
88
  """
89
 
90
- # *** THIS WAS MISSING BEFORE ***
91
  # Evaluation: begins the "Model Accuracy" subsection and the Warmup paragraph, up to (but not including) the Warmup figure.
92
  WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
93
  <h2 class="f1-h2">Model Accuracy</h2>
94
- <p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
95
  <!-- warmup_performance figure inserted via gr.Image in app.py -->
96
  """
97
 
 
45
 
46
  # Bottom is split so we can insert real Gradio media (images/video) from app.py.
47
 
48
+ # Up to (and including) the "An Infinite Well" heading — tabs are inserted immediately after
49
+ WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS = """
50
  <div class="f1-container">
51
  <section>
52
  <p class="mb-4 f1-p">The latter category is incredibly demanding, requiring resolution of many points of uncertainty, and involving an array of reasoning steps, including topological and geometric insight, knowledge of mathematical domains such as extremal graph theory and logic, combinatorial considerations, precise implementation, and more.</p>
 
55
 
56
  <section>
57
  <h2 class="f1-h2">An “Infinite Well” of Problems</h2>
58
+ """
59
+
60
+ # After the heading (and after the tabbed examples), before the first figure
61
+ WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS = """
62
  <p class="mb-4 f1-p">While the problems are often natural to state, their solutions are far from obvious. The solvability of this vast class of problems is guaranteed by an algorithmic <strong>meta-theorem</strong> due to <a href="https://en.wikipedia.org/wiki/Courcelle%27s_theorem" target="_blank" rel="noopener noreferrer" class="f1-a">Courcelle</a>, which broadly states:</p>
63
  <blockquote class="my-6 f1-blockquote">
64
  “For every sufficiently tree-like graph, any problem definable in an expressive formal logic — Monadic Second-Order (MSO) logic — can be solved by a dynamic programming algorithm that operates in time linear in the order of the graph.”
 
80
 
81
  <section id="evaluation">
82
  <h2 class="f1-h2">Evaluation</h2>
83
+ <p class="mb-4 f1-p">All models were evaluated using their highest available reasoning settings and with the maximum context length permitted. To give models the best possible chance of success, we provide a generous few-shot prompt that covers a broad array of the ideas and techniques involved in solving these problems.</p>
84
  <p class="mb-4 f1-p">Each submitted solution is subjected to a rigorous and automated <a href="https://arxiv.org/pdf/2507.13337#section.4" target="_blank" rel="noopener noreferrer" class="f1-a">test suite</a> that measures three key aspects of its validity:</p>
85
  <ul class="list-disc list-inside space-y-2 mb-6">
86
  <li class="f1-li"><strong>Correctness:</strong> The output of the submitted algorithm must be correct on all graphs.</li>
 
88
  <li class="f1-li"><strong>Efficiency:</strong> The solution must be truly <a href="https://en.wikipedia.org/wiki/Parameterized_complexity" target="_blank" rel="noopener noreferrer" class="f1-a">fixed-parameter linear</a>.</li>
89
  </ul>
90
  <p class="mb-4 f1-p">To support research and encourage community contributions, the <code>FormulaOne-Warmup</code> dataset is released as a public resource for training and fine-tuning models. The complete test suite for all 100 Warmup problems is available, alongside a standalone evaluation environment, in our <a href="https://github.com/double-ai/formulaone-dataset/tree/main" target="_blank" rel="noopener noreferrer" class="f1-a">GitHub repository</a>.</p>
91
+ <p class="f1-p">To maintain the integrity of the core benchmark, only a minimal subset of tests is released for the Tier 1 and Tier 2 problems. Solutions submitted for evaluation on our benchmark are evaluated against a withheld comprehensive test-suite.</p>
92
  """
93
 
 
94
  # Evaluation: begins the "Model Accuracy" subsection and the Warmup paragraph, up to (but not including) the Warmup figure.
95
  WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
96
  <h2 class="f1-h2">Model Accuracy</h2>
97
+ <p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks; in other words, the tasks are squarely in-distribution.</p>
98
  <!-- warmup_performance figure inserted via gr.Image in app.py -->
99
  """
100
 
src/display/css_html_js.py CHANGED
@@ -15,7 +15,8 @@ custom_css = """
15
  .f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
16
  .markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
17
  #what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
18
- #f1-examples { max-width: 730px; margin: 0 auto; } /* requested 730px */
 
19
 
20
  /* Text */
21
  .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }
@@ -29,20 +30,20 @@ custom_css = """
29
  .f1-a:hover { text-decoration: underline; }
30
 
31
  /* Captions (centered + dark) */
32
- .f1-figcaption { margin-top: 8px; font-size: 0.875rem; color: #111827; text-align: center; }
33
- .f1-figcaption-video { margin-top: 6px; } /* a bit tighter under the video */
34
 
35
  /* Problem name — force center from first render; code bg color #f9fafb */
36
- #f1-examples .f1-problem-markdown .markdown p { text-align: center !important; margin: 0 0 8px 0; }
37
  .f1-problem-markdown p code,
38
- #f1-examples .f1-problem-markdown .markdown p code {
39
- display: inline-block; background: #f9fafb !important; padding: 2px 8px; border-radius: 6px;
40
  }
41
 
42
  .f1-problem-markdown > * { margin-top: 7px; margin-bottom: 7px; }
43
 
44
- /* Problem body: padding + line-height inside Markdown + small space before tabs */
45
- #f1-examples .f1-problem-markdown .markdown { background: var(--f1-bg-muted); border: 1px solid var(--f1-border); border-radius: 8px; padding: 18px; margin: 10px 14px 10px 14px; line-height: 1.75; }
46
  #f1-examples .f1-problem-markdown .markdown p { margin: 0.35rem 0; }
47
 
48
  /* Pills (Radio) — compact spacing at bottom */
 
15
  .f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
16
  .markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
17
  #what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
18
+ /* requested 710px */
19
+ #f1-examples { max-width: 710px; margin: 0 auto; }
20
 
21
  /* Text */
22
  .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }
 
30
  .f1-a:hover { text-decoration: underline; }
31
 
32
  /* Captions (centered + dark) */
33
+ .f1-figcaption { margin-top: 4px; font-size: 0.875rem; color: #111827; text-align: center; }
34
+ .f1-figcaption-video { margin-top: 2px; } /* tighter under the video */
35
 
36
  /* Problem name — force center from first render; code bg color #f9fafb */
37
+ #f1-examples .f1-problem-markdown .markdown p:first-child { text-align: center !important; margin: 0 0 8px 0; }
38
  .f1-problem-markdown p code,
39
+ #f1-examples .f1-problem-markdown .markdown p:first-child code {
40
+ display: inline-block; background: #f9fafb !important; padding: 2px 8px; border-radius: 6px; margin-left: auto; margin-right: auto;
41
  }
42
 
43
  .f1-problem-markdown > * { margin-top: 7px; margin-bottom: 7px; }
44
 
45
+ /* Problem body: padding + comfortable line-height inside Markdown + small space before tabs */
46
+ #f1-examples .f1-problem-markdown .markdown { background: var(--f1-bg-muted); border: 1px solid var(--f1-border); border-radius: 8px; padding: 18px; margin: 10px 14px 10px 14px; line-height: 1.8; }
47
  #f1-examples .f1-problem-markdown .markdown p { margin: 0.35rem 0; }
48
 
49
  /* Pills (Radio) — compact spacing at bottom */