Spaces: Running on CPU Upgrade
Updates.
Browse files
- app.py +15 -6
- src/about.py +9 -6
- src/display/css_html_js.py +9 -8
app.py
CHANGED
@@ -10,10 +10,15 @@ from huggingface_hub import whoami
|
|
10 |
from src.about import WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL # tail after Tier1 fig
|
11 |
from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
|
12 |
from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
|
13 |
-
from src.about import
|
14 |
-
from src.about import
|
15 |
from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
|
16 |
-
from src.about import
|
|
|
|
|
|
|
|
|
|
|
17 |
from src.datamodel.data import F1Data
|
18 |
from src.display.css_html_js import custom_css
|
19 |
from src.display.formatting import styled_error
|
@@ -213,7 +218,11 @@ with blocks:
|
|
213 |
# Top content and categories table
|
214 |
gr.HTML(WHAT_IS_F1_HTML_TOP)
|
215 |
|
216 |
-
#
|
|
|
|
|
|
|
|
|
217 |
with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
|
218 |
gr.HTML(
|
219 |
'<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
|
@@ -262,8 +271,8 @@ with blocks:
|
|
262 |
)
|
263 |
tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
|
264 |
|
265 |
-
#
|
266 |
-
gr.HTML(
|
267 |
|
268 |
# Figure 1: bag_modifications.png (use gr.Image)
|
269 |
gr.Image(
|
|
|
10 |
from src.about import WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL # tail after Tier1 fig
|
11 |
from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
|
12 |
from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
|
13 |
+
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
|
14 |
+
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
|
15 |
from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
|
16 |
+
from src.about import ( # ⬅️ split to insert the tabs right after the heading
|
17 |
+
CITATION_BUTTON_LABEL,
|
18 |
+
CITATION_BUTTON_TEXT,
|
19 |
+
EVALUATION_QUEUE_TEXT,
|
20 |
+
WHAT_IS_F1_HTML_TOP,
|
21 |
+
)
|
22 |
from src.datamodel.data import F1Data
|
23 |
from src.display.css_html_js import custom_css
|
24 |
from src.display.formatting import styled_error
|
|
|
218 |
# Top content and categories table
|
219 |
gr.HTML(WHAT_IS_F1_HTML_TOP)
|
220 |
|
221 |
+
# ---- Bottom content pieces interleaved with real Gradio media ----
|
222 |
+
# Up to and including the "An Infinite Well" heading
|
223 |
+
gr.HTML(WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS)
|
224 |
+
|
225 |
+
# ===== Examples (now right after the “Infinite Well” heading; inner width 710px via CSS) =====
|
226 |
with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
|
227 |
gr.HTML(
|
228 |
'<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
|
|
|
271 |
)
|
272 |
tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
|
273 |
|
274 |
+
# Continue the text after the heading (before the first figure)
|
275 |
+
gr.HTML(WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS)
|
276 |
|
277 |
# Figure 1: bag_modifications.png (use gr.Image)
|
278 |
gr.Image(
|
src/about.py
CHANGED
@@ -45,8 +45,8 @@ WHAT_IS_F1_HTML_TOP = f"""
|
|
45 |
|
46 |
# Bottom is split so we can insert real Gradio media (images/video) from app.py.
|
47 |
|
48 |
-
# Up to
|
49 |
-
|
50 |
<div class="f1-container">
|
51 |
<section>
|
52 |
<p class="mb-4 f1-p">The latter category is incredibly demanding, requiring resolution of many points of uncertainty, and involving an array of reasoning steps, including topological and geometric insight, knowledge of mathematical domains such as extremal graph theory and logic, combinatorial considerations, precise implementation, and more.</p>
|
@@ -55,6 +55,10 @@ WHAT_IS_F1_HTML_BOTTOM_A = """
|
|
55 |
|
56 |
<section>
|
57 |
<h2 class="f1-h2">An “Infinite Well” of Problems</h2>
|
|
|
|
|
|
|
|
|
58 |
<p class="mb-4 f1-p">While the problems are often natural to state, their solutions are far from obvious. The solvability of this vast class of problems is guaranteed by an algorithmic <strong>meta-theorem</strong> due to <a href="https://en.wikipedia.org/wiki/Courcelle%27s_theorem" target="_blank" rel="noopener noreferrer" class="f1-a">Courcelle</a>, which broadly states:</p>
|
59 |
<blockquote class="my-6 f1-blockquote">
|
60 |
“For every sufficiently tree-like graph, any problem definable in an expressive formal logic — Monadic Second-Order (MSO) logic — can be solved by a dynamic programming algorithm that operates in time linear in the order of the graph.”
|
@@ -76,7 +80,7 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
|
|
76 |
|
77 |
<section id="evaluation">
|
78 |
<h2 class="f1-h2">Evaluation</h2>
|
79 |
-
<p class="mb-4 f1-p">To give models the best possible chance of success, we provide a generous few-shot prompt that covers a broad array of the ideas and techniques involved in solving these problems
|
80 |
<p class="mb-4 f1-p">Each submitted solution is subjected to a rigorous and automated <a href="https://arxiv.org/pdf/2507.13337#section.4" target="_blank" rel="noopener noreferrer" class="f1-a">test suite</a> that measures three key aspects of its validity:</p>
|
81 |
<ul class="list-disc list-inside space-y-2 mb-6">
|
82 |
<li class="f1-li"><strong>Correctness:</strong> The output of the submitted algorithm must be correct on all graphs.</li>
|
@@ -84,14 +88,13 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
|
|
84 |
<li class="f1-li"><strong>Efficiency:</strong> The solution must be truly <a href="https://en.wikipedia.org/wiki/Parameterized_complexity" target="_blank" rel="noopener noreferrer" class="f1-a">fixed-parameter linear</a>.</li>
|
85 |
</ul>
|
86 |
<p class="mb-4 f1-p">To support research and encourage community contributions, the <code>FormulaOne-Warmup</code> dataset is released as a public resource for training and fine-tuning models. The complete test suite for all 100 Warmup problems is available, alongside a standalone evaluation environment, in our <a href="https://github.com/double-ai/formulaone-dataset/tree/main" target="_blank" rel="noopener noreferrer" class="f1-a">GitHub repository</a>.</p>
|
87 |
-
<p class="f1-p">To maintain the integrity of the core benchmark, only a minimal subset of tests is released for the Tier 1 and Tier 2 problems.
|
88 |
"""
|
89 |
|
90 |
-
# *** THIS WAS MISSING BEFORE ***
|
91 |
# Evaluation: begins the "Model Accuracy" subsection and the Warmup paragraph, up to (but not including) the Warmup figure.
|
92 |
WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
|
93 |
<h2 class="f1-h2">Model Accuracy</h2>
|
94 |
-
<p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
|
95 |
<!-- warmup_performance figure inserted via gr.Image in app.py -->
|
96 |
"""
|
97 |
|
|
|
45 |
|
46 |
# Bottom is split so we can insert real Gradio media (images/video) from app.py.
|
47 |
|
48 |
+
# Up to (and including) the "An Infinite Well" heading — tabs are inserted immediately after
|
49 |
+
WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS = """
|
50 |
<div class="f1-container">
|
51 |
<section>
|
52 |
<p class="mb-4 f1-p">The latter category is incredibly demanding, requiring resolution of many points of uncertainty, and involving an array of reasoning steps, including topological and geometric insight, knowledge of mathematical domains such as extremal graph theory and logic, combinatorial considerations, precise implementation, and more.</p>
|
|
|
55 |
|
56 |
<section>
|
57 |
<h2 class="f1-h2">An “Infinite Well” of Problems</h2>
|
58 |
+
"""
|
59 |
+
|
60 |
+
# After the heading (and after the tabbed examples), before the first figure
|
61 |
+
WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS = """
|
62 |
<p class="mb-4 f1-p">While the problems are often natural to state, their solutions are far from obvious. The solvability of this vast class of problems is guaranteed by an algorithmic <strong>meta-theorem</strong> due to <a href="https://en.wikipedia.org/wiki/Courcelle%27s_theorem" target="_blank" rel="noopener noreferrer" class="f1-a">Courcelle</a>, which broadly states:</p>
|
63 |
<blockquote class="my-6 f1-blockquote">
|
64 |
“For every sufficiently tree-like graph, any problem definable in an expressive formal logic — Monadic Second-Order (MSO) logic — can be solved by a dynamic programming algorithm that operates in time linear in the order of the graph.”
|
|
|
80 |
|
81 |
<section id="evaluation">
|
82 |
<h2 class="f1-h2">Evaluation</h2>
|
83 |
+
<p class="mb-4 f1-p">All models were evaluated using their highest available reasoning settings and with the maximum context length permitted. To give models the best possible chance of success, we provide a generous few-shot prompt that covers a broad array of the ideas and techniques involved in solving these problems.</p>
|
84 |
<p class="mb-4 f1-p">Each submitted solution is subjected to a rigorous and automated <a href="https://arxiv.org/pdf/2507.13337#section.4" target="_blank" rel="noopener noreferrer" class="f1-a">test suite</a> that measures three key aspects of its validity:</p>
|
85 |
<ul class="list-disc list-inside space-y-2 mb-6">
|
86 |
<li class="f1-li"><strong>Correctness:</strong> The output of the submitted algorithm must be correct on all graphs.</li>
|
|
|
88 |
<li class="f1-li"><strong>Efficiency:</strong> The solution must be truly <a href="https://en.wikipedia.org/wiki/Parameterized_complexity" target="_blank" rel="noopener noreferrer" class="f1-a">fixed-parameter linear</a>.</li>
|
89 |
</ul>
|
90 |
<p class="mb-4 f1-p">To support research and encourage community contributions, the <code>FormulaOne-Warmup</code> dataset is released as a public resource for training and fine-tuning models. The complete test suite for all 100 Warmup problems is available, alongside a standalone evaluation environment, in our <a href="https://github.com/double-ai/formulaone-dataset/tree/main" target="_blank" rel="noopener noreferrer" class="f1-a">GitHub repository</a>.</p>
|
91 |
+
<p class="f1-p">To maintain the integrity of the core benchmark, only a minimal subset of tests is released for the Tier 1 and Tier 2 problems. Solutions submitted for evaluation on our benchmark are evaluated against a withheld comprehensive test-suite.</p>
|
92 |
"""
|
93 |
|
|
|
94 |
# Evaluation: begins the "Model Accuracy" subsection and the Warmup paragraph, up to (but not including) the Warmup figure.
|
95 |
WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
|
96 |
<h2 class="f1-h2">Model Accuracy</h2>
|
97 |
+
<p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
|
98 |
<!-- warmup_performance figure inserted via gr.Image in app.py -->
|
99 |
"""
|
100 |
|
src/display/css_html_js.py
CHANGED
@@ -15,7 +15,8 @@ custom_css = """
|
|
15 |
.f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
|
16 |
.markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
|
17 |
#what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
|
18 |
-
|
|
|
19 |
|
20 |
/* Text */
|
21 |
.f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }
|
@@ -29,20 +30,20 @@ custom_css = """
|
|
29 |
.f1-a:hover { text-decoration: underline; }
|
30 |
|
31 |
/* Captions (centered + dark) */
|
32 |
-
.f1-figcaption { margin-top:
|
33 |
-
.f1-figcaption-video { margin-top:
|
34 |
|
35 |
/* Problem name — force center from first render; code bg color #f9fafb */
|
36 |
-
#f1-examples .f1-problem-markdown .markdown p { text-align: center !important; margin: 0 0 8px 0; }
|
37 |
.f1-problem-markdown p code,
|
38 |
-
#f1-examples .f1-problem-markdown .markdown p code {
|
39 |
-
display: inline-block; background: #f9fafb !important; padding: 2px 8px; border-radius: 6px;
|
40 |
}
|
41 |
|
42 |
.f1-problem-markdown > * { margin-top: 7px; margin-bottom: 7px; }
|
43 |
|
44 |
-
/* Problem body: padding + line-height inside Markdown + small space before tabs */
|
45 |
-
#f1-examples .f1-problem-markdown .markdown { background: var(--f1-bg-muted); border: 1px solid var(--f1-border); border-radius: 8px; padding: 18px; margin: 10px 14px 10px 14px; line-height: 1.
|
46 |
#f1-examples .f1-problem-markdown .markdown p { margin: 0.35rem 0; }
|
47 |
|
48 |
/* Pills (Radio) — compact spacing at bottom */
|
|
|
15 |
.f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
|
16 |
.markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
|
17 |
#what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
|
18 |
+
/* requested 710px */
|
19 |
+
#f1-examples { max-width: 710px; margin: 0 auto; }
|
20 |
|
21 |
/* Text */
|
22 |
.f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }
|
|
|
30 |
.f1-a:hover { text-decoration: underline; }
|
31 |
|
32 |
/* Captions (centered + dark) */
|
33 |
+
.f1-figcaption { margin-top: 4px; font-size: 0.875rem; color: #111827; text-align: center; }
|
34 |
+
.f1-figcaption-video { margin-top: 2px; } /* tighter under the video */
|
35 |
|
36 |
/* Problem name — force center from first render; code bg color #f9fafb */
|
37 |
+
#f1-examples .f1-problem-markdown .markdown p:first-child { text-align: center !important; margin: 0 0 8px 0; }
|
38 |
.f1-problem-markdown p code,
|
39 |
+
#f1-examples .f1-problem-markdown .markdown p:first-child code {
|
40 |
+
display: inline-block; background: #f9fafb !important; padding: 2px 8px; border-radius: 6px; margin-left: auto; margin-right: auto;
|
41 |
}
|
42 |
|
43 |
.f1-problem-markdown > * { margin-top: 7px; margin-bottom: 7px; }
|
44 |
|
45 |
+
/* Problem body: padding + comfortable line-height inside Markdown + small space before tabs */
|
46 |
+
#f1-examples .f1-problem-markdown .markdown { background: var(--f1-bg-muted); border: 1px solid var(--f1-border); border-radius: 8px; padding: 18px; margin: 10px 14px 10px 14px; line-height: 1.8; }
|
47 |
#f1-examples .f1-problem-markdown .markdown p { margin: 0.35rem 0; }
|
48 |
|
49 |
/* Pills (Radio) — compact spacing at bottom */
|