Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update.
Browse files
- app.py +3 -4
- src/about.py +3 -5
- src/display/css_html_js.py +37 -25
app.py
CHANGED
@@ -214,9 +214,8 @@ with blocks:
|
|
214 |
# Top content
|
215 |
gr.HTML(WHAT_IS_F1_HTML_TOP)
|
216 |
|
217 |
-
# Examples (kept inside a centered
|
218 |
with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
|
219 |
-
# centered pill title
|
220 |
gr.HTML(
|
221 |
'<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
|
222 |
)
|
@@ -259,7 +258,7 @@ with blocks:
|
|
259 |
choices=["Warmup", "Tier 1", "Tier 2"],
|
260 |
value="Warmup",
|
261 |
label=None,
|
262 |
-
show_label=False,
|
263 |
elem_id="f1-example-radio",
|
264 |
)
|
265 |
tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
|
@@ -269,7 +268,7 @@ with blocks:
|
|
269 |
|
270 |
# Embed video via Gradio so it renders reliably
|
271 |
gr.Video(
|
272 |
-
|
273 |
autoplay=True,
|
274 |
loop=True,
|
275 |
show_label=False,
|
|
|
214 |
# Top content
|
215 |
gr.HTML(WHAT_IS_F1_HTML_TOP)
|
216 |
|
217 |
+
# Examples (kept inside a centered container; content itself 730px wide)
|
218 |
with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
|
|
|
219 |
gr.HTML(
|
220 |
'<div class="f1-tabs-body"><div class="f1-examples-chip">Examples of FormulaOne problems</div></div>'
|
221 |
)
|
|
|
258 |
choices=["Warmup", "Tier 1", "Tier 2"],
|
259 |
value="Warmup",
|
260 |
label=None,
|
261 |
+
show_label=False,
|
262 |
elem_id="f1-example-radio",
|
263 |
)
|
264 |
tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
|
|
|
268 |
|
269 |
# Embed video via Gradio so it renders reliably
|
270 |
gr.Video(
|
271 |
+
"assets/DominatingSetAnimation.mp4",
|
272 |
autoplay=True,
|
273 |
loop=True,
|
274 |
show_label=False,
|
src/about.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
# The paper's URL for linking
|
2 |
PAPER_URL = "https://arxiv.org/abs/2507.13337"
|
3 |
|
4 |
-
# Top chunk — same as before (div grid table), unchanged except for minor structure.
|
5 |
WHAT_IS_F1_HTML_TOP = f"""
|
6 |
<div class="f1-container">
|
7 |
<header class="text-center mb-12">
|
@@ -15,7 +14,6 @@ WHAT_IS_F1_HTML_TOP = f"""
|
|
15 |
|
16 |
<p class="mb-4 f1-p"><strong>FormulaOne</strong> consists of 220 novel dynamic programming problems over graphs. The problems are organised into three categories, ranging from moderate difficulty and all the way up to research-level.</p>
|
17 |
|
18 |
-
<!-- Clean, centered "table" using a single grid -->
|
19 |
<div class="f1-grid-wrap" role="region" aria-label="FormulaOne categories">
|
20 |
<div class="f1-grid-table" role="table">
|
21 |
<div class="f1-grid-row f1-grid-head" role="row">
|
@@ -44,7 +42,6 @@ WHAT_IS_F1_HTML_TOP = f"""
|
|
44 |
</div>
|
45 |
"""
|
46 |
|
47 |
-
# Bottom content split around the video so we can embed it with gr.Video in app.py
|
48 |
WHAT_IS_F1_HTML_BOTTOM_TOP = """
|
49 |
<div class="f1-container">
|
50 |
<section>
|
@@ -86,12 +83,12 @@ WHAT_IS_F1_HTML_BOTTOM_TAIL = """
|
|
86 |
<h2 class="f1-h2">Model Accuracy</h2>
|
87 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
|
88 |
<figure class="f1-figure">
|
89 |
-
<img src="/file=assets/warmup_performance.png" alt="Plot showing model performance on FormulaOne-Warmup" class="max-w-full md:max-w-2xl
|
90 |
<figcaption class="f1-figcaption">Performance of frontier models on the FormulaOne-Warmup dataset.</figcaption>
|
91 |
</figure>
|
92 |
<p class="mb-4 f1-p">However, as the reasoning depth increases in <strong>Tier 1</strong>, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
|
93 |
<figure class="f1-figure">
|
94 |
-
<img src="/file=assets/tier1_performance.png" alt="Plot showing model performance on Tier 1" class="max-w-full md:max-w-2xl
|
95 |
<figcaption class="f1-figcaption">Performance of frontier reasoning models on the FormulaOne dataset.</figcaption>
|
96 |
</figure>
|
97 |
<p class="f1-p">This trend culminates in <strong>Tier 2</strong>, where the difficulty is characteristic of exploratory research problems. On this set of 20 problems, no current frontier model solves even a single one. This result starkly illustrates the gap that remains between high performance on existing benchmarks and the deep algorithmic reasoning required for truly complex problems.</p>
|
@@ -99,6 +96,7 @@ WHAT_IS_F1_HTML_BOTTOM_TAIL = """
|
|
99 |
</div>
|
100 |
"""
|
101 |
|
|
|
102 |
EVALUATION_QUEUE_TEXT = """
|
103 |
## Submitting to the FormulaOne Leaderboard
|
104 |
|
|
|
1 |
# The paper's URL for linking
|
2 |
PAPER_URL = "https://arxiv.org/abs/2507.13337"
|
3 |
|
|
|
4 |
WHAT_IS_F1_HTML_TOP = f"""
|
5 |
<div class="f1-container">
|
6 |
<header class="text-center mb-12">
|
|
|
14 |
|
15 |
<p class="mb-4 f1-p"><strong>FormulaOne</strong> consists of 220 novel dynamic programming problems over graphs. The problems are organised into three categories, ranging from moderate difficulty and all the way up to research-level.</p>
|
16 |
|
|
|
17 |
<div class="f1-grid-wrap" role="region" aria-label="FormulaOne categories">
|
18 |
<div class="f1-grid-table" role="table">
|
19 |
<div class="f1-grid-row f1-grid-head" role="row">
|
|
|
42 |
</div>
|
43 |
"""
|
44 |
|
|
|
45 |
WHAT_IS_F1_HTML_BOTTOM_TOP = """
|
46 |
<div class="f1-container">
|
47 |
<section>
|
|
|
83 |
<h2 class="f1-h2">Model Accuracy</h2>
|
84 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
|
85 |
<figure class="f1-figure">
|
86 |
+
<img src="/file=assets/warmup_performance.png" alt="Plot showing model performance on FormulaOne-Warmup" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
|
87 |
<figcaption class="f1-figcaption">Performance of frontier models on the FormulaOne-Warmup dataset.</figcaption>
|
88 |
</figure>
|
89 |
<p class="mb-4 f1-p">However, as the reasoning depth increases in <strong>Tier 1</strong>, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
|
90 |
<figure class="f1-figure">
|
91 |
+
<img src="/file=assets/tier1_performance.png" alt="Plot showing model performance on Tier 1" class="max-w-full md:max-w-2xl mx-auto rounded-lg shadow-md">
|
92 |
<figcaption class="f1-figcaption">Performance of frontier reasoning models on the FormulaOne dataset.</figcaption>
|
93 |
</figure>
|
94 |
<p class="f1-p">This trend culminates in <strong>Tier 2</strong>, where the difficulty is characteristic of exploratory research problems. On this set of 20 problems, no current frontier model solves even a single one. This result starkly illustrates the gap that remains between high performance on existing benchmarks and the deep algorithmic reasoning required for truly complex problems.</p>
|
|
|
96 |
</div>
|
97 |
"""
|
98 |
|
99 |
+
|
100 |
EVALUATION_QUEUE_TEXT = """
|
101 |
## Submitting to the FormulaOne Leaderboard
|
102 |
|
src/display/css_html_js.py
CHANGED
@@ -6,7 +6,7 @@ custom_css = """
|
|
6 |
--f1-bg: #ffffff;
|
7 |
--f1-bg-muted: #f9fafb;
|
8 |
|
9 |
-
/* Disable Gradio
|
10 |
--block-border-color: transparent !important;
|
11 |
--background-fill-primary: transparent !important;
|
12 |
}
|
@@ -15,7 +15,9 @@ custom_css = """
|
|
15 |
.f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
|
16 |
.markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
|
17 |
#what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
|
18 |
-
|
|
|
|
|
19 |
|
20 |
/* Body text */
|
21 |
.f1-p, .f1-li {
|
@@ -45,75 +47,81 @@ custom_css = """
|
|
45 |
|
46 |
/* Captions, problem name */
|
47 |
.f1-figcaption { margin-top: 0.5rem; font-size: 0.875rem; color: #111827; text-align: center; }
|
48 |
-
.f1-problem-name { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; font-weight: 600; text-align: center; }
|
49 |
|
50 |
-
/*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
.f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
|
52 |
.f1-grid-table {
|
53 |
-
display: inline-grid;
|
54 |
grid-template-columns: max-content max-content minmax(360px, auto);
|
55 |
-
border: 1px solid var(--f1-border);
|
56 |
background: var(--f1-bg);
|
57 |
border-radius: 8px;
|
58 |
overflow: hidden;
|
59 |
}
|
60 |
-
.f1-grid-row { display: contents; }
|
61 |
.f1-grid-cell {
|
62 |
padding: 8px 12px;
|
63 |
text-align: left;
|
64 |
border-left: 1px solid var(--f1-border);
|
65 |
border-top: 1px solid var(--f1-border);
|
66 |
}
|
67 |
-
.f1-grid-cell:nth-child(3n+1) { border-left: none; }
|
68 |
-
.f1-grid-head .f1-grid-cell {
|
69 |
-
font-weight: 600;
|
70 |
-
text-align: center;
|
71 |
-
border-top: none; /* top edge of the grid */
|
72 |
-
}
|
73 |
|
74 |
/* ===== Examples block ===== */
|
75 |
#f1-examples {
|
76 |
-
background: var(--f1-bg-muted);
|
77 |
border: 1px solid var(--f1-border);
|
78 |
border-radius: 10px;
|
79 |
box-shadow: 0 1px 2px rgba(0,0,0,0.04);
|
80 |
margin-bottom: 12px;
|
81 |
}
|
|
|
|
|
82 |
.f1-tabs-body { padding-top: 12px; text-align: center; }
|
83 |
.f1-examples-chip {
|
84 |
display: inline-block;
|
85 |
-
background: #e5e7eb;
|
86 |
color: #111827;
|
87 |
padding: 6px 12px;
|
88 |
border-radius: 999px;
|
89 |
font-weight: 700;
|
90 |
}
|
91 |
|
92 |
-
/* Problem content: consistent background + padding
|
93 |
#f1-examples .f1-problem-markdown .markdown {
|
94 |
background: var(--f1-bg-muted);
|
95 |
border: 1px solid var(--f1-border);
|
96 |
border-radius: 8px;
|
97 |
padding: 18px;
|
98 |
-
margin: 10px 14px
|
99 |
}
|
100 |
|
101 |
-
/* Pills (Radio) — hide bullets
|
102 |
-
#f1-example-radio { border-top: 1px solid var(--f1-border); padding: 8px 10px
|
103 |
#f1-example-radio input[type="radio"] { display: none; }
|
104 |
-
#f1-example-radio .wrap { display: flex; gap:
|
105 |
#f1-example-radio label {
|
106 |
border: 1px solid var(--f1-border);
|
107 |
border-radius: 999px;
|
108 |
-
padding: 6px
|
109 |
cursor: pointer;
|
110 |
-
background: #f3f4f6;
|
111 |
}
|
112 |
#f1-example-radio input[type="radio"]:checked + span {
|
113 |
-
background: #e5e7eb;
|
114 |
border-color: var(--f1-border);
|
115 |
border-radius: 999px;
|
116 |
-
padding: 6px
|
117 |
}
|
118 |
|
119 |
/* Gradio video styling (centered, rounded, subtle shadow) */
|
@@ -125,12 +133,16 @@ custom_css = """
|
|
125 |
#formulaone-leaderboard-tab-table .row, #formulaone-leaderboard-tab-table .column { width: 100% !important; max-width: 100% !important; }
|
126 |
#formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
|
127 |
|
128 |
-
/* Login button: force light */
|
129 |
#hf-login-btn, #hf-login-btn button, button[data-testid="login-button"], [data-testid="login-button"] button, div[data-testid="login-button"] > button {
|
130 |
background: #ffffff !important;
|
131 |
color: #1f2937 !important;
|
132 |
border: 1px solid var(--f1-border) !important;
|
133 |
box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.04) !important;
|
|
|
|
|
|
|
|
|
134 |
}
|
135 |
#hf-login-btn:hover, #hf-login-btn button:hover, button[data-testid="login-button"]:hover, [data-testid="login-button"] button:hover, div[data-testid="login-button"] > button:hover { background: #f9fafb !important; }
|
136 |
"""
|
|
|
6 |
--f1-bg: #ffffff;
|
7 |
--f1-bg-muted: #f9fafb;
|
8 |
|
9 |
+
/* Disable Gradio chrome that was bleeding into the examples block */
|
10 |
--block-border-color: transparent !important;
|
11 |
--background-fill-primary: transparent !important;
|
12 |
}
|
|
|
15 |
.f1-container { max-width: 800px; margin: 0 auto; padding: 0 16px; }
|
16 |
.markdown-text { font-size: 16px !important; max-width: 800px; margin: 0 auto; }
|
17 |
#what-is-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
|
18 |
+
|
19 |
+
/* Examples wrapper stays centered, content width 730px */
|
20 |
+
#f1-examples { max-width: 730px; margin: 0 auto; }
|
21 |
|
22 |
/* Body text */
|
23 |
.f1-p, .f1-li {
|
|
|
47 |
|
48 |
/* Captions, problem name */
|
49 |
.f1-figcaption { margin-top: 0.5rem; font-size: 0.875rem; color: #111827; text-align: center; }
|
|
|
50 |
|
51 |
+
/* Problem name: guarantee center on first render; force bg color for <code> */
|
52 |
+
#f1-examples .f1-problem-markdown .markdown p.f1-problem-name { text-align: center !important; margin: 0 0 8px 0; }
|
53 |
+
#f1-examples .f1-problem-markdown .markdown p.f1-problem-name code {
|
54 |
+
display: inline-block;
|
55 |
+
background: #f9fafb !important; /* requested override; bypasses --code-background-fill */
|
56 |
+
padding: 2px 8px;
|
57 |
+
border-radius: 6px;
|
58 |
+
}
|
59 |
+
|
60 |
+
/* ===== Clean "table" (column-consistent widths; compact first two, flexible third) ===== */
|
61 |
.f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
|
62 |
.f1-grid-table {
|
63 |
+
display: inline-grid;
|
64 |
grid-template-columns: max-content max-content minmax(360px, auto);
|
65 |
+
border: 1px solid var(--f1-border);
|
66 |
background: var(--f1-bg);
|
67 |
border-radius: 8px;
|
68 |
overflow: hidden;
|
69 |
}
|
70 |
+
.f1-grid-row { display: contents; }
|
71 |
.f1-grid-cell {
|
72 |
padding: 8px 12px;
|
73 |
text-align: left;
|
74 |
border-left: 1px solid var(--f1-border);
|
75 |
border-top: 1px solid var(--f1-border);
|
76 |
}
|
77 |
+
.f1-grid-cell:nth-child(3n+1) { border-left: none; }
|
78 |
+
.f1-grid-head .f1-grid-cell { font-weight: 600; text-align: center; border-top: none; }
|
|
|
|
|
|
|
|
|
79 |
|
80 |
/* ===== Examples block ===== */
|
81 |
#f1-examples {
|
82 |
+
background: var(--f1-bg-muted); /* #f9fafb */
|
83 |
border: 1px solid var(--f1-border);
|
84 |
border-radius: 10px;
|
85 |
box-shadow: 0 1px 2px rgba(0,0,0,0.04);
|
86 |
margin-bottom: 12px;
|
87 |
}
|
88 |
+
#f1-examples .form { background: transparent !important; } /* no background on inner .form */
|
89 |
+
|
90 |
.f1-tabs-body { padding-top: 12px; text-align: center; }
|
91 |
.f1-examples-chip {
|
92 |
display: inline-block;
|
93 |
+
background: #e5e7eb; /* slightly darker than #f9fafb */
|
94 |
color: #111827;
|
95 |
padding: 6px 12px;
|
96 |
border-radius: 999px;
|
97 |
font-weight: 700;
|
98 |
}
|
99 |
|
100 |
+
/* Problem content: consistent background + padding; modest space before tabs */
|
101 |
#f1-examples .f1-problem-markdown .markdown {
|
102 |
background: var(--f1-bg-muted);
|
103 |
border: 1px solid var(--f1-border);
|
104 |
border-radius: 8px;
|
105 |
padding: 18px;
|
106 |
+
margin: 10px 14px 10px 14px; /* slightly reduced bottom margin */
|
107 |
}
|
108 |
|
109 |
+
/* Pills (Radio) — hide bullets; light gray inactive; darker selected; minimal bottom padding */
|
110 |
+
#f1-example-radio { border-top: 1px solid var(--f1-border); padding: 8px 10px 4px 10px; margin: 0 8px 4px; }
|
111 |
#f1-example-radio input[type="radio"] { display: none; }
|
112 |
+
#f1-example-radio .wrap { display: flex; gap: 6px; flex-wrap: wrap; justify-content: flex-start; }
|
113 |
#f1-example-radio label {
|
114 |
border: 1px solid var(--f1-border);
|
115 |
border-radius: 999px;
|
116 |
+
padding: 6px 10px;
|
117 |
cursor: pointer;
|
118 |
+
background: #f3f4f6; /* inactive pills */
|
119 |
}
|
120 |
#f1-example-radio input[type="radio"]:checked + span {
|
121 |
+
background: #e5e7eb; /* active pill */
|
122 |
border-color: var(--f1-border);
|
123 |
border-radius: 999px;
|
124 |
+
padding: 6px 10px;
|
125 |
}
|
126 |
|
127 |
/* Gradio video styling (centered, rounded, subtle shadow) */
|
|
|
133 |
#formulaone-leaderboard-tab-table .row, #formulaone-leaderboard-tab-table .column { width: 100% !important; max-width: 100% !important; }
|
134 |
#formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
|
135 |
|
136 |
+
/* Login button: force light + width cap at 800 and center */
|
137 |
#hf-login-btn, #hf-login-btn button, button[data-testid="login-button"], [data-testid="login-button"] button, div[data-testid="login-button"] > button {
|
138 |
background: #ffffff !important;
|
139 |
color: #1f2937 !important;
|
140 |
border: 1px solid var(--f1-border) !important;
|
141 |
box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.04) !important;
|
142 |
+
max-width: 800px !important;
|
143 |
+
width: 100% !important;
|
144 |
+
margin-left: auto !important;
|
145 |
+
margin-right: auto !important;
|
146 |
}
|
147 |
#hf-login-btn:hover, #hf-login-btn button:hover, button[data-testid="login-button"]:hover, [data-testid="login-button"] button:hover, div[data-testid="login-button"] > button:hover { background: #f9fafb !important; }
|
148 |
"""
|