# app.py import gradio as gr import pandas as pd from apscheduler.schedulers.background import BackgroundScheduler from gradio_leaderboard import Leaderboard, SelectColumns from huggingface_hub import whoami # HTML is split so we can inject Gradio media (images/video) where needed. from src.about import WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL # tail after Tier1 fig from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig from src.about import ( # ⬅️ split to insert the tabs right after the heading CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, WHAT_IS_F1_HTML_BOTTOM_B, WHAT_IS_F1_HTML_TOP, ) from src.datamodel.data import F1Data from src.display.css_html_js import custom_css from src.display.formatting import styled_error from src.display.utils import AutoEvalColumn, fields from src.envs import API, CODE_PROBLEMS_REPO, REPO_ID, RESULTS_REPO, SUBMISSIONS_REPO from src.logger import get_logger from src.populate import get_leaderboard_df from src.submission.submit import add_new_solutions, fetch_user_info from src.validation.validate import MAX_INPUT_LENGTH, MIN_INPUT_LENGTH, is_submission_file_valid, is_valid logger = get_logger(__name__) ENSURE_ALL_PRESENT = False # TODO: Switch to True. SPLIT = "warmup" # TODO temp lbdb = F1Data( cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=SPLIT, ) leaderboard_df = None logger.info("Initialized LBDB") def restart_space(): logger.info("Restarting space") API.restart_space(repo_id=REPO_ID) def refresh_leaderboard_data(): """Refresh the leaderboard data from the latest results""" global leaderboard_df try: logger.info("Loading leaderboard data...") new_leaderboard_df = get_leaderboard_df(RESULTS_REPO) if new_leaderboard_df is not None: logger.info("Leaderboard data refreshed successfully") leaderboard_df = new_leaderboard_df else: logger.warning("No new leaderboard data found") return None except Exception as e: logger.error(f"Error refreshing leaderboard data: {e}") return None def init_leaderboard(dataframe: pd.DataFrame): if dataframe is None: raise ValueError("Leaderboard DataFrame is None.") lb = Leaderboard( value=dataframe, datatype=[c.type for c in fields(AutoEvalColumn)], select_columns=SelectColumns( default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default], cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden], label="Select Columns to Display:", ), search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.organization.name], hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden], bool_checkboxgroup_label="Hide models", interactive=False, ) lb.col_count = (1, "fixed") return lb def add_solution_cbk( system_name: str, org: str, submission_path: str, profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, ): logger.info("Fetching user details for submission") logger.info("PROFILE %s", profile) logger.info("TOKEN %s", oauth_token) if profile is None or oauth_token is None: return styled_error("Please sign in with Hugging Face before submitting.") # Display handle and display name (may change over time) logger.info(f"User handle: {profile.username}") display_name = profile.name or profile.username logger.info(f"Display name: {display_name}") # Stable account id user_info = fetch_user_info(oauth_token) logger.info("Logged in user info: %s", user_info) stable_id = user_info.get("id") if user_info else None logger.info(f"User stable ID: {stable_id}") if not stable_id: return styled_error("Could not retrieve your stable user ID. Please try signing in again.") user_id = stable_id if not profile.username: return styled_error("Could not retrieve username. Please try signing in again.") try: # Validating the submission file. if not submission_path: return styled_error("Please upload JSONL submission file.") if not is_submission_file_valid( submission_path, is_warmup_dataset=(SPLIT == "warmup"), ): return styled_error("Failed to read JSONL submission file. Please try again later.") # Validating all user-supplied arguments. sys_type = "default" # Placeholder for val, val_name in [ (system_name, "System name"), (org, "Organisation name"), # (sys_type, "System type"), ]: if len(val) == 0: return styled_error(f"Please fill in the '{val_name}' field.") if not is_valid(val): return styled_error( f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, " + "or the special characters '-' and '.', and be of length between " + f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}." ) except Exception: logger.warning("Failed to process user submission", exc_info=True) return styled_error("An error occurred. Please try again later.") # Intentionally vague. return add_new_solutions( lbdb, profile.username, user_id, system_name, org, sys_type, # Passing the placeholder submission_path, is_warmup_dataset=(SPLIT == "warmup"), ensure_all_present=ENSURE_ALL_PRESENT, ) def gate_submission(oauth_token: gr.OAuthToken | None): """ @brief Toggles the visibility of the login box and submission panel based on the user's login status. """ logger.info("GATE TOKEN %s", oauth_token) if oauth_token is None: logger.info("GATE: NO TOKEN") return gr.update(visible=True), gr.update(visible=False) try: whoami(oauth_token.token) logger.info("GATE: TOKEN IS VALID") return gr.update(visible=False), gr.update(visible=True) except Exception: logger.info("GATE: TOKEN HAS EXPIRED") return gr.update(visible=True), gr.update(visible=False) def get_theme(): # return gr.themes.Soft( # primary_hue=gr.themes.colors.blue, # secondary_hue=gr.themes.colors.sky, # neutral_hue=gr.themes.colors.gray, # ).set( # body_background_fill="#FFFFFF", # panel_background_fill="#f3f4f6", # ) return "light" # --- Gradio-based tabs for examples (no JS in HTML) --- def _select_example_tab(choice: str): return ( gr.update(visible=(choice == "Warmup")), gr.update(visible=(choice == "Tier 1")), gr.update(visible=(choice == "Tier 2")), ) # Force light theme even if HF user prefers dark blocks = gr.Blocks( css=custom_css, theme=get_theme(), js="() => { document.body.classList.remove('dark'); document.documentElement.setAttribute('data-theme','light'); document.documentElement.setAttribute('data-color-mode','light'); }", ) with blocks: with gr.Tabs(elem_classes="tab-buttons") as tabs: with gr.TabItem("What is FormulaOne", id=0, elem_id="what-is-tab"): # Top content and categories table gr.HTML(WHAT_IS_F1_HTML_TOP) # ---- Bottom content pieces interleaved with real Gradio media ---- # Up to and including the "An Infinite Well" heading gr.HTML(WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS) # ===== Examples (now right after the “Infinite Well” heading; inner width 710px via CSS) ===== with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]): gr.HTML( '
Examples of FormulaOne problems
' ) _latex = [ {"left": "$$", "right": "$$", "display": True}, {"left": "$", "right": "$", "display": False}, {"left": "\\(", "right": "\\)", "display": False}, {"left": "\\[", "right": "\\]", "display": True}, ] md_warmup = gr.Markdown( value=( '

Union-of-Paths-and-Cycles

\n' "Given a tree-like graph $G=(V,E)$ and a weight function $w:V\\to\\mathbb{N}$, compute the sum of all weights of sets $S\\subseteq V$ such that the induced subgraph $G[S]$ is a disjoint union of paths and cycles." ), latex_delimiters=_latex, elem_classes=["f1-problem-markdown"], ) md_tier1 = gr.Markdown( value=( '

Maximal-Union-of-Paths-and-Cycles

\n' "Given a tree-like graph $G=(V,E)$ and a weight function $w:V\\to\\mathbb{N}$, compute the sum of all weights of sets $S\\subseteq V$ such that $G[S]$ is a disjoint union of paths and cycles and $S$ is maximal with respect to this property." ), visible=False, latex_delimiters=_latex, elem_classes=["f1-problem-markdown"], ) md_tier2 = gr.Markdown( value=( '

Maximal-Union-of-Cycles

\n' "Given a tree-like graph $G=(V,E)$ and a weight function $w:V\\to\\mathbb{N}$, compute the sum of all weights of sets $S\\subseteq V$ such that $G[S]$ is a disjoint union of cycles and $S$ is maximal with respect to this property." ), visible=False, latex_delimiters=_latex, elem_classes=["f1-problem-markdown"], ) tab_radio = gr.Radio( choices=["Warmup", "Tier 1", "Tier 2"], value="Warmup", label=None, show_label=False, elem_id="f1-example-radio", ) tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2]) # Continue the text after the heading (before the first figure) gr.HTML(WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS) # Figure 1: bag_modifications.png (use gr.Image) gr.Image( "assets/bag_modifications.png", show_label=False, elem_classes=["f1-image"], show_share_button=False, show_download_button=False, show_fullscreen_button=False, width=550, ) gr.HTML('
An illustration of local modifications to bags.
') gr.HTML(WHAT_IS_F1_HTML_BOTTOM_B) # Video (no autoplay/loop), smaller gap to caption via CSS gr.Video( "assets/DominatingSetAnimation.mp4", autoplay=False, loop=False, show_label=False, interactive=False, elem_classes=["f1-video"], show_share_button=False, show_download_button=False, ) gr.HTML( '
Brief explanation showcasing the design of a compressed dynamic programming state-space.
' ) gr.HTML(WHAT_IS_F1_HTML_AFTER_VIDEO) # Evaluation: Warmup figure gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False) gr.Image( "assets/warmup_performance.png", width=600, show_label=False, elem_classes=["f1-image"], show_share_button=False, show_download_button=False, show_fullscreen_button=False, ) gr.HTML('
Performance of frontier models on the FormulaOne-Warmup dataset.
') # Between warmup and tier1 figs gr.HTML(WHAT_IS_F1_HTML_AFTER_WARMUPFIG) # Tier 1 figure with corrected caption text gr.Image( "assets/tier1_performance.png", width=600, show_label=False, elem_classes=["f1-image"], show_share_button=False, show_download_button=False, show_fullscreen_button=False, ) gr.HTML( '
Performance of frontier reasoning models on Tier 1 of FormulaOne.
' ) # Tail after Tier 1 fig gr.HTML(WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL) # Rename tab to "Leaderboard" and cap at 800px width with gr.TabItem("Leaderboard", elem_id="formulaone-leaderboard-tab-table", id=1): gr.Markdown( """ Welcome to the FormulaOne leaderboard. This table tracks the performance of various systems on the FormulaOne benchmark. Use the "Select Columns to Display" dropdown to customize your view, and the search bar to find specific models or organizations. """, elem_classes="markdown-text", ) refresh_leaderboard_data() assert leaderboard_df is not None leaderboard_component = init_leaderboard(leaderboard_df) with gr.TabItem("Submit Solutions", elem_id="formulaone-submit-tab-table", id=2): logger.info("Tab submission") with gr.Column(): with gr.Row(): gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") with gr.Row(): gr.Markdown("# ✉️✨ Submit your solutions", elem_classes="markdown-text") login_box = gr.Group(visible=True, elem_id="f1-login-box") with login_box: gr.Markdown("Please sign in with Hugging Face to submit") gr.LoginButton(elem_id="hf-login-btn") submit_panel = gr.Group(visible=False) with submit_panel: with gr.Row(): with gr.Column(): system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name) org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name) submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"]) logger.info("Submit button") submit_button = gr.Button("Submit", variant="primary") submission_result = gr.Markdown() submit_button.click( add_solution_cbk, [ system_name_textbox, org_textbox, submission_file, ], submission_result, ) with gr.Row(): logger.info("Citation") with gr.Accordion(CITATION_BUTTON_LABEL, open=False): gr.Code( value=CITATION_BUTTON_TEXT.strip(), elem_id="citation-block", ) blocks.load(lambda: leaderboard_df, inputs=[], outputs=[leaderboard_component]) blocks.load(gate_submission, inputs=None, outputs=[login_box, submit_panel]) logger.info("Scheduler") scheduler = BackgroundScheduler() scheduler.add_job(restart_space, "interval", seconds=1800) scheduler.add_job(refresh_leaderboard_data, "interval", seconds=120) scheduler.start() logger.info("Launch") blocks.queue(default_concurrency_limit=40).launch() logger.info("Done")