"""Gradio app exposing a leaderboard tab and a solution-submission tab."""

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio.themes import Base, colors, sizes
from gradio_leaderboard import Leaderboard, SelectColumns
from huggingface_hub import whoami

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    TITLE,
)
from src.datamodel.data import F1Data
from src.display.css_html_js import custom_css
from src.display.formatting import styled_error
from src.display.utils import AutoEvalColumn, ModelType, fields
from src.envs import API, CODE_PROBLEMS_REPO, REPO_ID, RESULTS_REPO, SUBMISSIONS_REPO
from src.logger import get_logger
from src.populate import get_leaderboard_df
from src.submission.submit import add_new_solutions, fetch_user_info
from src.validation.validate import (
    MAX_INPUT_LENGTH,
    MIN_INPUT_LENGTH,
    is_submission_file_valid,
    is_valid,
)

logger = get_logger(__name__)

ENSURE_ALL_PRESENT = False  # TODO: Switch to True.
SPLIT = "warmup"  # TODO temp

lbdb = F1Data(
    cp_ds_name=CODE_PROBLEMS_REPO,
    sub_ds_name=SUBMISSIONS_REPO,
    res_ds_name=RESULTS_REPO,
    split=SPLIT,
)
# Latest leaderboard table; replaced in place by refresh_leaderboard_data().
leaderboard_df: pd.DataFrame | None = None

logger.info("Initialized LBDB")


def restart_space():
    """Ask the Hub API client to restart the Space identified by REPO_ID."""
    logger.info("Restarting space ")
    API.restart_space(repo_id=REPO_ID)


def refresh_leaderboard_data():
    """Refresh the module-level ``leaderboard_df`` from the latest results.

    The previous DataFrame is kept when no new data is returned or when loading
    fails, so a transient error never blanks the leaderboard. Always returns
    ``None``; exceptions are logged, not propagated, because this also runs as a
    background scheduler job.
    """
    global leaderboard_df
    try:
        logger.info("Loading leaderboard data...")
        new_leaderboard_df = get_leaderboard_df(RESULTS_REPO)
    except Exception as e:
        # Keep the traceback: the message alone rarely identifies the failure.
        logger.error("Error refreshing leaderboard data: %s", e, exc_info=True)
        return None

    if new_leaderboard_df is not None:
        logger.info("Leaderboard data refreshed successfully")
        leaderboard_df = new_leaderboard_df
    else:
        logger.warning("No new leaderboard data found")
    return None


def init_leaderboard(dataframe: pd.DataFrame | None):
    """Build the ``Leaderboard`` component for ``dataframe``.

    Column types, default/locked/hidden columns are all derived from the
    ``AutoEvalColumn`` field descriptors.

    Raises:
        ValueError: if ``dataframe`` is ``None``.
    """
    if dataframe is None:
        raise ValueError("Leaderboard DataFrame is None.")

    # Evaluate the column descriptors once instead of once per argument.
    columns = list(fields(AutoEvalColumn))

    lb = Leaderboard(
        value=dataframe,
        datatype=[c.type for c in columns],
        select_columns=SelectColumns(
            default_selection=[c.name for c in columns if c.displayed_by_default],
            cant_deselect=[c.name for c in columns if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.organization.name],
        hide_columns=[c.name for c in columns if c.hidden],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
    lb.col_count = (1, "fixed")
    return lb


def add_solution_cbk(
    system_name: str,
    org: str,
    submission_path: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
):
    """Validate a submission from the UI and hand it to ``add_new_solutions``.

    Args:
        system_name: Value of the system-name textbox.
        org: Value of the organisation textbox.
        submission_path: Path of the uploaded JSONL file (falsy when none was uploaded).
        profile: OAuth profile of the signed-in user, or ``None``. Not listed in the
            click inputs; supplied by Gradio based on the type annotation.
        oauth_token: OAuth token of the signed-in user, or ``None``; supplied the same way.

    Returns:
        The result of ``styled_error(...)`` when a check fails, otherwise whatever
        ``add_new_solutions`` returns.
    """
    logger.info("Fetching user details for submission")
    logger.info("PROFILE %s", profile)
    # Never log the token object itself: it carries the user's access token.
    logger.info("TOKEN present: %s", oauth_token is not None)

    if profile is None or oauth_token is None:
        return styled_error("Please sign in with Hugging Face before submitting.")

    # Display handle and display name (may change over time)
    logger.info("User handle: %s", profile.username)
    display_name = profile.name or profile.username
    logger.info("Display name: %s", display_name)

    # Stable account id
    user_info = fetch_user_info(oauth_token)
    # NOTE(review): user_info presumably holds account details; confirm it is safe to log.
    logger.info("Logged in user info: %s", user_info)
    stable_id = user_info.get("id") if user_info else None
    logger.info("User stable ID: %s", stable_id)

    if not stable_id:
        return styled_error("Could not retrieve your stable user ID. Please try signing in again.")
    user_id = stable_id

    if not profile.username:
        return styled_error("Could not retrieve username. Please try signing in again.")
    # We rely on underscores as separators in submission ID, replace it with "-".
    # user_id = profile.username.replace("_", "-")

    is_warmup = SPLIT == "warmup"

    try:
        # Validating the submission file.
        if not submission_path:
            return styled_error("Please upload JSONL submission file.")

        if not is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup):
            return styled_error("Failed to read JSONL submission file. Please try again later.")

        # Validating all user-supplied arguments.
        for val, val_name in [
            (system_name, "System name"),
            (org, "Organisation name"),
        ]:
            # `not val` also covers None, which len() would reject with a TypeError.
            if not val:
                return styled_error(f"Please fill in the '{val_name}' field.")
            if not is_valid(val):
                return styled_error(
                    f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, "
                    "or the special characters '-' and '.', and be of length between "
                    f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}."
                )
    except Exception:
        logger.warning("Failed to process user submission", exc_info=True)
        return styled_error("An error occurred. Please try again later.")  # Intentionally vague.

    return add_new_solutions(
        lbdb,
        profile.username,
        user_id,
        system_name,
        org,
        submission_path,
        is_warmup_dataset=is_warmup,
        ensure_all_present=ENSURE_ALL_PRESENT,
    )


def gate_submission(oauth_token: gr.OAuthToken | None):
    """Toggle the login box and the submission panel based on login status.

    Returns:
        A pair of ``gr.update`` objects ``(login_box, submit_panel)``: the login box
        is shown and the panel hidden unless ``whoami`` accepts the token.
    """
    show_login = gr.update(visible=True), gr.update(visible=False)
    show_submit = gr.update(visible=False), gr.update(visible=True)

    # Log presence only; the token object carries the user's access token.
    logger.info("GATE TOKEN present: %s", oauth_token is not None)
    if oauth_token is None:
        logger.info("GATE: NO TOKEN")
        return show_login

    try:
        whoami(oauth_token.token)
    except Exception:
        # Any failure (not only expiry) lands here, so keep the traceback for diagnosis.
        logger.info("GATE: TOKEN HAS EXPIRED", exc_info=True)
        return show_login

    logger.info("GATE: TOKEN IS VALID")
    return show_submit


def get_theme():
    """Return the dark theme used by the app: a ``Base`` theme with custom hues and fills."""
    return Base(
        # neon-ish accents driven by hues (affects tabs, primary buttons, sliders, etc.)
        primary_hue=colors.cyan,  # selected tab / primary controls
        secondary_hue=colors.pink,  # secondary accents
        neutral_hue=colors.gray,  # keep neutrals subtle
        # # techno font
        # font=gr.themes.GoogleFont("Orbitron"),
        # font_mono=gr.themes.GoogleFont("JetBrains Mono"),
        text_size=sizes.text_md,  # keep defaults
        spacing_size=sizes.spacing_md,
        radius_size=sizes.radius_md,
    ).set(
        # keep overrides minimal—dark canvas; let hues do the rest
        body_background_fill="#0b0f14",  # deep blue-black
        background_fill_primary="#0b0f14",  # panels
        background_fill_secondary="#0e141a",  # subtle contrast
    )


blocks = gr.Blocks(css=custom_css, theme=get_theme())
with blocks:
    gr.Image(
        "assets/banner.png",
        interactive=False,
        show_label=False,
        show_download_button=False,
        container=False,
        elem_classes=["banner_image"],
    )
    gr.HTML(""" """)
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="formulaone-leaderboard-tab-table", id=0):
            refresh_leaderboard_data()  # updates leaderboard_df
            # Explicit check rather than `assert`, which is stripped under `python -O`.
            if leaderboard_df is None:
                raise RuntimeError("Initial leaderboard data could not be loaded.")
            leaderboard_component = init_leaderboard(leaderboard_df)

        with gr.TabItem("🚀 Submit Solutions", elem_id="llm-benchmark-tab-table", id=2):
            logger.info("Tab submission")
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your solutions", elem_classes="markdown-text")

            # Shown when logged OUT
            login_box = gr.Group(visible=True)
            with login_box:
                gr.Markdown("Please sign in with Hugging Face to submit")
                gr.LoginButton()

            # Shown when logged IN
            submit_panel = gr.Group(visible=False)
            with submit_panel:
                with gr.Row():
                    with gr.Column():
                        system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
                        org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
                        # sys_type_dropdown = gr.Dropdown(
                        #     choices=[t.to_str() for t in ModelType],
                        #     label=AutoEvalColumn.system_type.name,
                        #     multiselect=False,
                        #     value=ModelType.LLM.to_str(),
                        #     interactive=True,
                        # )

                        submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])

                logger.info("Submit button")
                submit_button = gr.Button("Submit")
                # gr.LoginButton()
                submission_result = gr.Markdown()

                # Only the three components are passed explicitly; the callback's
                # OAuth parameters are not part of the inputs list.
                submit_button.click(
                    add_solution_cbk,
                    [
                        system_name_textbox,
                        org_textbox,
                        submission_file,
                    ],
                    submission_result,
                )

    with gr.Row():
        logger.info("Citation")
        with gr.Accordion(CITATION_BUTTON_LABEL, open=False):
            gr.Code(
                value=CITATION_BUTTON_TEXT.strip(),
                elem_id="citation-block",
            )

    # UI refresh triggers latest data swap.
    # The work already happened in the background - refresh_leaderboard_data().
    blocks.load(lambda: leaderboard_df, inputs=[], outputs=[leaderboard_component])

    # On initial load (and after OAuth redirect), toggle the UI based on login status.
    blocks.load(gate_submission, inputs=None, outputs=[login_box, submit_panel])

logger.info("Scheduler")
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.add_job(refresh_leaderboard_data, "interval", seconds=120)
scheduler.start()

logger.info("Launch")
blocks.queue(default_concurrency_limit=40).launch()

logger.info("Done")