# Page header captured together with the file (not part of the program):
# tomerz-aai's picture
# wip
# c57c9d4
# raw
# history blame
# 11.4 kB
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio.themes import Base, colors, sizes
from gradio_leaderboard import Leaderboard, SelectColumns
from huggingface_hub import whoami
from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, TITLE
from src.datamodel.data import F1Data
from src.display.css_html_js import custom_css
from src.display.formatting import styled_error
from src.display.utils import AutoEvalColumn, ModelType, fields
from src.envs import API, CODE_PROBLEMS_REPO, REPO_ID, RESULTS_REPO, SUBMISSIONS_REPO
from src.logger import get_logger
from src.populate import get_leaderboard_df
from src.submission.submit import add_new_solutions, fetch_user_info
from src.validation.validate import MAX_INPUT_LENGTH, MIN_INPUT_LENGTH, is_submission_file_valid, is_valid
# Module-level logger, named after this module.
logger = get_logger(__name__)

# Forwarded to add_new_solutions() as `ensure_all_present` on every submission.
ENSURE_ALL_PRESENT = False  # TODO: Switch to True.

# Split handed to F1Data; the value "warmup" also switches on the
# `is_warmup_dataset` flag passed to the submission helpers.
SPLIT = "warmup"  # TODO temp

# Shared F1Data handle built from the code-problems, submissions and results
# repositories; passed to add_new_solutions() when a user submits.
lbdb = F1Data(
    cp_ds_name=CODE_PROBLEMS_REPO,
    sub_ds_name=SUBMISSIONS_REPO,
    res_ds_name=RESULTS_REPO,
    split=SPLIT,
)

# Latest leaderboard table; stays None until refresh_leaderboard_data()
# obtains a non-None result.
leaderboard_df = None

logger.info("Initialized LBDB")
def restart_space():
    """Log the request, then ask ``API`` to restart the Space ``REPO_ID``."""
    logger.info("Restarting space")
    target_repo = REPO_ID
    API.restart_space(repo_id=target_repo)
def refresh_leaderboard_data():
    """Reload the leaderboard table and publish it in ``leaderboard_df``.

    Calls ``get_leaderboard_df(RESULTS_REPO)`` and, when the result is not
    ``None``, rebinds the module-level ``leaderboard_df`` to it. When the
    result is ``None`` or the call raises, the previous value is left
    untouched, so a failed refresh never replaces a table that was already
    loaded.

    Returns:
        ``None`` in every case; the outcome is reported through the log only.
    """
    global leaderboard_df

    logger.info("Loading leaderboard data...")
    try:
        # Only the fetch itself is guarded; everything after it is plain
        # bookkeeping that should not be able to hide behind this handler.
        new_leaderboard_df = get_leaderboard_df(RESULTS_REPO)
    except Exception as e:
        # Best-effort refresh: swallow the failure but keep the traceback in
        # the log so the cause can be diagnosed.
        logger.exception("Error refreshing leaderboard data: %s", e)
        return None

    if new_leaderboard_df is None:
        logger.warning("No new leaderboard data found")
        return None

    logger.info("Leaderboard data refreshed successfully")
    leaderboard_df = new_leaderboard_df
    return None
def init_leaderboard(dataframe: pd.DataFrame):
    """Build the non-interactive ``Leaderboard`` component for *dataframe*.

    Column datatypes and the default / non-deselectable / hidden column lists
    are all derived from ``fields(AutoEvalColumn)``; searching is enabled on
    the system and system-type columns.

    Args:
        dataframe: Table to display.

    Returns:
        The configured ``Leaderboard`` with ``col_count`` set to
        ``(1, "fixed")``.

    Raises:
        ValueError: If *dataframe* is ``None``.
    """
    if dataframe is None:
        raise ValueError("Leaderboard DataFrame is None.")

    column_types = [col.type for col in fields(AutoEvalColumn)]
    shown_by_default = [col.name for col in fields(AutoEvalColumn) if col.displayed_by_default]
    always_shown = [col.name for col in fields(AutoEvalColumn) if col.never_hidden]
    column_picker = SelectColumns(
        default_selection=shown_by_default,
        cant_deselect=always_shown,
        label="Select Columns to Display:",
    )
    searchable = [AutoEvalColumn.system.name, AutoEvalColumn.system_type.name]
    concealed = [col.name for col in fields(AutoEvalColumn) if col.hidden]

    board = Leaderboard(
        value=dataframe,
        datatype=column_types,
        select_columns=column_picker,
        search_columns=searchable,
        hide_columns=concealed,
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
    board.col_count = (1, "fixed")
    return board
def add_solution_cbk(
    system_name: str,
    org: str,
    sys_type: str,
    submission_path: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
):
    """Validate a submission from the signed-in user and store it.

    Args:
        system_name: Value of the system-name field.
        org: Value of the organisation field.
        sys_type: Value chosen in the system-type dropdown.
        submission_path: Path of the uploaded JSONL file; falsy when nothing
            was uploaded.
        profile: OAuth profile of the signed-in user, ``None`` when nobody is
            signed in.
        oauth_token: OAuth token of the signed-in user, ``None`` when nobody
            is signed in.

    Returns:
        The result of ``styled_error(...)`` when the user is not signed in or
        any check fails; otherwise whatever ``add_new_solutions`` returns.
    """
    logger.info("Fetching user details for submission")
    logger.info("PROFILE %s", profile)
    # Only record whether a token exists. The token object carries the user's
    # access credential (see ``oauth_token.token``) and must not reach the log.
    logger.info("TOKEN present: %s", oauth_token is not None)

    if profile is None or oauth_token is None:
        return styled_error("Please sign in with Hugging Face before submitting.")

    # Display handle and display name (may change over time)
    logger.info("User handle: %s", profile.username)
    display_name = profile.name or profile.username
    logger.info("Display name: %s", display_name)

    # Stable account id
    user_info = fetch_user_info(oauth_token)
    logger.info("Logged in user info: %s", user_info)
    stable_id = user_info.get("id") if user_info else None
    logger.info("User stable ID: %s", stable_id)

    # TODO: find a way to invalidate login status if token is outdated
    # if not stable_id:
    #     return styled_error("Could not retrieve your stable user ID. Please try signing in again.")
    # user_id = stable_id

    if not profile.username:
        return styled_error("Could not retrieve username. Please try signing in again.")

    # We rely on underscores as separators in submission ID, replace it with "-".
    user_id = profile.username.replace("_", "-")

    try:
        # Validating the submission file.
        if not submission_path:
            return styled_error("Please upload JSONL submission file.")

        if not is_submission_file_valid(
            submission_path,
            is_warmup_dataset=(SPLIT == "warmup"),
        ):
            return styled_error("Failed to read JSONL submission file. Please try again later.")

        # Validating all user-supplied arguments.
        for val, val_name in [
            (system_name, "System name"),
            (org, "Organisation name"),
            (sys_type, "System type"),
        ]:
            # `not val` covers both "" and None, so a missing value gets the
            # specific "fill in" message instead of failing on len(None).
            if not val:
                return styled_error(f"Please fill in the '{val_name}' field.")

            if not is_valid(val):
                return styled_error(
                    f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, "
                    + "or the special characters '-' and '.', and be of length between "
                    + f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}."
                )
    except Exception:
        logger.warning("Failed to process user submission", exc_info=True)
        return styled_error("An error occurred. Please try again later.")  # Intentionally vague.

    return add_new_solutions(
        lbdb,
        profile.username,
        user_id,
        system_name,
        org,
        sys_type,
        submission_path,
        is_warmup_dataset=(SPLIT == "warmup"),
        ensure_all_present=ENSURE_ALL_PRESENT,
    )
def gate_submission(oauth_token: gr.OAuthToken | None):
    """Decide which of the login box and the submission panel is visible.

    The token is checked by calling ``whoami`` with it; any exception from
    that call is treated as "not logged in".

    Args:
        oauth_token: OAuth token of the current visitor, or ``None`` when
            there is none.

    Returns:
        A pair of ``gr.update`` objects. The first has ``visible=True`` and
        the second ``visible=False`` when there is no usable token; the
        values are swapped when ``whoami`` accepts the token.
    """
    # Only record whether a token exists. The token object carries the access
    # credential (``oauth_token.token``) and must not be written to the log.
    logger.info("GATE TOKEN present: %s", oauth_token is not None)

    if oauth_token is None:
        logger.info("GATE: NO TOKEN")
        return gr.update(visible=True), gr.update(visible=False)

    try:
        whoami(oauth_token.token)
    except Exception:
        # Any failure sends the visitor back to the login box; the traceback
        # is kept so an expired token can be told apart from other errors.
        logger.info("GATE: TOKEN HAS EXPIRED", exc_info=True)
        return gr.update(visible=True), gr.update(visible=False)

    logger.info("GATE: TOKEN IS VALID")
    return gr.update(visible=False), gr.update(visible=True)
def get_theme():
    """Return the app theme: a ``Base`` theme with cyan/pink/gray hues and
    dark background fills."""
    # neon-ish accents driven by hues (affects tabs, primary buttons, sliders, etc.)
    accented = Base(
        primary_hue=colors.cyan,  # selected tab / primary controls
        secondary_hue=colors.pink,  # secondary accents
        neutral_hue=colors.gray,  # keep neutrals subtle
        # # techno font
        # font=gr.themes.GoogleFont("Orbitron"),
        # font_mono=gr.themes.GoogleFont("JetBrains Mono"),
        text_size=sizes.text_md,  # keep defaults
        spacing_size=sizes.spacing_md,
        radius_size=sizes.radius_md,
    )
    # keep overrides minimal—dark canvas; let hues do the rest
    return accented.set(
        body_background_fill="#0b0f14",  # deep blue-black
        background_fill_primary="#0b0f14",  # panels
        background_fill_secondary="#0e141a",  # subtle contrast
    )
# UI definition: banner, page CSS, title/intro, a leaderboard tab, a submission
# tab (login box + submission form) and a citation accordion.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# copy of this file that had lost its indentation - confirm it against the
# intended layout before relying on it.
blocks = gr.Blocks(css=custom_css, theme=get_theme())
with blocks:
    gr.Image(
        "assets/banner.png",
        interactive=False,
        show_label=False,
        show_download_button=False,
        container=False,
        elem_classes=["banner_image"],
    )
    # Page-level CSS injected as raw HTML (in addition to `custom_css`).
    gr.HTML(
        """
<style>
body {
background-color: #121212;
color: white;
margin: 0; /* Reset browser default */
}
/* Outer container margin & spacing */
.gradio-container {
max-width: 1100px;
margin: 2rem auto; /* top/bottom spacing + horizontal centering */
padding: 2rem; /* inner spacing */
background-color: rgba(0, 0, 0, 0.6); /* optional: semi-transparent panel */
border-radius: 12px; /* rounded corners */
}
</style>
"""
    )
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="formulaone-leaderboard-tab-table", id=0):
            refresh_leaderboard_data()  # updates leaderboard_df
            # refresh_leaderboard_data() swallows its own errors, so a failed
            # first load shows up only as a still-empty table. Fail loudly with
            # an explicit check; an `assert` would be stripped under `-O`.
            if leaderboard_df is None:
                raise RuntimeError(
                    "Leaderboard data could not be loaded at startup; see the log for the cause."
                )
            leaderboard_component = init_leaderboard(leaderboard_df)

        with gr.TabItem("🚀 Submit Solutions", elem_id="llm-benchmark-tab-table", id=2):
            logger.info("Tab submission")
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
                with gr.Row():
                    gr.Markdown("# ✉️✨ Submit your solutions", elem_classes="markdown-text")

            # Shown when logged OUT
            login_box = gr.Group(visible=True)
            with login_box:
                gr.Markdown("Please sign in with Hugging Face to submit")
                gr.LoginButton()

            # Shown when logged IN
            submit_panel = gr.Group(visible=False)
            with submit_panel:
                with gr.Row():
                    with gr.Column():
                        system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
                        org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
                        sys_type_dropdown = gr.Dropdown(
                            choices=[t.to_str() for t in ModelType],
                            label=AutoEvalColumn.system_type.name,
                            multiselect=False,
                            value=ModelType.LLM.to_str(),
                            interactive=True,
                        )
                submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])

                logger.info("Submit button")
                submit_button = gr.Button("Submit")
                # gr.LoginButton()
                submission_result = gr.Markdown()

                # Only the four form components are listed as inputs; the
                # `profile` and `oauth_token` parameters of add_solution_cbk
                # are expected to be supplied by Gradio from their OAuth type
                # annotations.
                submit_button.click(
                    add_solution_cbk,
                    [
                        system_name_textbox,
                        org_textbox,
                        sys_type_dropdown,
                        submission_file,
                    ],
                    submission_result,
                )

    with gr.Row():
        logger.info("Citation")
        with gr.Accordion(CITATION_BUTTON_LABEL, open=False):
            gr.Code(
                value=CITATION_BUTTON_TEXT.strip(),
                elem_id="citation-block",
            )

    # UI refresh triggers latest data swap.
    # The work already happened in the background - refresh_leaderboard_data().
    blocks.load(lambda: leaderboard_df, inputs=[], outputs=[leaderboard_component])

    # On initial load (and after OAuth redirect), toggle the UI based on login status.
    blocks.load(gate_submission, inputs=None, outputs=[login_box, submit_panel])
# Background jobs: restart the Space every 1800 s and re-read the leaderboard
# data every 120 s.
logger.info("Scheduler")
scheduler = BackgroundScheduler()
scheduler.add_job(func=restart_space, trigger="interval", seconds=1800)
scheduler.add_job(func=refresh_leaderboard_data, trigger="interval", seconds=120)
scheduler.start()

# Enable the request queue (default concurrency limit 40), then start the app.
logger.info("Launch")
queued_app = blocks.queue(default_concurrency_limit=40)
queued_app.launch()
logger.info("Done")