tomerz-aai's picture
wip
6c4809a
raw
history blame
15.1 kB
from http.cookies import SimpleCookie
import os
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio.themes import Base, colors, sizes
from gradio_leaderboard import Leaderboard, SelectColumns
from huggingface_hub import whoami
from fastapi import Response
from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, TITLE
from src.datamodel.data import F1Data
from src.display.css_html_js import custom_css
from src.display.formatting import styled_error
from src.display.utils import AutoEvalColumn, ModelType, fields
from src.envs import API, CODE_PROBLEMS_REPO, REPO_ID, RESULTS_REPO, SUBMISSIONS_REPO
from src.logger import get_logger
from src.populate import get_leaderboard_df
from src.submission.submit import add_new_solutions, fetch_user_info
from src.validation.validate import MAX_INPUT_LENGTH, MIN_INPUT_LENGTH, is_submission_file_valid, is_valid
# Module-wide logger shared by every callback below.
logger = get_logger(__name__)

# Forwarded to add_new_solutions() as ``ensure_all_present``.
ENSURE_ALL_PRESENT = False  # TODO: Switch to True.
# Dataset split handed to F1Data; the value "warmup" also switches the
# submission checks into warm-up mode (see add_solution_cbk).
SPLIT = "warmup"  # TODO temp

# Most recent leaderboard table. Starts empty and is replaced by
# refresh_leaderboard_data() each time fresh results are loaded.
leaderboard_df = None

# Handle over the code-problems, submissions and results datasets.
lbdb = F1Data(
    split=SPLIT,
    cp_ds_name=CODE_PROBLEMS_REPO,
    sub_ds_name=SUBMISSIONS_REPO,
    res_ds_name=RESULTS_REPO,
)
logger.info("Initialized LBDB")
def restart_space() -> None:
    """Log the request, then ask the Hub API to restart the Space ``REPO_ID``."""
    logger.info("Restarting space")
    API.restart_space(repo_id=REPO_ID)
def refresh_leaderboard_data():
    """Reload the leaderboard table from ``RESULTS_REPO`` into ``leaderboard_df``.

    The module-level ``leaderboard_df`` is replaced only when
    ``get_leaderboard_df`` returns a non-``None`` value; on a ``None`` result
    or on any exception the previous table is kept, so a failed refresh never
    blanks the leaderboard.

    Returns:
        Always ``None``; the result is published through the global.
    """
    global leaderboard_df

    logger.info("Loading leaderboard data...")
    try:
        new_leaderboard_df = get_leaderboard_df(RESULTS_REPO)
    except Exception as e:
        # Broad on purpose: this also runs as a scheduler job and must never
        # propagate. exc_info keeps the traceback that str(e) alone would drop.
        logger.error(f"Error refreshing leaderboard data: {e}", exc_info=True)
        return None

    if new_leaderboard_df is None:
        logger.warning("No new leaderboard data found")
        return None

    logger.info("Leaderboard data refreshed successfully")
    leaderboard_df = new_leaderboard_df
    return None
def init_leaderboard(dataframe: pd.DataFrame):
    """Build the read-only ``Leaderboard`` component for ``dataframe``.

    Column types, default/locked/hidden columns are all derived from the
    ``AutoEvalColumn`` field descriptors; search covers the system and
    system-type columns.

    Args:
        dataframe: Table to display.

    Returns:
        The configured ``Leaderboard`` component.

    Raises:
        ValueError: If ``dataframe`` is ``None``.
    """
    if dataframe is None:
        raise ValueError("Leaderboard DataFrame is None.")

    # Evaluate the column descriptors once instead of once per argument; the
    # tuple() also makes repeated iteration safe whatever ``fields`` returns.
    columns = tuple(fields(AutoEvalColumn))

    lb = Leaderboard(
        value=dataframe,
        datatype=[c.type for c in columns],
        select_columns=SelectColumns(
            default_selection=[c.name for c in columns if c.displayed_by_default],
            cant_deselect=[c.name for c in columns if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.system_type.name],
        hide_columns=[c.name for c in columns if c.hidden],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
    # Set after construction rather than through the constructor.
    # NOTE(review): presumably pins the first column in place - confirm.
    lb.col_count = (1, "fixed")
    return lb
def add_solution_cbk(
    system_name: str,
    org: str,
    sys_type: str,
    submission_path: str,
    profile: gr.OAuthProfile | None,
    token: gr.OAuthToken | None,
):
    """Validate a submission from the UI and forward it to ``add_new_solutions``.

    Args:
        system_name: Name of the submitted system.
        org: Organisation name.
        sys_type: System type selected in the dropdown.
        submission_path: Path of the uploaded JSONL file; falsy when nothing
            was uploaded.
        profile: OAuth profile of the signed-in user, ``None`` when signed out.
        token: OAuth token of the signed-in user, ``None`` when signed out.
            Neither ``profile`` nor ``token`` is listed among the click
            inputs; presumably Gradio injects them from the type hints.

    Returns:
        A ``styled_error(...)`` value when a check fails, otherwise whatever
        ``add_new_solutions`` returns.
    """
    logger.info("Fetching user details for submission")
    # Only record whether credentials are present: the token object carries
    # the bearer secret and must never reach the logs.
    logger.info("PROFILE present: %s", profile is not None)
    logger.info("TOKEN present: %s", token is not None)

    if profile is None or token is None:
        return styled_error("Please sign in with Hugging Face before submitting.")

    # Display handle and display name (may change over time)
    logger.info("User handle: %s", profile.username)
    display_name = profile.name or profile.username
    logger.info("Display name: %s", display_name)

    # Stable account id (currently only logged, see TODO below). The raw
    # user-info payload is not logged because it may hold personal data.
    user_info = fetch_user_info(token)
    stable_id = user_info.get("id") if user_info else None
    logger.info("User stable ID: %s", stable_id)

    # TODO: find a way to invalidate login status if token is outdated
    # if not stable_id:
    #     return styled_error("Could not retrieve your stable user ID. Please try signing in again.")
    # user_id = stable_id

    if not profile.username:
        return styled_error("Could not retrieve username. Please try signing in again.")

    # We rely on underscores as separators in submission ID, replace it with "-".
    user_id = profile.username.replace("_", "-")
    is_warmup = SPLIT == "warmup"

    try:
        # Validating the submission file.
        if not submission_path:
            return styled_error("Please upload JSONL submission file.")

        if not is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup):
            return styled_error("Failed to read JSONL submission file. Please try again later.")

        # Validating all user-supplied arguments.
        for val, val_name in (
            (system_name, "System name"),
            (org, "Organisation name"),
            (sys_type, "System type"),
        ):
            # ``not val`` also covers None (e.g. a cleared dropdown), which
            # len() would reject with a TypeError.
            if not val:
                return styled_error(f"Please fill in the '{val_name}' field.")

            if not is_valid(val):
                return styled_error(
                    f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, "
                    + "or the special characters '-' and '.', and be of length between "
                    + f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}."
                )
    except Exception:
        logger.warning("Failed to process user submission", exc_info=True)
        return styled_error("An error occurred. Please try again later.")  # Intentionally vague.

    return add_new_solutions(
        lbdb,
        profile.username,
        user_id,
        system_name,
        org,
        sys_type,
        submission_path,
        is_warmup_dataset=is_warmup,
        ensure_all_present=ENSURE_ALL_PRESENT,
    )
# def check_valid_token(oauth_token: gr.OAuthToken | None):
# logger.info("CHECK TOKEN %s", oauth_token)
# if oauth_token is None:
# logger.info("CHECK: NO TOKEN")
# return gr.update(value="")
# try:
# whoami(oauth_token.token)
# logger.info("CHECK: VALID TOKEN")
# return gr.update(value="")
# except Exception:
# logger.info("CHECK: TOKEN HAS EXPIRED")
# return gr.update(value='<script>window.location.href = "/logout";</script>')
# def gate_submission_by_prof(profile: gr.OAuthProfile | None):
# if not profile:
# return gr.update(visible=True), gr.update(visible=False)
# return gr.update(visible=False), gr.update(visible=True)
def gate_submission(oauth_token: gr.OAuthToken | None, request: gr.Request):
    """Pick which of the login, expired-session and submission panels is shown.

    Also logs the byte size of the ``spaces-jwt`` and ``session`` cookies and
    of the OAuth token, for diagnostics. Only sizes are logged, never values.

    Args:
        oauth_token: OAuth token of the current user, ``None`` when signed out.
        request: Incoming request; only its ``cookie`` header is read.

    Returns:
        Three ``gr.update(visible=...)`` values in the order
        (login box, expired-session box, submission panel).
    """

    def _panels(login: bool, expired: bool, submit: bool):
        # One visibility update per output component, in output order.
        return (
            gr.update(visible=login),
            gr.update(visible=expired),
            gr.update(visible=submit),
        )

    # Log cookie sizes
    cookies = request.headers.get("cookie", "")
    cookie_obj = SimpleCookie()
    try:
        cookie_obj.load(cookies)
        for key in ("spaces-jwt", "session"):
            if key in cookie_obj:
                cookie_size = len(cookie_obj[key].OutputString().encode("utf-8"))
                logger.info(f"Cookie {key} size: {cookie_size} bytes")
    except Exception as e:
        # Diagnostics only: a malformed cookie header must not block gating.
        logger.error(f"Error parsing cookies: {str(e)}")

    # Never log the token object itself: it contains the bearer secret.
    logger.info("GATE TOKEN present: %s", oauth_token is not None)

    if oauth_token is None:
        logger.info("GATE: NO TOKEN")
        return _panels(login=True, expired=False, submit=False)

    try:
        token_size = len(oauth_token.token.encode("utf-8"))
        logger.info(f"Token size: {token_size} bytes")
        whoami(oauth_token.token)
    except Exception:
        # Any failure of the whoami check (including transient network errors)
        # is treated as an expired session.
        logger.info("GATE: TOKEN HAS EXPIRED")
        return _panels(login=False, expired=True, submit=False)

    logger.info("GATE: TOKEN IS VALID")
    return _panels(login=False, expired=False, submit=True)
# Custom route to force clear HttpOnly cookies
def clear_cookies():
    """Return a 302 redirect to ``/`` that expires the ``spaces-jwt`` and ``session`` cookies.

    Each cookie is expired with its own ``Set-Cookie`` header. The ``Domain``
    attribute is taken from the ``SPACE_HOST`` environment variable and left
    out when that variable is unset. The redirect target carries a millisecond
    timestamp so the browser does not serve a cached page.

    Returns:
        The prepared ``Response``.
    """
    import time  # local import: only needed for the cache-busting timestamp

    response = Response(status_code=302)

    space_host = os.environ.get("SPACE_HOST", "")
    domain_attr = f"; Domain={space_host}" if space_host else ""

    # Header values must be plain strings, so a list cannot be assigned to
    # "Set-Cookie"; append() emits one header line per cookie instead.
    for cookie_name in ("spaces-jwt", "session"):
        response.headers.append(
            "Set-Cookie",
            f"{cookie_name}=; Path=/; Expires=Thu, 01 Jan 1970 00:00:00 GMT{domain_attr}",
        )

    # Epoch milliseconds, like the client-side variant's Date().getTime();
    # os.times().elapsed is always 0 on Windows and would defeat the purpose.
    response.headers["Location"] = "/?t=" + str(int(time.time() * 1000))
    return response
def get_theme():
    """Return the app theme: ``Base`` with cyan/pink/gray hues on a dark canvas."""
    # Deep blue-black shared by the page body and the primary panels.
    canvas = "#0b0f14"

    # Neon-ish accents come from the hues (tabs, primary buttons, sliders,
    # etc.); sizes stay at the medium defaults. A custom font was tried
    # earlier and is intentionally not set:
    #   font=gr.themes.GoogleFont("Orbitron"),
    #   font_mono=gr.themes.GoogleFont("JetBrains Mono"),
    hued_theme = Base(
        primary_hue=colors.cyan,  # selected tab / primary controls
        secondary_hue=colors.pink,  # secondary accents
        neutral_hue=colors.gray,  # keep neutrals subtle
        text_size=sizes.text_md,
        spacing_size=sizes.spacing_md,
        radius_size=sizes.radius_md,
    )

    # Keep overrides minimal - dark canvas; let the hues do the rest.
    return hued_theme.set(
        body_background_fill=canvas,
        background_fill_primary=canvas,  # panels
        background_fill_secondary="#0e141a",  # subtle contrast
    )
# HTML snippet for a client-side "Manual Logout" link. Its onclick handler
# expires the `spaces-jwt` and `session` cookies for the current hostname via
# document.cookie, logs the action to the browser console, then reloads "/"
# with a timestamp query parameter and cancels the default link navigation.
# NOTE(review): only referenced from a commented-out gr.HTML(...) call in the
# visible source; presumably cannot clear HttpOnly cookies, which is what the
# server-side clear_cookies() route is for - confirm before re-enabling.
JS_LOGOUT = """
<a href="#" onclick="
var domain = window.location.hostname;
document.cookie = 'spaces-jwt=; Expires=Thu, 01 Jan 1970 00:00:00 GMT; Domain=' + domain;
document.cookie = 'session=; Expires=Thu, 01 Jan 1970 00:00:00 GMT; Domain=' + domain;
console.log('Cookies cleared: spaces-jwt, session on domain: ' + domain);
window.location.href='/?t='+new Date().getTime();
return false;
" style="display: inline-block; padding: 10px; background: #ff4d4f; color: white; text-decoration: none; border-radius: 5px;">Manual Logout</a>
"""
# Page-wide CSS delivered through a gr.HTML component: dark body plus a
# centered, padded, rounded outer container.
_PAGE_STYLE_HTML = """
<style>
body {
background-color: #121212;
color: white;
margin: 0; /* Reset browser default */
}
/* Outer container margin & spacing */
.gradio-container {
max-width: 1100px;
margin: 2rem auto; /* top/bottom spacing + horizontal centering */
padding: 2rem; /* inner spacing */
background-color: rgba(0, 0, 0, 0.6); /* optional: semi-transparent panel */
border-radius: 12px; /* rounded corners */
}
</style>
"""

# The whole UI: banner, intro text, a leaderboard tab, a submission tab whose
# panels are toggled by gate_submission(), and a citation accordion.
blocks = gr.Blocks(css=custom_css, theme=get_theme())
with blocks:
    gr.Image(
        "assets/banner.png",
        interactive=False,
        show_label=False,
        show_download_button=False,
        container=False,
        elem_classes=["banner_image"],
    )
    gr.HTML(_PAGE_STYLE_HTML)
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="formulaone-leaderboard-tab-table", id=0):
            refresh_leaderboard_data()  # updates leaderboard_df
            # Explicit check instead of ``assert``, which is stripped under -O.
            if leaderboard_df is None:
                raise RuntimeError("Leaderboard data could not be loaded at startup.")
            leaderboard_component = init_leaderboard(leaderboard_df)

        with gr.TabItem("🚀 Submit Solutions", elem_id="llm-benchmark-tab-table", id=2):
            logger.info("Tab submission")
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your solutions", elem_classes="markdown-text")

            # Shown when logged OUT
            login_box = gr.Group(visible=True)
            with login_box:
                gr.Markdown("Please sign in to continue:")
                gr.LoginButton()

            # Shown when login token is EXPIRED
            logout_box = gr.Group(visible=False)
            with logout_box:
                gr.Markdown("Your session has already EXPIRED. Please sign in again")
                gr.LoginButton()
                # gr.Button("Manual Logout", link="/logout")
                # gr.HTML(JS_LOGOUT)
                # Manual logout link to server-side /logout
                # gr.HTML('<a href="/logout?redirect=/" style="display: inline-block; padding: 10px; background: #ff4d4f; color: white; text-decoration: none; border-radius: 5px;">Logout (via /logout)</a>')
                # Custom clear cookies link
                # gr.HTML('<a href="/clear-cookies" style="display: inline-block; padding: 10px; background: #ff6666; color: white; text-decoration: none; border-radius: 5px; margin-left: 10px;">Clear Cookies</a>')

            # Shown when logged IN
            submit_panel = gr.Group(visible=False)
            with submit_panel:
                with gr.Row():
                    with gr.Column():
                        system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
                        org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
                        sys_type_dropdown = gr.Dropdown(
                            choices=[t.to_str() for t in ModelType],
                            label=AutoEvalColumn.system_type.name,
                            multiselect=False,
                            value=ModelType.LLM.to_str(),
                            interactive=True,
                        )
                        submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])

                logger.info("Submit button")
                submit_button = gr.Button("Submit")
                # gr.LoginButton()
                submission_result = gr.Markdown()

                # Only the four form fields are wired as inputs; the callback's
                # ``profile`` and ``token`` parameters are not listed here.
                submit_button.click(
                    add_solution_cbk,
                    [
                        system_name_textbox,
                        org_textbox,
                        sys_type_dropdown,
                        submission_file,
                    ],
                    submission_result,
                )

    with gr.Row():
        logger.info("Citation")
        with gr.Accordion(CITATION_BUTTON_LABEL, open=False):
            gr.Code(
                value=CITATION_BUTTON_TEXT.strip(),
                elem_id="citation-block",
            )

    # Trigger reload if token is expired
    # blocks.load(check_valid_token, outputs=reload_html)

    # UI refresh triggers latest data swap.
    # The work already happened in the background - refresh_leaderboard_data().
    blocks.load(lambda: leaderboard_df, inputs=[], outputs=[leaderboard_component])

    # On initial load (and after OAuth redirect), toggle the UI based on login status.
    # blocks.load(gate_submission, inputs=None, outputs=[login_box, submit_panel])
    blocks.load(gate_submission, inputs=None, outputs=[login_box, logout_box, submit_panel])

# blocks.app.add_api_route("/clear-cookies", clear_cookies, methods=["GET"])
# Background jobs: restart the Space every 30 minutes and reload the
# leaderboard data every 2 minutes.
logger.info("Scheduler")
scheduler = BackgroundScheduler()
for _job, _period_seconds in (
    (restart_space, 1800),
    (refresh_leaderboard_data, 120),
):
    scheduler.add_job(_job, "interval", seconds=_period_seconds)
scheduler.start()

# Enable the request queue (up to 40 concurrent runs per event by default)
# and start serving the app.
logger.info("Launch")
blocks.queue(default_concurrency_limit=40).launch()
logger.info("Done")