import time
from datetime import datetime, timezone, timedelta
import os
import requests
import pandas as pd
from datasets import Dataset, get_dataset_config_names
from datasets.exceptions import DatasetNotFoundError
from pandas.api.types import is_integer_dtype
import gradio as gr
from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message
from src.display.utils import ModelType
from src.envs import SUBMISSIONS_REPO, TOKEN
from src.logger import get_logger
from src.validation.validate import is_submission_file_valid, is_valid
logger = get_logger(__name__)

# Minimum time a user must wait between consecutive submissions, in hours.
MIN_WAIT_TIME_PER_USER_HRS = 24


def add_new_solutions(
lbdb: F1Data,
username: str,
stable_id: str,
system_name: str,
org: str,
sys_type: str,
submission_path: str,
is_warmup_dataset: bool,
ensure_all_present: bool = False,
):
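    """Validate a new submission, enforce the per-user rate limit, and push it to the submissions repo."""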
# Users must wait MIN_WAIT_TIME_PER_USER_HRS hours between submissions.
try:
submitted_ids = get_dataset_config_names(SUBMISSIONS_REPO, token=TOKEN)
except (DatasetNotFoundError, FileNotFoundError):
submitted_ids = []
user_last_submission_date = None
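    # Config names are submission IDs of the form "YYYYMMDD_HHMMSS_<username>_<stable_id>" (see submission_id below).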
for sid in submitted_ids:
# Extract stable ID (last part)
past_stable_id = sid.rsplit("_", 1)[-1]
# Extract timestamp string (first two parts)
ts_str = "_".join(sid.split("_", 2)[:2])
ts = datetime.strptime(ts_str, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc)
if past_stable_id == stable_id:
if user_last_submission_date is None:
user_last_submission_date = ts
else:
user_last_submission_date = max(user_last_submission_date, ts)
if user_last_submission_date is not None:
now = datetime.now(timezone.utc)
elapsed = now - user_last_submission_date
if elapsed < timedelta(hours=MIN_WAIT_TIME_PER_USER_HRS):
remaining_hrs = MIN_WAIT_TIME_PER_USER_HRS - elapsed.total_seconds() / 3600
logger.info(f"{username} must wait {remaining_hrs:.2f} more hours.")
return styled_error(
f"You must wait {MIN_WAIT_TIME_PER_USER_HRS} hours between submissions. "
f"Remaining wait time: {remaining_hrs:.2f} hours"
)
logger.info(
f"Adding new submission: {system_name=}, {org=}, {sys_type=} and {submission_path=}",
)
    # Re-validate the user-provided fields and the submission file before accepting.
for val in [system_name, org, sys_type]:
assert is_valid(val)
assert is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup_dataset)
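    # Normalize the system type to its canonical ModelType name.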
sys_type = ModelType.from_str(sys_type).name
try:
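        # The submission file is JSON Lines, one record per problem (problem_id, solution).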
submission_df = pd.read_json(submission_path, lines=True)
if ensure_all_present:
_validate_all_submissions_present(lbdb=lbdb, pd_ds=submission_df)
except Exception:
logger.warning("Failed to parse submission DF!", exc_info=True)
return styled_error(
"An error occurred. Please try again later."
    )  # Same generic message as other failures, to avoid leaking internal details.
submission_id = f"{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}_{username}_{stable_id}"
    # Submission looks valid; push it to the evaluation queue.
logger.info(f"Adding new submission: {submission_id}")
    submission_ts = time.time_ns()

    def add_info(row):
return {
**row,
"system_name": system_name,
"organization": org,
"system_type": sys_type,
"submission_id": submission_id,
"submission_ts": submission_ts,
"evaluation_id": "", # This will be set later when the evaluation is launched in the backend
"evaluation_start_ts": "", # This will be set when the evaluation starts
        }

    ds = Dataset.from_pandas(submission_df).map(add_info)
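    # Push the annotated rows as a new private config named by the submission ID.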
ds.push_to_hub(
SUBMISSIONS_REPO,
submission_id,
private=True,
)
return styled_message(
"Your request has been submitted to the evaluation queue!\n"
+ "Results may take up to 24 hours to be processed and shown in the leaderboard."
    )


def fetch_sub_claim(oauth_token: gr.OAuthToken | None) -> dict | None:
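    """Fetch OpenID Connect user claims (stable id, username, name) for the logged-in user, or None on failure."""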
if oauth_token is None:
return None
provider = os.getenv("OPENID_PROVIDER_URL")
if not provider:
return None
try:
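        # Discover the userinfo endpoint from the provider's OpenID Connect discovery document.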
        oidc_meta = requests.get(f"{provider}/.well-known/openid-configuration", timeout=5).json()
        userinfo_ep = oidc_meta["userinfo_endpoint"]
        resp = requests.get(userinfo_ep, headers={"Authorization": f"Bearer {oauth_token.token}"}, timeout=5)
        logger.info(f"userinfo_endpoint response: status={resp.status_code}\nheaders={dict(resp.headers)}")
        claims = resp.json()
# Typical fields: sub (stable id), preferred_username, name, picture
return {
"sub": claims.get("sub"),
"preferred_username": claims.get("preferred_username"),
"name": claims.get("name"),
}
except Exception as e:
logger.warning(f"Failed to fetch user claims: {e}")
        return None


def _validate_all_submissions_present(
lbdb: F1Data,
pd_ds: pd.DataFrame,
):
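    """Check that the uploaded file contains exactly one well-formed solution for every known problem ID."""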
logger.info(f"Validating DS size {len(pd_ds)} columns {pd_ds.columns} set {set(pd_ds.columns)}")
expected_cols = ["problem_id", "solution"]
    if set(pd_ds.columns) != set(expected_cols):
        raise ValueError(f"Expected columns: {expected_cols}, got: {pd_ds.columns.tolist()}")
    if not is_integer_dtype(pd_ds["problem_id"]):
        raise ValueError("problem_id must be of integer type")
    if any(type(v) is not str for v in pd_ds["solution"]):
        raise ValueError("solution must be of type str")
submitted_ids = set(pd_ds.problem_id.astype(str))
if submitted_ids != lbdb.code_problem_ids:
missing = lbdb.code_problem_ids - submitted_ids
unknown = submitted_ids - lbdb.code_problem_ids
raise ValueError(f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown")
    if len(pd_ds) > len(lbdb.code_problem_ids):
        raise ValueError("Duplicate problem IDs exist in uploaded file")