Spaces:

double-ai
/

FormulaOne-Leaderboard

Running on CPU Upgrade

App Files Files Community

FormulaOne-Leaderboard / src /submission /submit.py

galb-dai

Remove some unused code/imports.

416ebf1 about 2 months ago

raw

history blame

3.21 kB

	import time
	from datetime import datetime, timezone

	import pandas as pd
	from datasets import Dataset
	from pandas.api.types import is_integer_dtype

	from src.datamodel.data import F1Data
	from src.display.formatting import styled_error, styled_message
	from src.display.utils import ModelType
	from src.envs import SUBMISSIONS_REPO
	from src.logger import get_logger

	logger = get_logger(__name__)


	def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
	    """Check an uploaded submission table against the leaderboard's problem set.

	    The checks run in order and the first failure wins:

	    1. the columns are exactly ``problem_id`` and ``solution``;
	    2. ``problem_id`` has an integer dtype;
	    3. every ``solution`` value is exactly a ``str``;
	    4. the submitted IDs (as strings) equal ``lbdb.code_problem_ids``;
	    5. no problem ID appears more than once.

	    Args:
	        lbdb: Leaderboard data. Only its ``code_problem_ids`` attribute is
	            read here; it is compared against a set of ``str`` IDs.
	        pd_ds: The submission as a DataFrame, one row per solution.

	    Returns:
	        An error message describing the first failed check, or ``None`` when
	        the submission passes all of them.
	    """
	    column_set = set(pd_ds.columns)
	    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, column_set)

	    expected_cols = ["problem_id", "solution"]
	    if column_set != set(expected_cols):
	        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"

	    if not is_integer_dtype(pd_ds["problem_id"]):
	        return "problem_id must be str convertible to int"

	    # Exact type match on purpose: NaN/None/numbers in the column are rejected.
	    if any(type(v) is not str for v in pd_ds["solution"]):
	        return "solution must be of type str"

	    known_ids = lbdb.code_problem_ids
	    # IDs are compared in string form against the leaderboard's known IDs.
	    submitted_ids = set(pd_ds["problem_id"].astype(str))
	    if submitted_ids != known_ids:
	        missing = known_ids - submitted_ids
	        unknown = submitted_ids - known_ids
	        return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"

	    # The ID sets are equal at this point, so extra rows can only be repeats.
	    if len(pd_ds) > len(known_ids):
	        return "Duplicate problem IDs exist in uploaded file"

	    return None


	def add_new_solutions(
	    lbdb: F1Data,
	    system_name: str,
	    org: str,
	    sys_type: str,
	    submission_path: str,
	    skip_validation: bool = False,
	):
	    """Validate an uploaded JSONL solutions file and push it to the submissions repo.

	    Form fields are checked first (system name, organization, system type,
	    uploaded file), then the file is parsed as JSON Lines and, unless
	    ``skip_validation`` is set, checked with ``validate_submission``. On
	    success every row is tagged with the submission metadata and the dataset
	    is pushed to ``SUBMISSIONS_REPO`` as a private upload, with the submission
	    ID passed as the second positional argument of ``push_to_hub``.

	    Args:
	        lbdb: Leaderboard data forwarded to ``validate_submission``.
	        system_name: Name of the submitted system; must be non-empty.
	        org: Submitting organization; must be non-empty.
	        sys_type: System type label, resolved through ``ModelType.from_str``.
	        submission_path: Path of the uploaded JSONL file.
	        skip_validation: When true, the content checks are bypassed.

	    Returns:
	        The result of ``styled_error(...)`` for the first failed check, or of
	        ``styled_message(...)`` once the submission has been pushed.
	    """
	    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)

	    if not system_name:
	        return styled_error("Please fill system name")

	    if not org:
	        return styled_error("Please fill organization name")

	    if not sys_type:
	        return styled_error("Please select system type")
	    # Normalize the user-facing label to the enum member's name.
	    sys_type = ModelType.from_str(sys_type).name

	    if not submission_path:
	        return styled_error("Please upload JSONL solutions file")

	    # The file is user-supplied, so any parse failure is reported back to the
	    # user instead of propagating.
	    try:
	        submission_df = pd.read_json(submission_path, lines=True)
	    except Exception as e:
	        return styled_error(f"Cannot read uploaded JSONL file: {e}")

	    if not skip_validation:
	        validation_error = validate_submission(lbdb, submission_df)
	        if validation_error:
	            return styled_error(validation_error)

	    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"

	    # All checks passed; record the submission through the module logger,
	    # consistent with the rest of this file, rather than a bare print.
	    logger.info("Adding new submission: %s", submission_id)
	    submission_ts = time.time_ns()

	    def add_info(row):
	        # Attach the same submission metadata to every solution row.
	        return {
	            **row,
	            "system_name": system_name,
	            "organization": org,
	            "system_type": sys_type,
	            "submission_id": submission_id,
	            "submission_ts": submission_ts,
	        }

	    ds = Dataset.from_pandas(submission_df).map(add_info)
	    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)

	    return styled_message(
	        "Your request has been submitted to the evaluation queue!\nResults may take up to 24 hours to be processed and shown in the leaderboard."
	    )