import json
import os
import time
from datetime import datetime, timezone

import pandas as pd
from datasets import Dataset

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import ModelType
from src.envs import API, SUBMISSIONS_REPO, TOKEN
from src.logger import get_logger

# from src.submission.check_validity import (
#     already_submitted_models,
#     check_model_card,
#     get_model_size,
#     is_model_on_hub,
# )

logger = get_logger(__name__)
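
# Expected upload format: a JSONL file with one record per formula, e.g. (illustrative values only):
#   {"formula_name": "<formula identifier>", "solution": "<solution source code>"}
# Every formula in the leaderboard's code-problem set must appear exactly once.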
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
    """Return an error message describing why the uploaded dataframe is invalid, or None if it passes."""
    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
    expected_cols = ["formula_name", "solution"]
    if set(pd_ds.columns) != set(expected_cols):
        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
    if not all(isinstance(v, str) for v in pd_ds["formula_name"]):
        return "Not all formula_name values are of type str"
    if not all(isinstance(v, str) for v in pd_ds["solution"]):
        return "Not all solution values are of type str"
    submitted_formulas = set(pd_ds["formula_name"])
    if submitted_formulas != lbdb.code_problem_formulas:
        missing = lbdb.code_problem_formulas - submitted_formulas
        unknown = submitted_formulas - lbdb.code_problem_formulas
        return f"Mismatched formula names: {len(missing)} missing, {len(unknown)} unknown"
    # Formula sets match at this point, so any extra rows must be duplicate formula names.
    if len(pd_ds) > len(lbdb.code_problem_formulas):
        return "Duplicate formula solutions exist in uploaded file"
    return None


def add_new_solutions(
    lbdb: F1Data,
    system_name: str,
    org: str,
    sys_type: str,
    submission_path: str,
):
    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
    if not system_name:
        return styled_error("Please fill in the system name")
    if not org:
        return styled_error("Please fill in the organization name")
    if not sys_type:
        return styled_error("Please select a system type")
    sys_type = ModelType.from_str(sys_type).name
    if not submission_path:
        return styled_error("Please upload a JSONL solutions file")

    try:
        submission_df = pd.read_json(submission_path, lines=True)
    except Exception as e:
        return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")

    validation_error = validate_submission(lbdb, submission_df)
    if validation_error:
        return styled_error(validation_error)
    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"

    # Submission looks valid: stamp every row with its metadata and push it to the hub.
    logger.info("Adding new submission: %s", submission_id)
    submission_ts = time.time_ns()

    def add_info(row):
        row["system_name"] = system_name
        row["organization"] = org
        row["system_type"] = sys_type
        row["submission_id"] = submission_id
        row["submission_ts"] = submission_ts
        return row  # Dataset.map requires the mapping function to return the updated example

    # Each submission is stored as its own private config (named by submission_id) in SUBMISSIONS_REPO.
    ds = Dataset.from_pandas(submission_df).map(add_info)
    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
    # print("Creating eval file")
    # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    # os.makedirs(OUT_DIR, exist_ok=True)
    # out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
    # with open(out_path, "w") as f:
    #     f.write(json.dumps(eval_entry))
    # print("Uploading eval file")
    # API.upload_file(
    #     path_or_fileobj=out_path,
    #     path_in_repo=out_path.split("eval-queue/")[1],
    #     repo_id=QUEUE_REPO,
    #     repo_type="dataset",
    #     commit_message=f"Add {model} to eval queue",
    # )
    # # Remove the local file
    # os.remove(out_path)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the submission to show in the PENDING list."
    )
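

# Hypothetical usage sketch (not part of this module): how a submit handler might call
# add_new_solutions. The F1Data constructor arguments and the example argument values below
# are assumptions for illustration only.
#
#     lbdb = F1Data(...)  # leaderboard data holding the expected code_problem_formulas set
#     message = add_new_solutions(
#         lbdb,
#         system_name="my-system",
#         org="my-org",
#         sys_type="open-source",        # must be a string ModelType.from_str can resolve
#         submission_path="solutions.jsonl",
#     )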