Spaces:

double-ai
/

FormulaOne-Leaderboard

Running on CPU Upgrade

File size: 3,931 Bytes

import json
import os
from datetime import datetime, timezone
import time

from datasets import Dataset
import pandas as pd

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import ModelType
from src.envs import API, SUBMISSIONS_REPO, TOKEN
from src.logger import get_logger
# from src.submission.check_validity import (
#     already_submitted_models,
#     check_model_card,
#     get_model_size,
#     is_model_on_hub,
# )

logger = get_logger(__name__)

def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
    """Check that an uploaded submission table is well-formed.

    The table must have exactly the columns ``formula_name`` and ``solution``,
    both holding only strings, and must contain exactly one row for every
    formula name in ``lbdb.code_problem_formulas``.

    Args:
        lbdb: Leaderboard data object; only its ``code_problem_formulas``
            attribute is read here (presumably a set of formula names, since
            it is compared and differenced against a set -- confirm in F1Data).
        pd_ds: The submission loaded as a DataFrame.

    Returns:
        ``None`` when the submission is valid, otherwise a human-readable
        description of the first problem found.
    """
    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
    expected_cols = ["formula_name", "solution"]
    if set(pd_ds.columns) != set(expected_cols):
        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"

    # Both columns must be purely textual; missing values (NaN/None) and
    # numbers are rejected because they are not str instances.
    for col in expected_cols:
        if not all(isinstance(value, str) for value in pd_ds[col]):
            return f"Not all {col} values are of type str"

    submitted_formulas = set(pd_ds["formula_name"])
    if submitted_formulas != lbdb.code_problem_formulas:
        missing = lbdb.code_problem_formulas - submitted_formulas
        unknown = submitted_formulas - lbdb.code_problem_formulas
        return f"Mismatched formula names: {len(missing)} missing, {len(unknown)} unknown"

    # The distinct names match the expected set exactly at this point, so any
    # surplus rows can only come from a formula name appearing more than once.
    if len(pd_ds) > len(lbdb.code_problem_formulas):
        return "Duplicate formula solutions exist in uploaded file"
    return None

def add_new_solutions(
    lbdb: F1Data,
    system_name: str,
    org: str,
    sys_type: str,
    submission_path: str,
):
    """Validate an uploaded JSONL solutions file and push it to the submissions repo.

    Each form field is checked for presence, the file is parsed as
    line-delimited JSON and validated with ``validate_submission``. On success
    every row is tagged with the submission metadata and the dataset is pushed
    to ``SUBMISSIONS_REPO`` as a private config named after the submission id.

    Args:
        lbdb: Leaderboard data object forwarded to ``validate_submission``.
        system_name: Name of the submitted system (required).
        org: Name of the submitting organization (required).
        sys_type: System type label, resolved through ``ModelType.from_str``.
        submission_path: Path of the uploaded JSONL file.

    Returns:
        The result of ``styled_error(...)`` describing the first problem
        found, or of ``styled_message(...)`` once the submission was pushed.
    """
    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
    if not system_name:
        return styled_error("Please fill system name")

    if not org:
        return styled_error("Please fill organization name")

    if not sys_type:
        return styled_error("Please select system type")
    sys_type = ModelType.from_str(sys_type).name

    if not submission_path:
        return styled_error("Please upload JSONL solutions file")

    try:
        submission_df = pd.read_json(submission_path, lines=True)
    except Exception as e:
        # Any parsing failure is reported back to the user instead of crashing the UI.
        return styled_error(f"Cannot read uploaded JSONL file: {e}")

    validation_error = validate_submission(lbdb, submission_df)
    if validation_error:
        return styled_error(validation_error)

    # The id combines the submitter's fields with a UTC timestamp (second resolution).
    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"

    logger.info("Adding new submission: %s", submission_id)
    submission_ts = time.time_ns()

    def add_info(row):
        """Attach the submission metadata to a single dataset row."""
        row["system_name"] = system_name
        row["organization"] = org
        row["system_type"] = sys_type
        row["submission_id"] = submission_id
        row["submission_ts"] = submission_ts
        # Dataset.map only applies changes that the function returns; without
        # this return the metadata columns are silently dropped.
        return row

    ds = Dataset.from_pandas(submission_df).map(add_info)

    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )