import json
import os
import time
from datetime import datetime, timezone

import pandas as pd
from datasets import Dataset

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, SUBMISSIONS_REPO, TOKEN
from src.logger import get_logger

# from src.submission.check_validity import (
#     already_submitted_models,
#     check_model_card,
#     get_model_size,
#     is_model_on_hub,
# )

logger = get_logger(__name__)


def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
    """Check an uploaded submission table against the known problem set.

    Args:
        lbdb: Leaderboard data; only ``code_problem_formulas`` (the set of
            expected formula names) is read here.
        pd_ds: The parsed submission, one row per solution.

    Returns:
        A human-readable error message describing the first failed check,
        or ``None`` when the submission passes every check.
    """
    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))

    # The file must contain exactly these two columns, no more and no less.
    if set(pd_ds.columns) != {"formula_name", "solution"}:
        return "Bad format of submission"

    if not all(isinstance(v, str) for v in pd_ds["formula_name"]):
        return "Not all formula_name values are of type str"
    if not all(isinstance(v, str) for v in pd_ds["solution"]):
        return "Not all solution values are of type str"

    submitted_formulas = set(pd_ds["formula_name"])
    expected_formulas = lbdb.code_problem_formulas
    if submitted_formulas != expected_formulas:
        missing = expected_formulas - submitted_formulas
        unknown = submitted_formulas - expected_formulas
        return f"Mismatched formula names: missing {len(missing)} unknown {len(unknown)}"

    # The name sets are equal at this point, so any extra rows can only be
    # repeated formula names.
    if len(pd_ds) > len(expected_formulas):
        return "Duplicate formula solutions exist in uploaded file"

    return None


def add_new_solutions(
    lbdb: F1Data,
    submitter: str,
    submission_path: str,
):
    """Validate an uploaded JSONL solutions file and push it to the hub.

    Args:
        lbdb: Leaderboard data used to validate the submission.
        submitter: Name entered by the submitting user; must be non-empty.
        submission_path: Path of the uploaded JSONL file; must be non-empty.

    Returns:
        The result of ``styled_error`` when an input is missing, the file
        cannot be parsed, or validation fails; otherwise the result of
        ``styled_message`` confirming the submission.
    """
    logger.info("ADD SUBMISSION! submitter %s path %s", submitter, submission_path)

    if not submitter:
        return styled_error("Please fill submitter name")

    if not submission_path:
        return styled_error("Please upload JSONL solutions file")

    try:
        pd_ds = pd.read_json(submission_path, lines=True)
    except Exception as e:
        return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")

    validation_error = validate_submission(lbdb, pd_ds)
    if validation_error:
        return styled_error(validation_error)

    # Derive the id from UTC so it does not depend on the host's local
    # time zone. NOTE(review): second resolution means two submissions in
    # the same second share an id - confirm this is acceptable.
    submission_id = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")

    # Seems good, creating the eval
    logger.info("Adding new submission %s from %s", submission_id, submitter)
    submission_ts = time.time_ns()

    def add_info(row):
        # Dataset.map only applies what the function returns; mutating
        # ``row`` in place and returning None leaves the dataset unchanged.
        return {
            "submitter": submitter,
            "submission_id": submission_id,
            "submission_ts": submission_ts,
        }

    ds = Dataset.from_pandas(pd_ds).map(add_info)

    # Each submission is stored under its own config name (the submission id).
    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)

    # print("Creating eval file")
    # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    # os.makedirs(OUT_DIR, exist_ok=True)
    # out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"

    # with open(out_path, "w") as f:
    #     f.write(json.dumps(eval_entry))

    # print("Uploading eval file")
    # API.upload_file(
    #     path_or_fileobj=out_path,
    #     path_in_repo=out_path.split("eval-queue/")[1],
    #     repo_id=QUEUE_REPO,
    #     repo_type="dataset",
    #     commit_message=f"Add {model} to eval queue",
    # )

    # # Remove the local file
    # os.remove(out_path)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )