import json
import os
import time
from datetime import datetime, timezone

import pandas as pd
from datasets import Dataset

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, SUBMISSIONS_REPO, TOKEN
from src.logger import get_logger
# from src.submission.check_validity import (
#     already_submitted_models,
#     check_model_card,
#     get_model_size,
#     is_model_on_hub,
# )

logger = get_logger(__name__)

def add_new_solutions(
    lbdb: F1Data,
    submission_path: str,
    submitter: str,
):
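    """Validate an uploaded JSONL solutions file and push it to the submissions repo.

    Returns a styled status message (error or success) for the submission UI.
    """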
    logger.info("ADD SUBMISSION! submitter %s path %s", submitter, submission_path)
    if not submitter:
        return styled_error("Please fill in the submitter name")

    if not submission_path:
        return styled_error("Please upload a JSONL solutions file")

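    # Each line of the uploaded JSONL file is expected to hold one solution record.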
    try:
        ds = pd.read_json(submission_path, lines=True)
    except Exception as e:
        return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")

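    # The submission must cover exactly the leaderboard's formula set,
    # with no missing, unknown, or duplicate entries.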
    if "formula_name" not in ds.columns:
        return styled_error("Uploaded file is missing the 'formula_name' column")

    submitted_formulas = set(ds["formula_name"])
    if submitted_formulas != lbdb.code_problem_formulas:
        missing = lbdb.code_problem_formulas - submitted_formulas
        unknown = submitted_formulas - lbdb.code_problem_formulas
        return styled_error(f"Mismatched formula names: {len(missing)} missing, {len(unknown)} unknown")
    if len(ds) > len(lbdb.code_problem_formulas):
        return styled_error("Duplicate formula solutions exist in the uploaded file")

    submission_id = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")

    # Validation passed; stamp every row with the submission metadata.
    logger.info("Adding new submission %s from %s", submission_id, submitter)
    submission_ts = time.time_ns()

    ds["submitter"] = submitter
    ds["submission_id"] = submission_id
    ds["submission_ts"] = submission_ts

    # push_to_hub is a datasets.Dataset method, so convert the DataFrame first;
    # the submission_id doubles as the config name within the submissions repo.
    Dataset.from_pandas(ds).push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
    # print("Creating eval file")
    # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    # os.makedirs(OUT_DIR, exist_ok=True)
    # out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"

    # with open(out_path, "w") as f:
    #     f.write(json.dumps(eval_entry))

    # print("Uploading eval file")
    # API.upload_file(
    #     path_or_fileobj=out_path,
    #     path_in_repo=out_path.split("eval-queue/")[1],
    #     repo_id=QUEUE_REPO,
    #     repo_type="dataset",
    #     commit_message=f"Add {model} to eval queue",
    # )

    # # Remove the local file
    # os.remove(out_path)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )