File size: 3,564 Bytes
c887522
416ebf1
0061e14
c887522
416ebf1
 
0061e14
c887522
416ebf1
61885ca
416ebf1
053a0cd
48e85fc
80fb2c0
053a0cd
 
80fb2c0
5048713
 
 
 
 
80fb2c0
b74992f
61885ca
5048713
b74992f
 
5048713
b74992f
416ebf1
5048713
b74992f
 
80fb2c0
 
 
5048713
80fb2c0
5048713
a3d4fda
80fb2c0
c887522
 
80fb2c0
61885ca
 
44a4b77
5048713
6446f53
0061e14
5048713
 
 
61885ca
5048713
 
 
6446f53
61885ca
 
0061e14
 
61885ca
5048713
 
 
 
 
 
 
0061e14
61885ca
0061e14
5048713
 
c887522
 
 
5f7ca36
 
 
 
 
 
 
d1bb4d2
 
5f7ca36
c887522
61885ca
814f111
 
 
 
 
0061e14
 
34a2915
 
0061e14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import time
from datetime import datetime, timezone

import pandas as pd
from datasets import Dataset
from pandas.api.types import is_integer_dtype

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message
from src.display.utils import ModelType
from src.envs import SUBMISSIONS_REPO
from src.logger import get_logger
from src.validation.validate import is_submission_file_valid, is_valid

# Module-level logger named after this module, per standard logging convention.
logger = get_logger(__name__)


def _validate_all_submissions_present(
    lbdb: F1Data,
    pd_ds: pd.DataFrame,
) -> None:
    """Validate that an uploaded submission covers exactly the expected problem set.

    Checks, in order: the dataframe has exactly the ``problem_id`` and
    ``solution`` columns, ``problem_id`` is an integer dtype, every
    ``solution`` is a ``str``, the set of submitted IDs matches
    ``lbdb.code_problem_ids`` exactly, and there are no duplicate rows.

    Args:
        lbdb: Leaderboard data holder exposing ``code_problem_ids``
            (a set of problem IDs as strings).
        pd_ds: The uploaded submission parsed into a dataframe.

    Raises:
        ValueError: If any of the checks above fails.
    """
    logger.info(f"Validating DS size {len(pd_ds)} columns {pd_ds.columns} set {set(pd_ds.columns)}")
    expected_cols = ["problem_id", "solution"]

    # BUG FIX: the original code used `return ValueError(...)` in the four
    # branches below — the exception object was constructed and returned but
    # never raised, so invalid submissions passed validation silently (the
    # caller ignores the return value). All branches now `raise`.
    if set(pd_ds.columns) != set(expected_cols):
        raise ValueError(f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}")

    if not is_integer_dtype(pd_ds["problem_id"]):
        raise ValueError("problem_id must be str convertible to int")

    if any(type(v) is not str for v in pd_ds["solution"]):
        raise ValueError("solution must be of type str")

    submitted_ids = set(pd_ds.problem_id.astype(str))
    if submitted_ids != lbdb.code_problem_ids:
        missing = lbdb.code_problem_ids - submitted_ids
        unknown = submitted_ids - lbdb.code_problem_ids
        raise ValueError(f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown")
    # Set equality above cannot detect repeats; a longer frame means duplicates.
    if len(pd_ds) > len(lbdb.code_problem_ids):
        raise ValueError("Duplicate problem IDs exist in uploaded file")


def add_new_solutions(
    lbdb: F1Data,
    system_name: str,
    org: str,
    sys_type: str,
    submission_path: str,
    ensure_all_present: bool = False,
    *,
    is_warmup_dataset: bool,
):
    """Validate a submission file and push it to the submissions hub repo.

    BUG FIX: the original signature declared the non-default parameter
    ``is_warmup_dataset`` after the defaulted ``ensure_all_present``, which is
    a SyntaxError (non-default argument follows default argument) — the module
    could not be imported at all. ``is_warmup_dataset`` is now keyword-only,
    which preserves every call site that could legally have existed.

    Args:
        lbdb: Leaderboard data holder used to cross-check problem IDs.
        system_name: Name of the submitted system.
        org: Submitting organization.
        sys_type: System type string, resolved via ``ModelType.from_str``.
        submission_path: Path to the JSON-lines submission file.
        ensure_all_present: If True, require the file to cover exactly the
            problem set in ``lbdb`` (see ``_validate_all_submissions_present``).
        is_warmup_dataset: Keyword-only; forwarded to the submission-file
            validator.

    Returns:
        An HTML-styled status message (success or generic error) for display.
    """
    logger.info(
        f"Adding new submission! {system_name=}, {org=}, {sys_type=} and {submission_path=}",
    )

    # Double-checking. NOTE(review): `assert` is stripped under `python -O`;
    # these are a second line of defense, not the primary validation, which is
    # assumed to happen in the UI layer before this call — confirm upstream.
    for val in [system_name, org, sys_type]:
        assert is_valid(val)
    assert is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup_dataset)

    sys_type = ModelType.from_str(sys_type).name

    try:
        submission_df = pd.read_json(submission_path, lines=True)
        if ensure_all_present:
            _validate_all_submissions_present(lbdb=lbdb, pd_ds=submission_df)
    except Exception:
        logger.warning("Failed to parse submission DF!", exc_info=True)
        return styled_error(
            "An error occurred. Please try again later."
        )  # Use same message as external error. Avoid infoleak.

    # Unique per-submission identifier; the UTC timestamp suffix disambiguates
    # repeat submissions from the same system/org.
    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"

    # Seems good, creating the eval.
    logger.info(f"Adding new submission: {submission_id}")
    submission_ts = time.time_ns()

    def add_info(row):
        # Attach submission-level metadata to every row of the dataset.
        return {
            **row,
            "system_name": system_name,
            "organization": org,
            "system_type": sys_type,
            "submission_id": submission_id,
            "submission_ts": submission_ts,
            "evaluation_id": float("nan"),  # This will be set later when the evaluation is launched in the backend
            "evaluation_start_ts": "",  # This will be set when the evaluation starts
        }

    ds = Dataset.from_pandas(submission_df).map(add_info)
    ds.push_to_hub(
        SUBMISSIONS_REPO,
        submission_id,
        private=True,
    )

    return styled_message(
        "Your request has been submitted to the evaluation queue!\n"
        + "Results may take up to 24 hours to be processed and shown in the leaderboard."
    )