File size: 1,061 Bytes
c887522
ed67886
c887522
 
 
272ff8c
 
 
 
 
c887522
 
 
 
 
 
 
 
 
 
 
 
272ff8c
ed67886
a9cd8a5
ed67886
c887522
ed67886
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import functools
import time

from datasets import load_dataset

from src.envs import TOKEN
from src.logger import get_logger

logger = get_logger(__name__)

class F1Data:
    """Hold the dataset names used by the app and the loaded code problems.

    On construction the three dataset names are stored and ``initialize()``
    is called, which loads the code-problems dataset and builds
    ``code_problems``.  The submissions and results dataset names are only
    stored by the code visible here.
    """

    def __init__(self, cp_ds_name: str, sub_ds_name: str, res_ds_name: str):
        """Store the dataset names and load the code problems.

        Args:
            cp_ds_name: Name of the code-problems dataset passed to
                ``load_dataset``.
            sub_ds_name: Name of the submissions dataset (stored only).
            res_ds_name: Name of the results dataset (stored only).
        """
        self.cp_dataset_name = cp_ds_name
        self.submissions_dataset_name = sub_ds_name
        self.results_dataset_name = res_ds_name
        self.initialize()

    @functools.cached_property
    def code_problem_formulas(self) -> set[str]:
        """Return the set of formula names that have a code problem.

        The value is cached per instance; the cache is dropped whenever
        the ``code_problems`` mapping is rebuilt by ``initialize()``.
        """
        return set(self.code_problems)

    def initialize(self):
        """Load the ``hard`` split of the code-problems dataset.

        Populates ``self.code_problems`` (formula name -> problem
        description).  May be called again to reload the data.
        """
        # Never write the token itself to the log: it is a credential.
        logger.info("Initialize F1Data (token %s)", "set" if TOKEN else "not set")
        start_time = time.monotonic()
        cp_ds = load_dataset(self.cp_dataset_name, split="hard", token=TOKEN)
        logger.info(
            "Loaded code-problems dataset from %s in %f sec",
            self.cp_dataset_name,
            time.monotonic() - start_time,
        )
        self._store_code_problems(cp_ds)
        # The count is enough at INFO level; the full mapping can be large.
        logger.info("Loaded %d code problems", len(self.code_problems))
        logger.debug("Code problems info: %s", self.code_problems)

    def _store_code_problems(self, rows) -> None:
        """Build ``code_problems`` from dataset rows and reset derived caches.

        Args:
            rows: Iterable of row mappings, each providing ``"formula_name"``
                and ``"code_problem"["problem_description"]``.  If a formula
                name occurs more than once, the last row wins.
        """
        self.code_problems: dict[str, str] = {
            row["formula_name"]: row["code_problem"]["problem_description"]
            for row in rows
        }
        # cached_property stores its value in the instance __dict__; remove
        # it so the formula set is recomputed from the new mapping.
        self.__dict__.pop("code_problem_formulas", None)