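"""Background evaluation runner for user submissions.

Downloads a submission (generated models plus metadata) from the Hugging Face
dataset repo, evaluates it, and uploads the resulting summary back to the
dataset's results path.
"""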
import json
import os
import tempfile
import threading
import time
from pathlib import Path

from huggingface_hub import HfApi, hf_hub_download

from src.config import DATASET_REPO_ID, DS_RESULTS_PATH
from src.user_eval import evaluate_submission


def run_evaluation(submission_path: str):
    """Run a single evaluation for the given submission path in the dataset repo."""
    print(f"Starting evaluation for: {submission_path}", flush=True)
    main_eval(DATASET_REPO_ID, submission_path, DS_RESULTS_PATH)
    print(f"Evaluation process complete for: {submission_path}", flush=True)


def start_background_evaluation(submission_path):
    """Start evaluation in a background thread."""
    thread = threading.Thread(
        target=run_evaluation,
        args=(submission_path,),
        daemon=True,
    )
    thread.start()
    return True


# --- Main Evaluation Logic ---
def main_eval(
        user_dataset_repo_id: str,
        submission_path_in_dataset: str,  # e.g., "submissions/uploaded_dir_name"
        results_base_path_in_dataset: str  # e.g., "results"
):
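    """Download the submission files from the dataset repo, evaluate them, and
    upload the resulting summary to `results_base_path_in_dataset/<submission_name>`.
    """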
    start_time = time.time()
    # Infer submission name for logging and result path generation
    submission_name_for_files = Path(submission_path_in_dataset).name

    print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
    print(f"  User Data Repo: {user_dataset_repo_id}", flush=True)
    print(f"  Submission to download from: {submission_path_in_dataset}", flush=True)
    print(f"  Results to upload to: {results_base_path_in_dataset}/{submission_name_for_files}", flush=True)

    hf_api = HfApi()  # Will use HF_TOKEN from environment

    # Create a top-level temporary directory for all operations for this eval run
    with tempfile.TemporaryDirectory(prefix="eval_run_") as top_level_temp_dir_str:
        top_level_temp_dir = Path(top_level_temp_dir_str)
        local_submission_dir = top_level_temp_dir / "submissions"
        local_result_dir_for_upload = top_level_temp_dir / "results"

        os.makedirs(local_submission_dir, exist_ok=True)
        os.makedirs(local_result_dir_for_upload, exist_ok=True)

        # Path for the summary file within the local temporary result directory
        summary_file_path = local_result_dir_for_upload / "summary.txt"

        # Download submitted files from HF Dataset
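        # Each submission directory is expected to contain two files:
        #   submission.jsonl - one generated model per line
        #   metadata.json    - submission metadata (e.g. the modelling framework)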
        print(f"  Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
              flush=True)
        try:
            # Download the relevant submission file
            hf_hub_download(
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/submission.jsonl",
            )
            print("  Downloaded submission file successfully.", flush=True)
            # Download the metadata file
            hf_hub_download(
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/metadata.json",
            )
            print("  Downloaded metadata file successfully.", flush=True)

        except Exception as e_download:
            print(f"  CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
            return 1

        # Load generated models from the JSONL file into memory
        print(f"  Loading generated models from '{local_submission_dir}'...", flush=True)
        submitted_models = []
        submission_file = local_submission_dir / submission_path_in_dataset / "submission.jsonl"
        with open(submission_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # Skip blank lines instead of logging a spurious parse error
                try:
                    submitted_models.append(json.loads(line))
                except json.JSONDecodeError as e:
                    print(f"  ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)

        # Load the metadata file
        metadata_file = local_submission_dir / submission_path_in_dataset / "metadata.json"
        with open(metadata_file, "r", encoding="utf-8") as f:
            metadata = json.load(f)

        print(f"  Loaded {len(submitted_models)} generated models.", flush=True)

        # Writes stats to the summary file
        evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir)

        # Upload the entire local_result_dir_for_upload to HF Dataset
        result_path_on_hub = f"{results_base_path_in_dataset}/{submission_name_for_files}"
        print(f"  Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
              flush=True)
        try:
            hf_api.upload_folder(
                folder_path=str(local_result_dir_for_upload),
                path_in_repo=result_path_on_hub,
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                commit_message=f"Evaluation results for {submission_name_for_files}"
            )
            print("  Results uploaded successfully.", flush=True)
        except Exception as e_upload:
            print(f"  CRITICAL ERROR: Failed to upload results: {e_upload}", flush=True)
            # The summary.txt was written locally, but upload failed.

    elapsed_time = time.time() - start_time
    print(f"eval.py: Evaluation finished in {elapsed_time:.2f} seconds.", flush=True)
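

# Illustrative CLI entry point (a minimal sketch, not part of the background-thread
# flow above): assumes the path of an existing submission inside the dataset repo
# is passed as the first command-line argument, e.g.
#   python eval.py submissions/uploaded_dir_name
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 2:
        print("Usage: python eval.py <submission_path_in_dataset>", flush=True)
        sys.exit(1)
    main_eval(DATASET_REPO_ID, sys.argv[1], DS_RESULTS_PATH)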