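"""Evaluation runner: download a user submission from the Hub dataset repo, evaluate it, and upload the results."""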
import json
import os
import tempfile
import threading
import time
from pathlib import Path

from huggingface_hub import HfApi, hf_hub_download

from src.config import DATASET_REPO_ID, DS_RESULTS_PATH
from src.user_eval import evaluate_submission


def run_evaluation(submission_path, dataset_version):
    """Run the full evaluation for a submission and log start/completion."""
    print(f"Starting evaluation for: {submission_path} (version: {dataset_version})", flush=True)
    main_eval(submission_path, dataset_version)
    print(f"Evaluation process complete for: {submission_path}", flush=True)


def start_background_evaluation(submission_path, dataset_version):
    """Start evaluation in a background thread."""
    thread = threading.Thread(
        target=lambda: run_evaluation(submission_path, dataset_version),
        daemon=True,
    )
    thread.start()
    return True


def main_eval(
    submission_path_in_dataset: str,
    dataset_version: str,
):
    """Download a submission from the dataset repo, evaluate it, and upload the results."""
    start_time = time.time()

    submission_name_for_files = Path(submission_path_in_dataset).name

    print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
    print(f" User Data Repo: {DATASET_REPO_ID}", flush=True)
    print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
    print(f" Results to upload to: {DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}", flush=True)

    hf_api = HfApi()

    with tempfile.TemporaryDirectory(prefix="eval_run_") as top_level_temp_dir_str:
        top_level_temp_dir = Path(top_level_temp_dir_str)
        local_submission_dir = top_level_temp_dir / "submissions"
        local_result_dir_for_upload = top_level_temp_dir / "results"

        os.makedirs(local_submission_dir, exist_ok=True)
        os.makedirs(local_result_dir_for_upload, exist_ok=True)

        summary_file_path = local_result_dir_for_upload / "summary.txt"

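        # Fetch submission.jsonl (one generated model per line) and metadata.json
        # (which carries the modelling framework) for this submission.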
        print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
              flush=True)
        try:
            hf_hub_download(
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/submission.jsonl",
            )
            print(" Downloaded submission file successfully.", flush=True)

            hf_hub_download(
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/metadata.json",
            )
            print(" Downloaded metadata file successfully.", flush=True)
        except Exception as e_download:
            print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
            return 1

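        # hf_hub_download keeps the repo-relative path under local_dir, so the files sit at
        # local_submission_dir / submission_path_in_dataset / <filename>.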
        print(f" Loading generated models from '{local_submission_dir}'...", flush=True)
        submitted_models = []
        with open(os.path.join(local_submission_dir, submission_path_in_dataset, "submission.jsonl"), "r",
                  encoding="utf-8") as f:
            for line in f:
                try:
                    json_obj = json.loads(line)
                    submitted_models.append(json_obj)
                except json.JSONDecodeError as e:
                    print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)

        with open(os.path.join(local_submission_dir, submission_path_in_dataset, "metadata.json"), "r",
                  encoding="utf-8") as f:
            metadata = json.load(f)

        print(f" Loaded {len(submitted_models)} generated models.", flush=True)

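        # Run the evaluation; summary.txt (and any other result files) are written into
        # local_result_dir_for_upload so they can be uploaded below.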
        evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"],
                            top_level_temp_dir, dataset_version)

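        # Upload the results folder back to the dataset repo; failures are logged but not re-raised.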
        result_path_on_hub = f"{DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}"
        print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
              flush=True)
        try:
            hf_api.upload_folder(
                folder_path=str(local_result_dir_for_upload),
                path_in_repo=result_path_on_hub,
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
                commit_message=f"Evaluation results for {submission_name_for_files}",
            )
            print(" Results uploaded successfully.", flush=True)
        except Exception as e_upload:
            print(f" CRITICAL ERROR: Failed to upload results: {e_upload}", flush=True)

    elapsed_time = time.time() - start_time
    print(f"eval.py: Evaluation finished in {elapsed_time:.2f} seconds.", flush=True)