import json
import os
import tempfile
import threading
import time
from pathlib import Path

from huggingface_hub import HfApi, hf_hub_download

from src.config import DATASET_REPO_ID, DS_RESULTS_PATH
from src.user_eval import evaluate_submission


def run_evaluation(submission_path):
    print(f"Starting evaluation for: {submission_path}")
    main_eval(DATASET_REPO_ID, submission_path, DS_RESULTS_PATH)
    print(f"Evaluation process complete for: {submission_path}", flush=True)


def start_background_evaluation(submission_path):
    """Start evaluation in a background thread."""
    thread = threading.Thread(
        target=lambda: run_evaluation(submission_path),
        daemon=True
    )
    thread.start()
    return True


# --- Main Evaluation Logic ---
def main_eval(
    user_dataset_repo_id: str,
    submission_path_in_dataset: str,   # e.g., "submissions/uploaded_dir_name"
    results_base_path_in_dataset: str  # e.g., "results"
):
    start_time = time.time()

    # Infer submission name for logging and result path generation
    submission_name_for_files = Path(submission_path_in_dataset).name

    print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
    print(f" User Data Repo: {user_dataset_repo_id}", flush=True)
    print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
    print(f" Results to upload to: {results_base_path_in_dataset}/{submission_name_for_files}", flush=True)

    hf_api = HfApi()  # Will use HF_TOKEN from environment

    # Create a top-level temporary directory for all operations for this eval run
    with tempfile.TemporaryDirectory(prefix="eval_run_") as top_level_temp_dir_str:
        top_level_temp_dir = Path(top_level_temp_dir_str)
        local_submission_dir = top_level_temp_dir / "submissions"
        local_result_dir_for_upload = top_level_temp_dir / "results"
        os.makedirs(local_submission_dir, exist_ok=True)
        os.makedirs(local_result_dir_for_upload, exist_ok=True)

        # Path for the summary file within the local temporary result directory
        summary_file_path = local_result_dir_for_upload / "summary.txt"

        # Download submitted files from the HF Dataset
        print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...", flush=True)
        try:
            # Download the relevant submission file
            hf_hub_download(
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/submission.jsonl",
            )
            print(" Downloaded submission file successfully.", flush=True)

            # Download the metadata file
            hf_hub_download(
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/metadata.json",
            )
            print(" Downloaded metadata file successfully.", flush=True)
        except Exception as e_download:
            print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
            return 1

        # Load generated models from the JSONL file into memory
        print(f" Loading generated models from '{local_submission_dir}'...", flush=True)
        submitted_models = []
        with open(os.path.join(local_submission_dir, submission_path_in_dataset, "submission.jsonl"), "r", encoding="utf-8") as f:
            for line in f:
                try:
                    json_obj = json.loads(line)
                    submitted_models.append(json_obj)
                except json.JSONDecodeError as e:
                    print(f" ERROR: Failed to parse JSON object from line: {line}.\nError: {e}", flush=True)

        # Load the metadata file
        with open(os.path.join(local_submission_dir, submission_path_in_dataset, "metadata.json"), "r", encoding="utf-8") as f:
            metadata = json.load(f)

        print(f" Loaded {len(submitted_models)} generated models.", flush=True)

        # Writes stats to the summary file
        evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir)

        # Upload the entire local_result_dir_for_upload to the HF Dataset
        result_path_on_hub = f"{results_base_path_in_dataset}/{submission_name_for_files}"
        print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...", flush=True)
        try:
            hf_api.upload_folder(
                folder_path=str(local_result_dir_for_upload),
                path_in_repo=result_path_on_hub,
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                commit_message=f"Evaluation results for {submission_name_for_files}"
            )
            print(" Results uploaded successfully.", flush=True)
        except Exception as e_upload:
            print(f" CRITICAL ERROR: Failed to upload results: {e_upload}", flush=True)
            # The summary.txt was written locally, but the upload failed.

    elapsed_time = time.time() - start_time
    print(f"eval.py: Evaluation finished in {elapsed_time:.2f} seconds.", flush=True)