Refactor evaluation logic: streamline user_eval.py, update evaluation script references, and clean up eval.py
70cc330
import json
import os
import tempfile
import threading
import time
from pathlib import Path

from huggingface_hub import HfApi, hf_hub_download

from src.config import DATASET_REPO_ID, DS_RESULTS_PATH
from src.user_eval import evaluate_submission
def run_evaluation(submission_path):
    """Run the full evaluation pipeline for a single submission path."""
    print(f"Starting evaluation for: {submission_path}", flush=True)
    main_eval(DATASET_REPO_ID, submission_path, DS_RESULTS_PATH)
    print(f"Evaluation process complete for: {submission_path}", flush=True)


def start_background_evaluation(submission_path):
    """Start evaluation in a daemon thread so the caller is not blocked."""
    thread = threading.Thread(
        target=run_evaluation,
        args=(submission_path,),
        daemon=True,
    )
    thread.start()
    return True

# --- Main Evaluation Logic ---
def main_eval(
    user_dataset_repo_id: str,
    submission_path_in_dataset: str,   # e.g., "submissions/uploaded_dir_name"
    results_base_path_in_dataset: str  # e.g., "results"
):
    """Download a submission from the HF dataset, evaluate it, and upload the results."""
    start_time = time.time()

    # Infer the submission name for logging and result path generation
    submission_name_for_files = Path(submission_path_in_dataset).name
    print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
    print(f" User Data Repo: {user_dataset_repo_id}", flush=True)
    print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
    print(f" Results to upload to: {results_base_path_in_dataset}/{submission_name_for_files}", flush=True)

    hf_api = HfApi()  # Will use HF_TOKEN from the environment
    # Create a top-level temporary directory for all operations in this eval run
    with tempfile.TemporaryDirectory(prefix="eval_run_") as top_level_temp_dir_str:
        top_level_temp_dir = Path(top_level_temp_dir_str)
        local_submission_dir = top_level_temp_dir / "submissions"
        local_result_dir_for_upload = top_level_temp_dir / "results"
        os.makedirs(local_submission_dir, exist_ok=True)
        os.makedirs(local_result_dir_for_upload, exist_ok=True)

        # Path for the summary file within the local temporary result directory
        summary_file_path = local_result_dir_for_upload / "summary.txt"

        # Download submitted files from the HF dataset
        print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
              flush=True)
        try:
            # Download the submission file
            hf_hub_download(
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/submission.jsonl",
            )
            print(" Downloaded submission file successfully.", flush=True)

            # Download the metadata file
            hf_hub_download(
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                local_dir=local_submission_dir,
                filename=f"{submission_path_in_dataset}/metadata.json",
            )
            print(" Downloaded metadata file successfully.", flush=True)
        except Exception as e_download:
            print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
            return 1

        # Load the generated models from the JSONL file into memory
        print(f" Loading generated models from '{local_submission_dir}'...", flush=True)
        submitted_models = []
        submission_file = local_submission_dir / submission_path_in_dataset / "submission.jsonl"
        with open(submission_file, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    submitted_models.append(json.loads(line))
                except json.JSONDecodeError as e:
                    print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)

        # Load the metadata file
        metadata_file = local_submission_dir / submission_path_in_dataset / "metadata.json"
        with open(metadata_file, "r", encoding="utf-8") as f:
            metadata = json.load(f)
        print(f" Loaded {len(submitted_models)} generated models.", flush=True)

        # Evaluate the submission; this writes the stats to the summary file
        evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir)

        # Upload the entire local_result_dir_for_upload to the HF dataset
        result_path_on_hub = f"{results_base_path_in_dataset}/{submission_name_for_files}"
        print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
              flush=True)
        try:
            hf_api.upload_folder(
                folder_path=str(local_result_dir_for_upload),
                path_in_repo=result_path_on_hub,
                repo_id=user_dataset_repo_id,
                repo_type="dataset",
                commit_message=f"Evaluation results for {submission_name_for_files}",
            )
            print(" Results uploaded successfully.", flush=True)
        except Exception as e_upload:
            # The summary.txt was written locally, but the upload failed.
            print(f" CRITICAL ERROR: Failed to upload results: {e_upload}", flush=True)

    elapsed_time = time.time() - start_time
    print(f"eval.py: Evaluation finished in {elapsed_time:.2f} seconds.", flush=True)