Refactor evaluation logic: streamline user_eval.py, update evaluation script references, and clean up eval.py
70cc330

import json
import os
import tempfile
import threading
import time
from pathlib import Path

from huggingface_hub import HfApi, hf_hub_download

from src.config import DATASET_REPO_ID, DS_RESULTS_PATH
from src.user_eval import evaluate_submission


def run_evaluation(submission_path):
    """Run the full evaluation for a single submission (blocking call)."""
    print(f"Starting evaluation for: {submission_path}", flush=True)
    main_eval(DATASET_REPO_ID, submission_path, DS_RESULTS_PATH)
    print(f"Evaluation process complete for: {submission_path}", flush=True)


def start_background_evaluation(submission_path):
    """Start evaluation in a background thread."""
    thread = threading.Thread(target=run_evaluation, args=(submission_path,), daemon=True)
    thread.start()
    return True
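
# Example usage (hypothetical caller; the real wiring lives outside this module):
# once a submission has been uploaded under "submissions/uploaded_dir_name" in the
# dataset repo, the app can kick off evaluation without blocking the request:
#
#     start_background_evaluation("submissions/uploaded_dir_name")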


# --- Main Evaluation Logic ---
def main_eval(
user_dataset_repo_id: str,
submission_path_in_dataset: str, # e.g., "submissions/uploaded_dir_name"
results_base_path_in_dataset: str # e.g., "results"
):
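    """Download a submission from the dataset repo, evaluate it, and upload the results.

    Returns 1 if the submission files cannot be downloaded.
    """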
start_time = time.time()
# Infer submission name for logging and result path generation
submission_name_for_files = Path(submission_path_in_dataset).name
print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
print(f" User Data Repo: {user_dataset_repo_id}", flush=True)
print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
print(f" Results to upload to: {results_base_path_in_dataset}/{submission_name_for_files}", flush=True)
hf_api = HfApi() # Will use HF_TOKEN from environment
# Create a top-level temporary directory for all operations for this eval run
with tempfile.TemporaryDirectory(prefix="eval_run_") as top_level_temp_dir_str:
top_level_temp_dir = Path(top_level_temp_dir_str)
local_submission_dir = top_level_temp_dir / "submissions"
local_result_dir_for_upload = top_level_temp_dir / "results"
os.makedirs(local_submission_dir, exist_ok=True)
os.makedirs(local_result_dir_for_upload, exist_ok=True)
# Path for the summary file within the local temporary result directory
summary_file_path = local_result_dir_for_upload / "summary.txt"
# Download submitted files from HF Dataset
print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
flush=True)
try:
# Download the relevant submission file
hf_hub_download(
repo_id=user_dataset_repo_id,
repo_type="dataset",
local_dir=local_submission_dir,
filename=f"{submission_path_in_dataset}/submission.jsonl",
)
print(f" Downloaded submission file successfully.", flush=True)
# Download the metadata file
hf_hub_download(
repo_id=user_dataset_repo_id,
repo_type="dataset",
local_dir=local_submission_dir,
filename=f"{submission_path_in_dataset}/metadata.json",
)
print(f" Downloaded metadata file successfully.", flush=True)
except Exception as e_download:
print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
return 1
        # Load the generated models from submission.jsonl into memory
print(f" Loading generated models from '{local_submission_dir}'...", flush=True)
submitted_models = []
with open(os.path.join(local_submission_dir, submission_path_in_dataset, "submission.jsonl"), "r",
encoding="utf-8") as f:
for line in f:
try:
json_obj = json.loads(line)
submitted_models.append(json_obj)
except json.JSONDecodeError as e:
print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)
        # Load the metadata file
with open(os.path.join(local_submission_dir, submission_path_in_dataset, "metadata.json"), "r",
encoding="utf-8") as f:
metadata = json.load(f)
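        # Only the "modelling_framework" field of metadata.json is used below; any
        # other metadata keys are ignored by this script.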
print(f" Loaded {len(submitted_models)} generated models.", flush=True)
        # Write evaluation statistics to the local summary file
evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir)
# Upload the entire local_result_dir_for_upload to HF Dataset
result_path_on_hub = f"{results_base_path_in_dataset}/{submission_name_for_files}"
print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
flush=True)
try:
hf_api.upload_folder(
folder_path=str(local_result_dir_for_upload),
path_in_repo=result_path_on_hub,
repo_id=user_dataset_repo_id,
repo_type="dataset",
commit_message=f"Evaluation results for {submission_name_for_files}"
)
print(" Results uploaded successfully.", flush=True)
except Exception as e_upload:
print(f" CRITICAL ERROR: Failed to upload results: {e_upload}", flush=True)
# The summary.txt was written locally, but upload failed.
elapsed_time = time.time() - start_time
print(f"eval.py: Evaluation finished in {elapsed_time:.2f} seconds.", flush=True)