"""Utilities for interacting with the Hugging Face Hub.""" import io import json import os import shutil from pathlib import Path import pandas as pd from huggingface_hub import HfApi, hf_hub_download, list_repo_files from src.config import DATASET_REPO_ID, DS_RESULTS_PATH, DS_SUBMISSIONS_PATH, LDB_COLS # Initialize HfApi try: HF_API = HfApi() print(f"Successfully initialized HfApi. Will use dataset repo: {DATASET_REPO_ID}") except Exception as e: print(f"Failed to initialize HfApi: {e}") HF_API = None def load_leaderboard_data(): """Load leaderboard data from Hugging Face Dataset.""" if not HF_API: return pd.DataFrame(columns=LDB_COLS) leaderboard_entries = [] processed_result_dirs = set() try: # List all files in the results path of the dataset repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset") # Find all summary files summary_files = [ f for f in repo_files if f.endswith("summary.txt") and f.startswith(DS_RESULTS_PATH + "/") ] summary_files.sort(reverse=True) submissions = [ f for f in repo_files if f.endswith("submission.jsonl") and f.startswith(DS_SUBMISSIONS_PATH + "/") ] # for file_path in summary_files: for file_path in submissions: dir_name = Path(file_path).parent.name if dir_name in processed_result_dirs: continue processed_result_dirs.add(dir_name) entry = {LDB_COLS[0]: dir_name, LDB_COLS[1]: 'In Progress...', LDB_COLS[2]: 'In Progress...', LDB_COLS[3]: 'In Progress...', LDB_COLS[4]: 'In Progress...'} # check if summary file exists, otherwise skip if f"{DS_RESULTS_PATH}/{dir_name}/summary.txt" not in repo_files: leaderboard_entries.append(entry) continue # Download summary file local_summary_path = hf_hub_download( repo_id=DATASET_REPO_ID, filename=f"{DS_RESULTS_PATH}/{dir_name}/summary.txt", repo_type="dataset", local_dir=os.path.join("local_hf_downloads", dir_name), ) if Path(local_summary_path).exists(): with open(local_summary_path, "r", encoding="utf-8") as f: for line in f: if 'Execution perc' in line: entry[LDB_COLS[1]] = float(line.split(":")[1].strip().replace("%", "")) if 'Consistency perc' in line: entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", "")) if 'Final Solution Accuracy' in line: entry[LDB_COLS[3]] = float(line.split(":")[1].strip().replace("%", "")) if 'Total Submitted Models Parsed' in line: entry[LDB_COLS[4]] = int(line.split(":")[1].strip()) os.remove(local_summary_path) leaderboard_entries.append(entry) except Exception as e: print(f"Error loading leaderboard data: {e}") if not leaderboard_entries: return pd.DataFrame(columns=LDB_COLS) return pd.DataFrame(leaderboard_entries) def upload_submission(uploaded_file, dir_name, report_file, model_framework): """Upload submission to Hugging Face Dataset.""" if not HF_API: return False, "Hugging Face API not initialized" try: submission_path = f"{DS_SUBMISSIONS_PATH}/{dir_name}" HF_API.upload_file( path_or_fileobj=uploaded_file, path_in_repo=f"{submission_path}/submission.jsonl", repo_id=DATASET_REPO_ID, repo_type="dataset", commit_message=f"Upload submission: {dir_name}" ) if report_file: HF_API.upload_file( path_or_fileobj=report_file, path_in_repo=f"{submission_path}/report.pdf", repo_id=DATASET_REPO_ID, repo_type="dataset", commit_message=f"Upload report for submission: {dir_name}" ) # create a file for metadata metadata = { "submission_name": dir_name, "modelling_framework": model_framework, } HF_API.upload_file( path_or_fileobj=io.BytesIO(json.dumps(metadata, indent=4).encode('utf-8')), path_in_repo=f"{submission_path}/metadata.json", 
repo_id=DATASET_REPO_ID, repo_type="dataset", commit_message=f"Upload metadata for submission: {dir_name}" ) return True, submission_path except Exception as e: return False, f"Upload error: {str(e)}" def check_name_exists(submission_name): if not HF_API: return False try: repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset") for file_path in repo_files: if file_path.startswith(f"{DS_SUBMISSIONS_PATH}/{submission_name}"): return True except Exception as e: print(f"Error checking name existence: {e}") return False
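

# A minimal usage sketch, assuming this module is run from the repo root with
# valid Hugging Face credentials configured. The submission name, local file
# path, and framework below are hypothetical examples, not values defined by
# this module.
if __name__ == "__main__":
    # Fetch and display the current leaderboard state.
    print(load_leaderboard_data())

    name = "example-submission"  # hypothetical submission name
    if check_name_exists(name):
        print(f"Submission name already taken: {name}")
    else:
        # "submission.jsonl" and "Pyomo" are placeholder values; upload_file
        # accepts a binary file object for path_or_fileobj.
        with open("submission.jsonl", "rb") as fh:
            ok, detail = upload_submission(fh, name, None, "Pyomo")
        print(f"Upload {'succeeded' if ok else 'failed'}: {detail}")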