File size: 4,573 Bytes
180f9fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21ed616
180f9fe
 
 
21ed616
180f9fe
 
 
21ed616
 
 
 
 
 
 
 
180f9fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""Utilities for interacting with the Hugging Face Hub."""

import os
import shutil
from pathlib import Path
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
from huggingface_hub.utils import RepositoryNotFoundError, HFValidationError

from src.config import DATASET_REPO_ID, DS_RESULTS_PATH, DS_SUBMISSIONS_PATH, LDB_COLS

# Initialize HfApi
try:
    # A single module-level client is shared by every function in this module.
    HF_API = HfApi()
    print(f"Successfully initialized HfApi. Will use dataset repo: {DATASET_REPO_ID}")
except Exception as e:
    print(f"Failed to initialize HfApi: {e}")
    # Callers below treat a falsy HF_API as "Hub unavailable" and degrade
    # gracefully (empty DataFrame / False) instead of raising.
    HF_API = None


def load_leaderboard_data():
    """Load leaderboard data from the Hugging Face dataset repository.

    Scans the results path for ``summary.txt`` files, downloads each one,
    parses the reported metrics, and collects one row per result directory.

    Returns:
        pd.DataFrame: rows keyed by ``LDB_COLS``; empty (but with the
        expected columns) when the API is unavailable, listing fails, or
        no summaries exist.
    """
    if not HF_API:
        return pd.DataFrame(columns=LDB_COLS)

    leaderboard_entries = []
    processed_result_dirs = set()

    try:
        # List all files in the dataset once; reused for every entry.
        repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")

        summary_files = [
            f for f in repo_files
            if f.endswith("summary.txt") and f.startswith(DS_RESULTS_PATH + "/")
        ]
        # Reverse-sort so that if a directory somehow has several summary
        # files, the lexicographically latest path wins (first seen is kept).
        summary_files.sort(reverse=True)

        for file_path in summary_files:
            dir_name = Path(file_path).parent.name
            if dir_name in processed_result_dirs:
                continue
            processed_result_dirs.add(dir_name)

            # Isolate per-entry failures so one bad summary file does not
            # abort the remaining leaderboard rows (previously a single
            # download/parse error discarded every entry that followed).
            try:
                leaderboard_entries.append(_build_leaderboard_entry(file_path, dir_name))
            except Exception as e:
                print(f"Error processing result dir '{dir_name}': {e}")

    except Exception as e:
        print(f"Error loading leaderboard data: {e}")

    finally:
        # Clean up any downloaded summary files.
        if Path("temp_hf_downloads").exists():
            shutil.rmtree("temp_hf_downloads", ignore_errors=True)

    if not leaderboard_entries:
        return pd.DataFrame(columns=LDB_COLS)

    return pd.DataFrame(leaderboard_entries)


def _build_leaderboard_entry(file_path, dir_name):
    """Download one summary file and parse its metrics into a row dict.

    Raises whatever ``hf_hub_download`` raises; the caller isolates the
    failure to this entry.
    """
    entry = {
        LDB_COLS[0]: dir_name,
        LDB_COLS[1]: 'N/A',
        LDB_COLS[2]: 'N/A',
        LDB_COLS[3]: 'N/A',
        LDB_COLS[4]: 0,
    }

    local_summary_path = hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=file_path,
        repo_type="dataset",
        local_dir=os.path.join("temp_hf_downloads", dir_name),
    )

    if Path(local_summary_path).exists():
        with open(local_summary_path, "r", encoding="utf-8") as f:
            for line in f:
                # partition keeps values that themselves contain ':' intact
                # (split(":")[1] truncated them).
                _, _, raw_value = line.partition(":")
                value = raw_value.strip().replace("%", "")
                try:
                    if 'Execution perc' in line:
                        entry[LDB_COLS[1]] = float(value)
                    elif 'Consistency perc' in line:
                        entry[LDB_COLS[2]] = float(value)
                    elif 'Final Solution Accuracy' in line:
                        entry[LDB_COLS[3]] = float(value)
                    elif 'Total Submitted Models Parsed' in line:
                        entry[LDB_COLS[4]] = int(value)
                except ValueError:
                    # Malformed numeric value: keep the 'N/A'/0 placeholder
                    # instead of crashing the whole entry.
                    pass
        os.remove(local_summary_path)

    return entry


def upload_submission(uploaded_file, dir_name):
    """Upload a submission file to the Hugging Face dataset repository.

    Args:
        uploaded_file: Path or file object accepted by ``HfApi.upload_file``.
        dir_name: Target directory name under the submissions path.

    Returns:
        tuple: ``(True, submission_path)`` on success,
        ``(False, error_message)`` on failure or when the API is unavailable.
    """
    if not HF_API:
        return False, "Hugging Face API not initialized"

    submission_path = f"{DS_SUBMISSIONS_PATH}/{dir_name}"

    try:
        HF_API.upload_file(
            path_or_fileobj=uploaded_file,
            path_in_repo=f"{submission_path}/submission.jsonl",
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            commit_message=f"Upload submission: {dir_name}",
        )
    except Exception as e:
        return False, f"Upload error: {str(e)}"

    return True, submission_path


def check_name_exists(submission_name):
    """Return True if a submission directory named *submission_name* exists.

    Matches only the exact path ``{DS_SUBMISSIONS_PATH}/{submission_name}``
    (or files inside that directory). The previous bare prefix match wrongly
    reported 'foo' as taken whenever a submission like 'foobar' existed.

    Returns False on any listing error or when the API is unavailable.
    """
    if not HF_API:
        return False

    exact_path = f"{DS_SUBMISSIONS_PATH}/{submission_name}"
    dir_prefix = exact_path + "/"

    try:
        repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
        return any(
            file_path == exact_path or file_path.startswith(dir_prefix)
            for file_path in repo_files
        )
    except Exception as e:
        print(f"Error checking name existence: {e}")

    return False