# src/leaderboard.py | |
import pandas as pd | |
from datasets import Dataset, load_dataset | |
from huggingface_hub import hf_hub_download, upload_file | |
import json | |
import datetime | |
from typing import Dict, List, Optional | |
import os | |
from config import LEADERBOARD_DATASET, HF_TOKEN | |
from src.utils import format_model_name, create_submission_id | |
def initialize_leaderboard() -> Dataset:
    """Build a fresh leaderboard Dataset with the full column schema and no rows."""
    schema = (
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    )
    # Each column starts as an empty list; rows are appended by later submissions.
    return Dataset.from_dict({column: [] for column in schema})
def load_leaderboard() -> pd.DataFrame:
    """Fetch the current leaderboard from the HF Hub as a DataFrame.

    Columns missing from the stored dataset are backfilled in place
    (0.0 for metric/count columns, '' for text columns).  On any
    failure an empty leaderboard frame is returned instead.
    """
    numeric_cols = {
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'evaluation_samples',
    }
    schema = (
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    )
    try:
        frame = load_dataset(LEADERBOARD_DATASET, split='train').to_pandas()
        # Backfill any column the stored dataset lacks so downstream code
        # can rely on the full schema being present.
        for name in schema:
            if name not in frame.columns:
                frame[name] = 0.0 if name in numeric_cols else ''
        return frame
    except Exception as e:
        # Best-effort boundary: report and fall back to an empty board.
        print(f"Error loading leaderboard: {e}")
        print("Initializing empty leaderboard...")
        return initialize_leaderboard().to_pandas()
def save_leaderboard(df: pd.DataFrame) -> bool:
    """Push the leaderboard DataFrame back to the HF Hub dataset.

    Returns True on success; prints the error and returns False on failure.
    """
    try:
        Dataset.from_pandas(df).push_to_hub(
            LEADERBOARD_DATASET,
            token=HF_TOKEN,
            # Timestamped commit message makes Hub history browsable.
            commit_message=f"Update leaderboard - {datetime.datetime.now().isoformat()}",
        )
        print("Leaderboard saved successfully!")
        return True
    except Exception as e:
        print(f"Error saving leaderboard: {e}")
        return False
def add_model_results(
    model_path: str,
    author: str,
    metrics: Dict,
    detailed_metrics: Dict,
    evaluation_samples: int,
    model_type: str
) -> pd.DataFrame:
    """Insert (or replace) a model's evaluation results on the leaderboard.

    Loads the current board, drops any prior entry for *model_path*,
    appends the new row, re-sorts by quality_score (best first),
    persists the result to the Hub, and returns the updated DataFrame.
    """
    df = load_leaderboard()

    # Resubmission: replace the existing row rather than duplicating it.
    if (df['model_path'] == model_path).any():
        print(f"Model {model_path} already exists. Updating with new results.")
        df = df[df['model_path'] != model_path]

    scalar_metrics = (
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio',
    )
    new_entry = {
        'submission_id': create_submission_id(),
        'model_path': model_path,
        'model_display_name': format_model_name(model_path),
        'author': author,
        'submission_date': datetime.datetime.now().isoformat(),
        # Missing metrics default to 0.0 so the row is always complete.
        **{name: metrics.get(name, 0.0) for name in scalar_metrics},
        # Nested metrics are stored as a JSON string column.
        'detailed_metrics': json.dumps(detailed_metrics),
        'evaluation_samples': evaluation_samples,
        'model_type': model_type,
    }

    updated = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
    # Rank the board: highest quality score first.
    updated = updated.sort_values('quality_score', ascending=False).reset_index(drop=True)

    save_leaderboard(updated)
    return updated
def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Return summary statistics for the leaderboard.

    Assumes *df* is sorted by quality_score descending (as produced by
    add_model_results), so row 0 is the best model — TODO confirm all
    callers pass a sorted frame.

    Returns a dict with 'total_models', 'avg_quality_score',
    'best_model' and 'latest_submission'; placeholder values when the
    leaderboard is empty.
    """
    if df.empty:
        return {
            'total_models': 0,
            'avg_quality_score': 0.0,
            'best_model': 'None',
            'latest_submission': 'None'
        }
    # df is guaranteed non-empty past the guard above, so no per-field
    # emptiness checks are needed here.
    return {
        'total_models': len(df),
        'avg_quality_score': df['quality_score'].mean(),
        'best_model': df.iloc[0]['model_display_name'],
        'latest_submission': df['submission_date'].max()
    }
def get_top_models(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return up to *n* rows with the highest quality_score, best first."""
    return df.nlargest(columns='quality_score', n=n)
def search_models(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Case-insensitive substring search over name, author and path.

    An empty/falsy query returns the full leaderboard unchanged.
    """
    if not query:
        return df
    needle = query.lower()
    # OR-accumulate a boolean mask across the three searchable columns.
    mask = False
    for column in ('model_display_name', 'author', 'model_path'):
        mask = mask | df[column].str.lower().str.contains(needle, na=False)
    return df[mask]
def export_results(df: pd.DataFrame, format: str = 'csv') -> str:
    """Write the leaderboard to a timestamped file and return its name.

    Supported formats: 'csv' and 'json' (records orientation).
    Raises ValueError for anything else.
    """
    # Validate up front so no timestamp work happens on bad input.
    if format not in ('csv', 'json'):
        raise ValueError(f"Unsupported format: {format}")
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"salt_leaderboard_{stamp}.{format}"
    if format == 'csv':
        df.to_csv(filename, index=False)
    else:
        df.to_json(filename, orient='records', indent=2)
    return filename