# leaderboard/src/leaderboard.py
# Provenance: Hugging Face Space file page — uploaded by akera
# ("Create leaderboard.py", commit 52616c5 verified, 6.1 kB).
# src/leaderboard.py
import pandas as pd
from datasets import Dataset, load_dataset
from huggingface_hub import hf_hub_download, upload_file
import json
import datetime
from typing import Dict, List, Optional
import os
from config import LEADERBOARD_DATASET, HF_TOKEN
from src.utils import format_model_name, create_submission_id
def initialize_leaderboard() -> Dataset:
    """Build a leaderboard Dataset containing the full column schema but no rows."""
    # Single source of truth for the schema: every leaderboard column, in order.
    columns = (
        'submission_id',
        'model_path',
        'model_display_name',
        'author',
        'submission_date',
        'bleu',
        'chrf',
        'quality_score',
        'cer',
        'wer',
        'rouge1',
        'rouge2',
        'rougeL',
        'len_ratio',
        'detailed_metrics',
        'evaluation_samples',
        'model_type',
    )
    # Each column starts out as an empty list of rows.
    return Dataset.from_dict({name: [] for name in columns})
def load_leaderboard() -> pd.DataFrame:
    """Fetch the current leaderboard from the HuggingFace dataset.

    Missing schema columns are backfilled (0.0 for numeric metrics, '' for
    text fields). On any failure an empty leaderboard frame is returned.
    """
    # Columns that hold numeric metric values; everything else is text.
    numeric_columns = frozenset((
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'evaluation_samples',
    ))
    required_columns = (
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    )
    try:
        df = load_dataset(LEADERBOARD_DATASET, split='train').to_pandas()
        # Older snapshots may predate some columns — backfill with defaults.
        for name in required_columns:
            if name not in df.columns:
                df[name] = 0.0 if name in numeric_columns else ''
        return df
    except Exception as exc:
        # Best-effort: never crash the app just because the board is missing.
        print(f"Error loading leaderboard: {exc}")
        print("Initializing empty leaderboard...")
        return initialize_leaderboard().to_pandas()
def save_leaderboard(df: pd.DataFrame) -> bool:
    """Push the leaderboard DataFrame back to the HuggingFace Hub.

    Returns True on success, False (with a printed error) on any failure.
    """
    try:
        stamp = datetime.datetime.now().isoformat()
        # Convert to a Dataset and publish in one go.
        Dataset.from_pandas(df).push_to_hub(
            LEADERBOARD_DATASET,
            token=HF_TOKEN,
            commit_message=f"Update leaderboard - {stamp}",
        )
        print("Leaderboard saved successfully!")
        return True
    except Exception as exc:
        print(f"Error saving leaderboard: {exc}")
        return False
def add_model_results(
    model_path: str,
    author: str,
    metrics: Dict,
    detailed_metrics: Dict,
    evaluation_samples: int,
    model_type: str
) -> pd.DataFrame:
    """Insert (or replace) a model's results and return the re-ranked board.

    Any existing row with the same model_path is dropped first, so a model
    appears at most once. The updated board is saved to the Hub as a side
    effect and also returned.
    """
    board = load_leaderboard()

    # Replace-on-resubmit: drop any prior entry for this model.
    if (board['model_path'] == model_path).any():
        print(f"Model {model_path} already exists. Updating with new results.")
        board = board[board['model_path'] != model_path]

    # Scalar metric columns pulled from the metrics dict (0.0 when absent).
    metric_names = (
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio',
    )
    row = {
        'submission_id': create_submission_id(),
        'model_path': model_path,
        'model_display_name': format_model_name(model_path),
        'author': author,
        'submission_date': datetime.datetime.now().isoformat(),
        **{name: metrics.get(name, 0.0) for name in metric_names},
        # Stored as a JSON string so it survives the Arrow round-trip.
        'detailed_metrics': json.dumps(detailed_metrics),
        'evaluation_samples': evaluation_samples,
        'model_type': model_type,
    }

    updated = pd.concat([board, pd.DataFrame([row])], ignore_index=True)
    # Rank best-first by overall quality.
    updated = updated.sort_values('quality_score', ascending=False).reset_index(drop=True)

    save_leaderboard(updated)
    return updated
def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Return summary statistics for the leaderboard.

    Args:
        df: Leaderboard frame, assumed sorted best-first (quality_score
            descending) by the code that builds it — row 0 is the best model.

    Returns:
        Dict with total_models, avg_quality_score, best_model (display name
        of row 0), and latest_submission (max ISO date string). Placeholder
        values are returned for an empty board.
    """
    if df.empty:
        return {
            'total_models': 0,
            'avg_quality_score': 0.0,
            'best_model': 'None',
            'latest_submission': 'None'
        }
    # The empty case returned above, so the ternary guards that used to
    # re-check df.empty here were dead code — removed.
    return {
        'total_models': len(df),
        # float() converts the numpy scalar to a plain Python float.
        'avg_quality_score': float(df['quality_score'].mean()),
        'best_model': df.iloc[0]['model_display_name'],
        'latest_submission': df['submission_date'].max()
    }
def get_top_models(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the top *n* rows ranked by quality_score (best first)."""
    # Stable descending sort keeps first-occurrence order for ties,
    # matching nlargest(keep='first') semantics.
    ranked = df.sort_values('quality_score', ascending=False, kind='stable')
    return ranked.head(n)
def search_models(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Filter rows whose display name, author, or path contains *query*.

    Matching is case-insensitive; an empty query returns the frame unchanged.
    """
    if not query:
        return df
    needle = query.lower()
    searchable = ('model_display_name', 'author', 'model_path')
    # OR together a contains-mask per searchable column (NaN counts as no match).
    hits = pd.Series(False, index=df.index)
    for column in searchable:
        hits |= df[column].str.lower().str.contains(needle, na=False)
    return df[hits]
def export_results(df: pd.DataFrame, format: str = 'csv') -> str:
    """Write the leaderboard to a timestamped file and return its name.

    Args:
        df: Leaderboard frame to export.
        format: Either 'csv' or 'json'.

    Raises:
        ValueError: If *format* is not a supported export format.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    if format == 'csv':
        outfile = f"salt_leaderboard_{stamp}.csv"
        df.to_csv(outfile, index=False)
    elif format == 'json':
        outfile = f"salt_leaderboard_{stamp}.json"
        df.to_json(outfile, orient='records', indent=2)
    else:
        raise ValueError(f"Unsupported format: {format}")
    return outfile