# NOTE(review): removed page-scrape residue (Spaces status banner, file size,
# commit hash, and line-number gutter) that was not valid Python.
# src/leaderboard.py
import pandas as pd
from datasets import Dataset, load_dataset
from huggingface_hub import hf_hub_download, upload_file
import json
import datetime
from typing import Dict, List, Optional
import os
from config import LEADERBOARD_DATASET, HF_TOKEN
from src.utils import format_model_name, create_submission_id
def initialize_leaderboard() -> Dataset:
    """Initialize empty leaderboard dataset.

    Returns a Dataset with all expected leaderboard columns present
    and zero rows.
    """
    # Column order here defines the schema order of the empty dataset.
    columns = (
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    )
    return Dataset.from_dict({name: [] for name in columns})
def load_leaderboard() -> pd.DataFrame:
    """Load current leaderboard from HuggingFace dataset.

    Falls back to a freshly initialized empty leaderboard when the
    remote dataset cannot be fetched. Missing columns (e.g. from older
    dataset versions) are backfilled with 0.0 or '' as appropriate.
    """
    # Columns whose backfill default is numeric rather than a string.
    numeric = {'bleu', 'chrf', 'quality_score', 'cer', 'wer',
               'rouge1', 'rouge2', 'rougeL', 'len_ratio',
               'evaluation_samples'}
    required = (
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    )
    try:
        df = load_dataset(LEADERBOARD_DATASET, split='train').to_pandas()
        # Backfill any column the stored dataset lacks.
        for name in required:
            if name not in df.columns:
                df[name] = 0.0 if name in numeric else ''
        return df
    except Exception as e:
        print(f"Error loading leaderboard: {e}")
        print("Initializing empty leaderboard...")
        return initialize_leaderboard().to_pandas()
def save_leaderboard(df: pd.DataFrame) -> bool:
    """Save leaderboard back to HuggingFace dataset.

    Args:
        df: Full leaderboard table to persist.

    Returns:
        True when the push succeeded, False otherwise.
    """
    try:
        stamp = datetime.datetime.now().isoformat()
        # Round-trip through Dataset so the Hub stores Arrow-backed data.
        Dataset.from_pandas(df).push_to_hub(
            LEADERBOARD_DATASET,
            token=HF_TOKEN,
            commit_message=f"Update leaderboard - {stamp}",
        )
        print("Leaderboard saved successfully!")
        return True
    except Exception as e:
        print(f"Error saving leaderboard: {e}")
        return False
def add_model_results(
    model_path: str,
    author: str,
    metrics: Dict,
    detailed_metrics: Dict,
    evaluation_samples: int,
    model_type: str
) -> pd.DataFrame:
    """Add new model results to leaderboard.

    Replaces any existing entry for the same model path, re-sorts the
    table by quality score, and pushes the updated leaderboard.
    """
    df = load_leaderboard()

    # A resubmission replaces the previous row for the same model.
    if (df['model_path'] == model_path).any():
        print(f"Model {model_path} already exists. Updating with new results.")
        df = df[df['model_path'] != model_path]

    metric_names = ('bleu', 'chrf', 'quality_score', 'cer', 'wer',
                    'rouge1', 'rouge2', 'rougeL', 'len_ratio')
    new_entry = {
        'submission_id': create_submission_id(),
        'model_path': model_path,
        'model_display_name': format_model_name(model_path),
        'author': author,
        'submission_date': datetime.datetime.now().isoformat(),
        'detailed_metrics': json.dumps(detailed_metrics),
        'evaluation_samples': evaluation_samples,
        'model_type': model_type,
    }
    # Missing metrics default to 0.0 so the row always has every column.
    new_entry.update({name: metrics.get(name, 0.0) for name in metric_names})

    combined = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
    combined = combined.sort_values(
        'quality_score', ascending=False).reset_index(drop=True)

    save_leaderboard(combined)
    return combined
def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Get summary statistics for the leaderboard.

    Args:
        df: Leaderboard table containing at least 'quality_score',
            'model_display_name' and 'submission_date' columns.

    Returns:
        Dict with total model count, mean quality score, the display
        name of the highest-scoring model, and the newest submission date.
        Placeholder values are returned for an empty leaderboard.
    """
    if df.empty:
        return {
            'total_models': 0,
            'avg_quality_score': 0.0,
            'best_model': 'None',
            'latest_submission': 'None'
        }
    # Select the best model by score instead of df.iloc[0]: a freshly
    # loaded leaderboard is not guaranteed to be pre-sorted.
    best_idx = df['quality_score'].idxmax()
    return {
        'total_models': len(df),
        'avg_quality_score': df['quality_score'].mean(),
        'best_model': df.loc[best_idx, 'model_display_name'],
        'latest_submission': df['submission_date'].max()
    }
def get_top_models(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Get top N models by quality score."""
    # Stable descending sort reproduces nlargest's semantics:
    # ties keep first-occurrence order, NaN scores are excluded.
    ranked = df.dropna(subset=['quality_score']).sort_values(
        'quality_score', ascending=False, kind='mergesort')
    return ranked.head(n)
def search_models(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Search models by name or author.

    Case-insensitive substring match against display name, author and
    model path; an empty query returns the table unchanged.
    """
    if not query:
        return df
    needle = query.lower()
    # OR together a per-column match mask over the searchable fields.
    mask = pd.Series(False, index=df.index)
    for column in ('model_display_name', 'author', 'model_path'):
        mask |= df[column].str.lower().str.contains(needle, na=False)
    return df[mask]
def export_results(df: pd.DataFrame, format: str = 'csv') -> str:
    """Export leaderboard results in specified format.

    Args:
        df: Leaderboard table to export.
        format: Either 'csv' or 'json'.

    Returns:
        Name of the timestamped file written to the current directory.

    Raises:
        ValueError: If *format* is not a supported value.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    if format == 'json':
        out_name = f"salt_leaderboard_{stamp}.json"
        df.to_json(out_name, orient='records', indent=2)
    elif format == 'csv':
        out_name = f"salt_leaderboard_{stamp}.csv"
        df.to_csv(out_name, index=False)
    else:
        raise ValueError(f"Unsupported format: {format}")
    return out_name