Spaces:
Sleeping
Sleeping
Create leaderboard.py
Browse files- src/leaderboard.py +183 -0
src/leaderboard.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# src/leaderboard.py
|
2 |
+
import pandas as pd
|
3 |
+
from datasets import Dataset, load_dataset
|
4 |
+
from huggingface_hub import hf_hub_download, upload_file
|
5 |
+
import json
|
6 |
+
import datetime
|
7 |
+
from typing import Dict, List, Optional
|
8 |
+
import os
|
9 |
+
from config import LEADERBOARD_DATASET, HF_TOKEN
|
10 |
+
from src.utils import format_model_name, create_submission_id
|
11 |
+
|
12 |
+
def initialize_leaderboard() -> Dataset:
    """Build a leaderboard Dataset with the full column schema but zero rows."""
    # Column order matters: it defines the schema of the pushed dataset.
    columns = [
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    ]
    return Dataset.from_dict({name: [] for name in columns})
34 |
+
|
35 |
+
def load_leaderboard() -> pd.DataFrame:
    """Fetch the leaderboard from the Hub as a DataFrame.

    Any failure (missing dataset, auth, network) falls back to an empty
    leaderboard rather than propagating, so the UI always has a frame.
    """
    # Columns whose backfill default is numeric rather than the empty string.
    numeric_defaults = {
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'evaluation_samples',
    }
    required_columns = [
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    ]
    try:
        df = load_dataset(LEADERBOARD_DATASET, split='train').to_pandas()
        # Backfill columns missing from older snapshots of the dataset.
        for column in required_columns:
            if column not in df.columns:
                df[column] = 0.0 if column in numeric_defaults else ''
        return df
    except Exception as e:
        print(f"Error loading leaderboard: {e}")
        print("Initializing empty leaderboard...")
        return initialize_leaderboard().to_pandas()
|
62 |
+
|
63 |
+
def save_leaderboard(df: pd.DataFrame) -> bool:
    """Push the leaderboard DataFrame to the Hub.

    Returns True on success, False on any failure (the error is printed,
    not raised, so callers can degrade gracefully).
    """
    try:
        stamp = datetime.datetime.now().isoformat()
        Dataset.from_pandas(df).push_to_hub(
            LEADERBOARD_DATASET,
            token=HF_TOKEN,
            commit_message=f"Update leaderboard - {stamp}",
        )
        print("Leaderboard saved successfully!")
        return True
    except Exception as e:
        print(f"Error saving leaderboard: {e}")
        return False
|
82 |
+
|
83 |
+
def add_model_results(
    model_path: str,
    author: str,
    metrics: Dict,
    detailed_metrics: Dict,
    evaluation_samples: int,
    model_type: str
) -> pd.DataFrame:
    """Insert (or replace) one model's evaluation results and persist the board.

    Returns the updated, quality-sorted leaderboard DataFrame.
    """
    board = load_leaderboard()

    # A resubmission replaces the previous row for the same model path.
    if (board['model_path'] == model_path).any():
        print(f"Model {model_path} already exists. Updating with new results.")
        board = board[board['model_path'] != model_path]

    # Scalar metrics copied straight from the metrics dict (default 0.0).
    metric_keys = [
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio',
    ]
    row = {
        'submission_id': create_submission_id(),
        'model_path': model_path,
        'model_display_name': format_model_name(model_path),
        'author': author,
        'submission_date': datetime.datetime.now().isoformat(),
        'detailed_metrics': json.dumps(detailed_metrics),
        'evaluation_samples': evaluation_samples,
        'model_type': model_type,
    }
    for key in metric_keys:
        row[key] = metrics.get(key, 0.0)

    updated = pd.concat([board, pd.DataFrame([row])], ignore_index=True)
    # Leaderboard ordering invariant: best quality_score first.
    updated = updated.sort_values(
        'quality_score', ascending=False
    ).reset_index(drop=True)

    save_leaderboard(updated)
    return updated
|
134 |
+
|
135 |
+
def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Summarize the leaderboard.

    Returns a dict with:
      - total_models: number of rows
      - avg_quality_score: mean of 'quality_score' (0.0 when empty)
      - best_model: display name of row 0 — NOTE(review): assumes the frame
        is already sorted by quality_score descending, as the rest of this
        module maintains; confirm callers never pass an unsorted frame
      - latest_submission: lexicographic max of 'submission_date' (works
        because dates are stored as ISO-8601 strings)
    """
    if df.empty:
        return {
            'total_models': 0,
            'avg_quality_score': 0.0,
            'best_model': 'None',
            'latest_submission': 'None'
        }

    # df is guaranteed non-empty here, so the per-field `if not df.empty`
    # re-checks the original carried were dead code and have been removed.
    return {
        'total_models': len(df),
        'avg_quality_score': df['quality_score'].mean(),
        'best_model': df.iloc[0]['model_display_name'],
        'latest_submission': df['submission_date'].max()
    }
|
151 |
+
|
152 |
+
def get_top_models(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the n rows with the highest quality_score."""
    # nlargest keeps first-seen order on ties (stable, unlike a re-sort).
    top = df.nlargest(n, 'quality_score')
    return top
|
155 |
+
|
156 |
+
def search_models(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Filter rows whose display name, author, or model path contains query.

    Matching is case-insensitive plain substring; an empty query returns
    the frame unchanged.
    """
    if not query:
        return df

    needle = query.lower()
    # regex=False: user-typed text like "(" or "c++" must be matched
    # literally — with the default regex=True these queries raised
    # re.error instead of returning results.
    mask = (
        df['model_display_name'].str.lower().str.contains(needle, regex=False, na=False) |
        df['author'].str.lower().str.contains(needle, regex=False, na=False) |
        df['model_path'].str.lower().str.contains(needle, regex=False, na=False)
    )

    return df[mask]
|
169 |
+
|
170 |
+
def export_results(df: pd.DataFrame, format: str = 'csv') -> str:
    """Write the leaderboard to a timestamped file and return its name.

    Supported formats: 'csv' and 'json'. Raises ValueError for anything
    else. (`format` shadows the builtin but is kept for keyword callers.)
    """
    if format not in ('csv', 'json'):
        raise ValueError(f"Unsupported format: {format}")

    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"salt_leaderboard_{stamp}.{format}"

    if format == 'csv':
        df.to_csv(filename, index=False)
    else:
        df.to_json(filename, orient='records', indent=2)

    return filename
|