akera commited on
Commit
52616c5
·
verified ·
1 Parent(s): 34a7f8e

Create leaderboard.py

Browse files
Files changed (1) hide show
  1. src/leaderboard.py +183 -0
src/leaderboard.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/leaderboard.py
2
+ import pandas as pd
3
+ from datasets import Dataset, load_dataset
4
+ from huggingface_hub import hf_hub_download, upload_file
5
+ import json
6
+ import datetime
7
+ from typing import Dict, List, Optional
8
+ import os
9
+ from config import LEADERBOARD_DATASET, HF_TOKEN
10
+ from src.utils import format_model_name, create_submission_id
11
+
12
def initialize_leaderboard() -> Dataset:
    """Create an empty leaderboard Dataset containing every expected column."""
    # Full schema shared with load_leaderboard(); order matters for display.
    columns = [
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    ]
    # One empty list per column -> zero-row dataset with the right schema.
    return Dataset.from_dict({name: [] for name in columns})
34
+
35
def load_leaderboard() -> pd.DataFrame:
    """Load the current leaderboard from the HuggingFace Hub dataset.

    Missing columns are back-filled with neutral defaults so downstream
    code can rely on the full schema being present. On any load failure
    an empty leaderboard is returned instead of raising.

    Returns:
        DataFrame with one row per submission (possibly empty).
    """
    try:
        dataset = load_dataset(LEADERBOARD_DATASET, split='train')
        df = dataset.to_pandas()

        # Full schema every caller expects (must match initialize_leaderboard).
        required_columns = [
            'submission_id', 'model_path', 'model_display_name', 'author',
            'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
            'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
            'evaluation_samples', 'model_type'
        ]
        # Float-valued metric columns; back-filled with 0.0.
        float_columns = {
            'bleu', 'chrf', 'quality_score', 'cer', 'wer',
            'rouge1', 'rouge2', 'rougeL', 'len_ratio'
        }

        for col in required_columns:
            if col not in df.columns:
                if col in float_columns:
                    df[col] = 0.0
                elif col == 'evaluation_samples':
                    # Sample counts are integers, not floats.
                    df[col] = 0
                else:
                    df[col] = ''

        return df

    except Exception as e:
        # Broad catch is deliberate: any Hub/network/schema failure should
        # degrade to an empty leaderboard rather than crash the app.
        print(f"Error loading leaderboard: {e}")
        print("Initializing empty leaderboard...")
        return initialize_leaderboard().to_pandas()
62
+
63
def save_leaderboard(df: pd.DataFrame) -> bool:
    """Persist the leaderboard DataFrame to the HuggingFace Hub.

    Returns:
        True when the push succeeded, False on any failure (the error
        is printed rather than raised).
    """
    try:
        stamp = datetime.datetime.now().isoformat()
        # Convert to a Dataset and push in one chained call.
        Dataset.from_pandas(df).push_to_hub(
            LEADERBOARD_DATASET,
            token=HF_TOKEN,
            commit_message=f"Update leaderboard - {stamp}"
        )
    except Exception as e:
        print(f"Error saving leaderboard: {e}")
        return False

    print("Leaderboard saved successfully!")
    return True
82
+
83
def add_model_results(
    model_path: str,
    author: str,
    metrics: Dict,
    detailed_metrics: Dict,
    evaluation_samples: int,
    model_type: str
) -> pd.DataFrame:
    """Insert a model's evaluation results into the leaderboard.

    If a submission with the same ``model_path`` already exists it is
    replaced. The updated leaderboard is re-sorted by ``quality_score``
    (best first), pushed to the Hub, and returned.
    """
    board = load_leaderboard()

    # Drop any previous submission for this model before inserting.
    duplicates = board['model_path'] == model_path
    if duplicates.any():
        print(f"Model {model_path} already exists. Updating with new results.")
        board = board[~duplicates]

    row = {
        'submission_id': create_submission_id(),
        'model_path': model_path,
        'model_display_name': format_model_name(model_path),
        'author': author,
        'submission_date': datetime.datetime.now().isoformat(),
        'bleu': metrics.get('bleu', 0.0),
        'chrf': metrics.get('chrf', 0.0),
        'quality_score': metrics.get('quality_score', 0.0),
        'cer': metrics.get('cer', 0.0),
        'wer': metrics.get('wer', 0.0),
        'rouge1': metrics.get('rouge1', 0.0),
        'rouge2': metrics.get('rouge2', 0.0),
        'rougeL': metrics.get('rougeL', 0.0),
        'len_ratio': metrics.get('len_ratio', 0.0),
        # Nested metrics are serialized so the Hub dataset stays flat.
        'detailed_metrics': json.dumps(detailed_metrics),
        'evaluation_samples': evaluation_samples,
        'model_type': model_type
    }

    merged = pd.concat([board, pd.DataFrame([row])], ignore_index=True)
    merged = merged.sort_values('quality_score', ascending=False).reset_index(drop=True)

    save_leaderboard(merged)
    return merged
134
+
135
def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Summarize the leaderboard: size, mean quality, best model, latest date.

    Args:
        df: Leaderboard frame with at least ``quality_score``,
            ``model_display_name`` and ``submission_date`` columns.

    Returns:
        Dict with keys ``total_models``, ``avg_quality_score``,
        ``best_model`` and ``latest_submission``; placeholder values
        when the leaderboard is empty.
    """
    if df.empty:
        return {
            'total_models': 0,
            'avg_quality_score': 0.0,
            'best_model': 'None',
            'latest_submission': 'None'
        }

    # Pick the true best by score rather than assuming the frame is
    # pre-sorted (the original used iloc[0], which only works when the
    # caller sorted the frame first).
    best_idx = df['quality_score'].idxmax()
    return {
        'total_models': len(df),
        'avg_quality_score': df['quality_score'].mean(),
        'best_model': df.loc[best_idx, 'model_display_name'],
        'latest_submission': df['submission_date'].max()
    }
151
+
152
def get_top_models(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the top ``n`` leaderboard rows ranked by quality score."""
    # nlargest keeps the original index and handles ties/NaN consistently.
    ranked = df.nlargest(n, 'quality_score')
    return ranked
155
+
156
def search_models(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Filter the leaderboard to rows whose display name, author, or
    model path contains ``query`` (case-insensitive, literal substring).

    Args:
        df: Leaderboard frame.
        query: Search text; empty/None returns ``df`` unchanged.

    Returns:
        Subset of ``df`` matching the query.
    """
    if not query:
        return df

    # regex=False: treat the query as a literal substring so characters
    # like '[', '+' or '(' in model names/queries don't get interpreted
    # as regex (the original defaulted to regex=True and mis-matched or
    # raised on such queries).
    def _contains(col: str) -> pd.Series:
        return df[col].str.contains(query, case=False, na=False, regex=False)

    mask = (
        _contains('model_display_name') |
        _contains('author') |
        _contains('model_path')
    )
    return df[mask]
169
+
170
def export_results(df: pd.DataFrame, format: str = 'csv') -> str:
    """Write the leaderboard to a timestamped file and return the filename.

    Args:
        df: Leaderboard frame to export.
        format: Either 'csv' or 'json'.

    Raises:
        ValueError: For any other format string.
    """
    # Reject unknown formats up front (guard clause).
    if format not in ('csv', 'json'):
        raise ValueError(f"Unsupported format: {format}")

    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"salt_leaderboard_{stamp}.{format}"

    if format == 'csv':
        df.to_csv(filename, index=False)
    else:
        df.to_json(filename, orient='records', indent=2)

    return filename