File size: 6,101 Bytes
52616c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# src/leaderboard.py
import pandas as pd
from datasets import Dataset, load_dataset
from huggingface_hub import hf_hub_download, upload_file
import json
import datetime
from typing import Dict, List, Optional
import os
from config import LEADERBOARD_DATASET, HF_TOKEN
from src.utils import format_model_name, create_submission_id

def initialize_leaderboard() -> Dataset:
    """Build an empty leaderboard Dataset carrying the full expected schema.

    Returns:
        A ``datasets.Dataset`` whose columns match the leaderboard schema,
        each with zero rows. Column order is part of the schema and is kept
        stable.
    """
    schema_columns = [
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    ]
    return Dataset.from_dict({column: [] for column in schema_columns})

def load_leaderboard() -> pd.DataFrame:
    """Fetch the current leaderboard from the HuggingFace Hub.

    Any column missing from the stored dataset is backfilled with a neutral
    default (0.0 for numeric metrics, '' otherwise) so downstream code can
    rely on the complete schema. On any load failure an empty leaderboard is
    returned instead.

    Returns:
        The leaderboard as a pandas DataFrame.
    """
    numeric_columns = {
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'evaluation_samples',
    }
    schema_columns = [
        'submission_id', 'model_path', 'model_display_name', 'author',
        'submission_date', 'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio', 'detailed_metrics',
        'evaluation_samples', 'model_type',
    ]

    try:
        df = load_dataset(LEADERBOARD_DATASET, split='train').to_pandas()

        # Backfill any columns absent from the stored dataset.
        for column in schema_columns:
            if column not in df.columns:
                df[column] = 0.0 if column in numeric_columns else ''

        return df

    except Exception as e:
        # Deliberate broad catch: a missing/unreadable dataset falls back to
        # an empty leaderboard rather than crashing the app.
        print(f"Error loading leaderboard: {e}")
        print("Initializing empty leaderboard...")
        return initialize_leaderboard().to_pandas()

def save_leaderboard(df: pd.DataFrame) -> bool:
    """Persist the leaderboard DataFrame to the HuggingFace Hub.

    Args:
        df: Full leaderboard to publish.

    Returns:
        True if the push succeeded, False otherwise (the error is printed,
        not raised, so callers can continue).
    """
    try:
        timestamp = datetime.datetime.now().isoformat()
        Dataset.from_pandas(df).push_to_hub(
            LEADERBOARD_DATASET,
            token=HF_TOKEN,
            commit_message=f"Update leaderboard - {timestamp}",
        )
        print("Leaderboard saved successfully!")
        return True

    except Exception as e:
        # Best-effort save: report and signal failure instead of raising.
        print(f"Error saving leaderboard: {e}")
        return False

def add_model_results(
    model_path: str,
    author: str,
    metrics: Dict,
    detailed_metrics: Dict,
    evaluation_samples: int,
    model_type: str
) -> pd.DataFrame:
    """Insert (or replace) a model's evaluation results in the leaderboard.

    Any existing entry with the same ``model_path`` is dropped before the new
    row is appended. The leaderboard is then re-sorted by ``quality_score``
    (descending), persisted to the Hub, and returned.

    Args:
        model_path: Hub path identifying the evaluated model.
        author: Submitter name.
        metrics: Flat metric name -> value mapping; missing metrics become 0.0.
        detailed_metrics: Arbitrary nested metrics, stored as a JSON string.
        evaluation_samples: Number of samples used for evaluation.
        model_type: Free-form model category label.

    Returns:
        The updated leaderboard DataFrame.
    """
    df = load_leaderboard()

    # Replace any prior submission for this model.
    if (df['model_path'] == model_path).any():
        print(f"Model {model_path} already exists. Updating with new results.")
        df = df[df['model_path'] != model_path]

    metric_names = [
        'bleu', 'chrf', 'quality_score', 'cer', 'wer',
        'rouge1', 'rouge2', 'rougeL', 'len_ratio',
    ]
    entry = {
        'submission_id': create_submission_id(),
        'model_path': model_path,
        'model_display_name': format_model_name(model_path),
        'author': author,
        'submission_date': datetime.datetime.now().isoformat(),
        **{name: metrics.get(name, 0.0) for name in metric_names},
        'detailed_metrics': json.dumps(detailed_metrics),
        'evaluation_samples': evaluation_samples,
        'model_type': model_type,
    }

    updated = pd.concat([df, pd.DataFrame([entry])], ignore_index=True)
    updated = (
        updated.sort_values('quality_score', ascending=False)
        .reset_index(drop=True)
    )

    save_leaderboard(updated)
    return updated

def get_leaderboard_summary(df: pd.DataFrame) -> Dict:
    """Return summary statistics for the leaderboard.

    Args:
        df: Leaderboard DataFrame. Does NOT need to be pre-sorted.

    Returns:
        Dict with ``total_models``, ``avg_quality_score``, ``best_model``
        and ``latest_submission``; placeholder values when df is empty.
    """
    if df.empty:
        return {
            'total_models': 0,
            'avg_quality_score': 0.0,
            'best_model': 'None',
            'latest_submission': 'None'
        }

    # Select the best model by quality_score explicitly instead of trusting
    # iloc[0] — the caller may pass an unsorted DataFrame. The redundant
    # df.empty re-checks are gone: the guard above already handled that case.
    best_idx = df['quality_score'].idxmax()
    return {
        'total_models': len(df),
        'avg_quality_score': df['quality_score'].mean(),
        'best_model': df.loc[best_idx, 'model_display_name'],
        'latest_submission': df['submission_date'].max()
    }

def get_top_models(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the top *n* leaderboard rows ranked by quality score.

    Args:
        df: Leaderboard DataFrame.
        n: Maximum number of rows to return.

    Returns:
        Up to *n* rows in descending quality_score order; ties keep their
        original relative order.
    """
    ranked = df.sort_values('quality_score', ascending=False, kind='stable')
    return ranked.head(n)

def search_models(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Filter leaderboard rows whose name, author, or path contains *query*.

    Matching is case-insensitive and substring/regex based (the query is
    passed to ``str.contains``). An empty query returns the DataFrame
    unchanged; NaN cells never match.

    Args:
        df: Leaderboard DataFrame.
        query: Search text.

    Returns:
        The matching subset of *df*.
    """
    if not query:
        return df

    needle = query.lower()

    def _column_hits(column: str) -> pd.Series:
        # na=False keeps missing values out of the match set.
        return df[column].str.lower().str.contains(needle, na=False)

    hits = (
        _column_hits('model_display_name')
        | _column_hits('author')
        | _column_hits('model_path')
    )
    return df[hits]

def export_results(df: pd.DataFrame, format: str = 'csv') -> str:
    """Write the leaderboard to a timestamped file in the working directory.

    Args:
        df: Leaderboard rows to export.
        format: Either 'csv' or 'json'.

    Returns:
        The name of the file that was written.

    Raises:
        ValueError: If *format* is not one of the supported values.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    if format == 'csv':
        filename = f"salt_leaderboard_{stamp}.csv"
        df.to_csv(filename, index=False)
    elif format == 'json':
        filename = f"salt_leaderboard_{stamp}.json"
        df.to_json(filename, orient='records', indent=2)
    else:
        raise ValueError(f"Unsupported format: {format}")

    return filename