File size: 2,851 Bytes
11a64ab
 
 
 
aa99a22
11a64ab
aa99a22
11a64ab
 
8727da4
aa99a22
8727da4
11a64ab
 
 
aa99a22
 
 
11a64ab
aa99a22
 
 
 
11a64ab
 
aa99a22
 
 
11a64ab
 
 
aa99a22
 
11a64ab
aa99a22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8727da4
aa99a22
 
8727da4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11a64ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# config.py
import os

# Hugging Face settings
# Access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Dataset identifiers (presumably Hugging Face Hub repo ids -- confirm
# against the code that loads them).
SALT_DATASET = "sunbird/salt"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"

# Language settings for the UG40 language set (the original note says these
# were updated from the SALT constants).
# Maps each language code to its human-readable name.
LANGUAGE_NAMES = {
    "ach": "Acholi",
    "eng": "English",
    "lgg": "Lugbara",
    "lug": "Luganda",
    "nyn": "Runyankole",
    "rny": "Runyoro",
    "teo": "Ateso",
    "swa": "Swahili",
}

# Language codes as a list, in the insertion order of LANGUAGE_NAMES.
ALL_UG40_LANGUAGES = list(LANGUAGE_NAMES)

# Google Translate language mapping: project language code -> the code used
# for Google Translate.
GOOGLE_LANG_MAP = {
    "lug": "lg",
    "ach": "ach",
    "swa": "sw",
    "eng": "en",
}

# Google Translate supported subset (for comparison); these are exactly the
# keys of GOOGLE_LANG_MAP, in the same order.
GOOGLE_SUPPORTED_LANGUAGES = list(GOOGLE_LANG_MAP)

# Evaluation settings
# Sample limit, counted per language pair.
MAX_TEST_SAMPLES: int = 500
# Minimum number of samples a language pair needs to be valid.
MIN_SAMPLES_PER_PAIR: int = 10

# UI settings
# Title string for the leaderboard (presumably rendered as the page heading;
# confirm against the UI code).
TITLE: str = "🏆 SALT Translation Leaderboard"
# Introductory blurb. It uses ** bold markers, so it appears to be intended
# for a Markdown renderer. The text begins and ends with a newline because
# the triple-quoted literal starts and closes on lines of their own.
DESCRIPTION: str = """
**Scientific evaluation of translation models on Ugandan languages**

Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
Compare against Google Translate baseline and other submitted models.
"""

# File format specifications for prediction files: the columns that must be
# present, the columns that may be present, and the accepted file extensions.
PREDICTION_FORMAT = dict(
    required_columns=["sample_id", "prediction"],
    optional_columns=["model_name", "confidence"],
    file_types=[".csv", ".tsv", ".json"],
)

# Metrics configuration (the original note says it was updated to match the
# reference implementation).
METRICS_CONFIG = {
    "primary_metrics": ["bleu", "chrf", "quality_score"],
    "secondary_metrics": [
        "rouge1",
        "rouge2",
        "rougeL",
        "cer",
        "wer",
        "len_ratio",
    ],
    "display_precision": 4,
    # Metrics listed as components of quality_score. The per-metric notes are
    # carried over from the original annotations; the scoring code itself is
    # not in this file.
    "quality_score_components": [
        "bleu",    # normalized to 0-1
        "chrf",    # already 0-1
        "cer",     # inverted (1 - cer)
        "wer",     # inverted (1 - wer)
        "rouge1",  # 0-1
        "rougeL",  # 0-1
    ],
    # Lower is better.
    "error_metrics": ["cer", "wer"],
    # Higher is better.
    "score_metrics": [
        "bleu",
        "chrf",
        "quality_score",
        "rouge1",
        "rouge2",
        "rougeL",
    ],
}

# Display settings for the leaderboard: caps on how many models or language
# pairs are shown, plus the number of decimal places used per metric.
DISPLAY_CONFIG = {
    "max_models_radar": 8,
    "max_models_ranking": 15,
    "max_language_pairs_detail": 20,
    "decimal_places": {
        "quality_score": 4,
        "bleu": 2,
        "chrf": 4,
        "rouge1": 4,
        "rouge2": 4,
        "rougeL": 4,
        "cer": 4,
        "wer": 4,
        "len_ratio": 3,
        # coverage_rate is a percentage.
        "coverage_rate": 1,
    },
}

# Chart colors and styling: two hex colors, two named colorscales, and
# height-related sizing values for charts.
CHART_CONFIG = dict(
    google_comparable_color="#1f77b4",
    ug40_only_color="#ff7f0e",
    primary_colorscale="Viridis",
    secondary_colorscale="Plasma",
    bar_height_per_model=30,
    min_chart_height=400,
    max_chart_height=1000,
)