# leaderboard/config.py — SALT translation leaderboard configuration.
# (HuggingFace Hub page chrome from the scraped copy removed; last
# recorded revision: aa99a22, "Update config.py" by akera.)
# config.py — central configuration constants for the leaderboard app.
import os

# --- HuggingFace Hub settings ---
# Auth token for Hub API calls; resolves to None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
# NOTE(review): org casing differs from the two repos above ("sunbird" vs
# "Sunbird") — the Hub treats repo ids case-sensitively in some paths; confirm.
SALT_DATASET = "sunbird/salt"
# --- Language settings: all UG40 languages covered by the leaderboard ---
# LANGUAGE_NAMES is the single source of truth; the code list is derived
# from its keys so the two collections can never drift apart (the original
# file kept them as two parallel literals with a TODO to keep them in sync).
# NOTE(review): codes look ISO-639-3-like but 'rny' is non-standard for
# Runyoro (ISO 639-3 uses 'nyo') — these appear to be SALT-project codes;
# confirm against the SALT dataset before changing.
LANGUAGE_NAMES = {
    'ach': 'Acholi',
    'eng': 'English',
    'lgg': 'Lugbara',
    'lug': 'Luganda',
    'nyn': 'Runyankole',
    'rny': 'Runyoro',
    'teo': 'Ateso',
    'swa': 'Swahili',
}
# Language codes, in the same order as LANGUAGE_NAMES insertion order.
ALL_UG40_LANGUAGES = list(LANGUAGE_NAMES)
# --- Google Translate baseline settings ---
# Maps SALT language codes to Google Translate codes. This dict is the
# single source of truth; the supported-subset list is derived from its
# keys so the two can never disagree (the original kept them as two
# parallel literals).
GOOGLE_LANG_MAP = {
    'lug': 'lg',
    'ach': 'ach',
    'swa': 'sw',
    'eng': 'en',
}
# UG40 languages that can be compared against the Google Translate baseline.
GOOGLE_SUPPORTED_LANGUAGES = list(GOOGLE_LANG_MAP)
# Evaluation settings
MAX_TEST_SAMPLES = 500 # Per language pair
MIN_SAMPLES_PER_PAIR = 10 # Minimum samples to be valid
# UI settings
TITLE = "πŸ† SALT Translation Leaderboard"
DESCRIPTION = """
**Scientific evaluation of translation models on Ugandan languages**
Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
Compare against Google Translate baseline and other submitted models.
"""
# --- Submission file format ---
# Schema every uploaded predictions file must follow.
PREDICTION_FORMAT = dict(
    required_columns=['sample_id', 'prediction'],
    optional_columns=['model_name', 'confidence'],
    file_types=['.csv', '.tsv', '.json'],
)

# --- Metrics configuration ---
# Primary metrics drive the ranking; secondary ones are informational.
# Scores are rendered with the given number of decimal places.
METRICS_CONFIG = dict(
    primary_metrics=['bleu', 'chrf', 'quality_score'],
    secondary_metrics=['rouge1', 'rougeL', 'cer', 'wer'],
    display_precision=4,
)