Spaces:
Running
Running
File size: 1,690 Bytes
11a64ab aa99a22 11a64ab aa99a22 11a64ab aa99a22 11a64ab aa99a22 11a64ab aa99a22 11a64ab aa99a22 11a64ab aa99a22 11a64ab aa99a22 11a64ab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# config.py
import os
# HuggingFace settings
# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Dataset identifiers.
# NOTE(review): SALT_DATASET uses a lowercase "sunbird/" namespace while the
# two IDs above use "Sunbird/" - confirm this difference is intended.
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
SALT_DATASET = "sunbird/salt"
# Language settings - ALL UG40 LANGUAGES
# Human-readable name for each language code.
# NOTE(review): ISO 639-3 assigns 'nyo' to Nyoro (Runyoro); confirm that
# 'rny' is the code the datasets actually use.
LANGUAGE_NAMES = {
    "ach": "Acholi",
    "eng": "English",
    "lgg": "Lugbara",
    "lug": "Luganda",
    "nyn": "Runyankole",
    "rny": "Runyoro",
    "teo": "Ateso",
    "swa": "Swahili",
}

# Language codes as a list, in the mapping's insertion order, so the two
# structures cannot drift apart.
# TODO: Complete this with actual SALT languages
ALL_UG40_LANGUAGES = list(LANGUAGE_NAMES)
# Google Translate language mapping: code used in this project -> code
# passed to Google Translate.
GOOGLE_LANG_MAP = {
    "lug": "lg",
    "ach": "ach",
    "swa": "sw",
    "eng": "en",
}

# Google Translate supported subset (for comparison): exactly the codes that
# have a mapping above, in the same order.
GOOGLE_SUPPORTED_LANGUAGES = list(GOOGLE_LANG_MAP)
# Evaluation settings
# Sample cap, applied per language pair.
MAX_TEST_SAMPLES: int = 500

# Minimum number of samples a language pair needs to be valid.
MIN_SAMPLES_PER_PAIR: int = 10
# UI settings
TITLE: str = "🏆 SALT Translation Leaderboard"

# Markdown description text. The string begins and ends with a newline and
# each sentence sits on its own line.
DESCRIPTION: str = (
    "\n"
    "**Scientific evaluation of translation models on Ugandan languages**\n"
    "Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.\n"
    "Compare against Google Translate baseline and other submitted models.\n"
)
# File format specifications
# Columns and file extensions for prediction files: which columns are
# required, which are optional, and which file types are listed.
PREDICTION_FORMAT = dict(
    required_columns=["sample_id", "prediction"],
    optional_columns=["model_name", "confidence"],
    file_types=[".csv", ".tsv", ".json"],
)
# Metrics configuration
# 'primary_metrics' and 'secondary_metrics' list metric names;
# 'display_precision' is an integer precision setting (presumably the number
# of decimal places shown - verify against the code that formats scores).
METRICS_CONFIG = {
    'primary_metrics': ['bleu', 'chrf', 'quality_score'],
    'secondary_metrics': ['rouge1', 'rougeL', 'cer', 'wer'],
    'display_precision': 4,
}