# config.py
import os

# HuggingFace settings
# Access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Dataset identifiers used by the leaderboard.
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
SALT_DATASET = "sunbird/salt"
# Language settings - ALL UG40 LANGUAGES
# TODO: complete this list with the actual SALT languages.
# NOTE(review): the ISO 639-3 code for Runyoro (Nyoro) is 'nyo'; confirm that
# 'rny' matches the codes used by the datasets before relying on it.
ALL_UG40_LANGUAGES = [
    'ach',
    'eng',
    'lgg',
    'lug',
    'nyn',
    'rny',
    'teo',
    'swa',
]

# Human-readable display name for each language code listed above.
LANGUAGE_NAMES = {
    'ach': 'Acholi',
    'eng': 'English',
    'lgg': 'Lugbara',
    'lug': 'Luganda',
    'nyn': 'Runyankole',
    'rny': 'Runyoro',
    'teo': 'Ateso',
    'swa': 'Swahili',
}
# Google Translate language mapping: project language code -> the code
# used for that language on the Google Translate side.
GOOGLE_LANG_MAP = {
    'lug': 'lg',
    'ach': 'ach',
    'swa': 'sw',
    'eng': 'en',
}

# Google Translate supported subset (for comparison).
# Derived from the mapping's keys (insertion order preserved) so the two
# constants cannot drift apart.
GOOGLE_SUPPORTED_LANGUAGES = list(GOOGLE_LANG_MAP)
# Evaluation settings
# Cap on the number of test samples, applied per language pair.
MAX_TEST_SAMPLES: int = 500
# Minimum number of samples a language pair needs to be valid.
MIN_SAMPLES_PER_PAIR: int = 10
# UI settings
# NOTE(review): the leading "π" looks like a mis-decoded emoji from the
# original source; confirm the intended character before changing it.
TITLE = "π SALT Translation Leaderboard"

# Markdown blurb describing the leaderboard.
DESCRIPTION = """
**Scientific evaluation of translation models on Ugandan languages**
Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
Compare against Google Translate baseline and other submitted models.
"""
# File format specifications for uploaded prediction files.
PREDICTION_FORMAT = {
    # Columns listed as required in a predictions file.
    'required_columns': ['sample_id', 'prediction'],
    # Columns listed as optional.
    'optional_columns': ['model_name', 'confidence'],
    # Listed file extensions.
    'file_types': ['.csv', '.tsv', '.json'],
}
# Metrics configuration
METRICS_CONFIG = {
    # Metrics grouped as primary.
    'primary_metrics': ['bleu', 'chrf', 'quality_score'],
    # Metrics grouped as secondary.
    'secondary_metrics': ['rouge1', 'rougeL', 'cer', 'wer'],
    # Display precision for metric values (presumably decimal places; confirm with the consumer).
    'display_precision': 4,
}