# config.py
"""Central configuration constants for the SALT translation leaderboard.

Everything here is a plain module-level constant; the only runtime lookup is
``HF_TOKEN``, which is read from the process environment at import time.
"""
import os

# ---------------------------------------------------------------------------
# HuggingFace settings
# ---------------------------------------------------------------------------
# Read from the HF_TOKEN environment variable; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
SALT_DATASET = "sunbird/salt"

# ---------------------------------------------------------------------------
# Language settings - ALL UG40 LANGUAGES (Updated from SALT constants)
# ---------------------------------------------------------------------------
# Language code -> human-readable display name.
LANGUAGE_NAMES = {
    "ach": "Acholi",
    "eng": "English",
    "lgg": "Lugbara",
    "lug": "Luganda",
    "nyn": "Runyankole",
    "rny": "Runyoro",
    "teo": "Ateso",
    "swa": "Swahili",
}

# Derived from LANGUAGE_NAMES so the code list and the display names cannot
# drift apart; dicts preserve insertion order, so the list order is the order
# of the keys above.
ALL_UG40_LANGUAGES = list(LANGUAGE_NAMES)

# Google Translate language mapping: code used in this module -> code listed
# for Google Translate.
GOOGLE_LANG_MAP = {
    "lug": "lg",
    "ach": "ach",
    "swa": "sw",
    "eng": "en",
}

# Google Translate supported subset (for comparison). Derived from the mapping
# keys (same order) so every supported language always has a mapping entry.
GOOGLE_SUPPORTED_LANGUAGES = list(GOOGLE_LANG_MAP)

# ---------------------------------------------------------------------------
# Evaluation settings
# ---------------------------------------------------------------------------
MAX_TEST_SAMPLES = 500  # Per language pair
MIN_SAMPLES_PER_PAIR = 10  # Minimum samples to be valid

# ---------------------------------------------------------------------------
# UI settings
# ---------------------------------------------------------------------------
TITLE = "🏆 SALT Translation Leaderboard"

# NOTE(review): the value is kept exactly as found, including the leading
# space and the trailing " \n". Confirm whether paragraph breaks between the
# sentences were intended before reformatting this text.
DESCRIPTION = (
    " Evaluation of translation models on Ugandan languages "
    "Upload your model's predictions on our standardized test set to see "
    "how it performs across all UG40 language pairs. "
    "Compare against Google Translate baseline and other submitted models. \n"
)

# ---------------------------------------------------------------------------
# File format specifications
# ---------------------------------------------------------------------------
PREDICTION_FORMAT = {
    "required_columns": ["sample_id", "prediction"],
    "optional_columns": ["model_name", "confidence"],
    "file_types": [".csv", ".tsv", ".json"],
}

# ---------------------------------------------------------------------------
# Metrics configuration - Updated to match reference implementation
# ---------------------------------------------------------------------------
METRICS_CONFIG = {
    "primary_metrics": ["bleu", "chrf", "quality_score"],
    "secondary_metrics": ["rouge1", "rouge2", "rougeL", "cer", "wer", "len_ratio"],
    "display_precision": 4,
    "quality_score_components": [
        "bleu",  # normalized to 0-1
        "chrf",  # already 0-1
        "cer",  # inverted (1-cer)
        "wer",  # inverted (1-wer)
        "rouge1",  # 0-1
        "rougeL",  # 0-1
    ],
    # Lower is better
    "error_metrics": ["cer", "wer"],
    # Higher is better
    "score_metrics": ["bleu", "chrf", "quality_score", "rouge1", "rouge2", "rougeL"],
}

# ---------------------------------------------------------------------------
# Display settings for leaderboard
# ---------------------------------------------------------------------------
DISPLAY_CONFIG = {
    "max_models_radar": 8,
    "max_models_ranking": 15,
    "max_language_pairs_detail": 20,
    # Digits after the decimal point, keyed by metric name.
    "decimal_places": {
        "quality_score": 4,
        "bleu": 2,
        "chrf": 4,
        "rouge1": 4,
        "rouge2": 4,
        "rougeL": 4,
        "cer": 4,
        "wer": 4,
        "len_ratio": 3,
        "coverage_rate": 1,  # percentage
    },
}

# ---------------------------------------------------------------------------
# Chart colors and styling
# ---------------------------------------------------------------------------
CHART_CONFIG = {
    "google_comparable_color": "#1f77b4",
    "ug40_only_color": "#ff7f0e",
    "primary_colorscale": "Viridis",
    "secondary_colorscale": "Plasma",
    "bar_height_per_model": 30,
    "min_chart_height": 400,
    "max_chart_height": 1000,
}