# config.py
"""Central configuration constants for the SALT translation leaderboard.

Groups the dataset identifiers, language tables, evaluation limits, UI text,
accepted prediction-file format, metric settings, and display/chart settings
used by the rest of the application. This module only defines constants; the
single runtime lookup is reading ``HF_TOKEN`` from the environment.
"""
import os

# HuggingFace settings
# HF_TOKEN is None when the environment variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
SALT_DATASET = "sunbird/salt"

# Language settings - ALL UG40 LANGUAGES (Updated from SALT constants)
ALL_UG40_LANGUAGES = [
    'ach', 'eng', 'lgg', 'lug', 'nyn', 'rny', 'teo', 'swa'
]

# Human-readable name for each language code listed above.
LANGUAGE_NAMES = {
    'ach': 'Acholi',
    'eng': 'English',
    'lgg': 'Lugbara',
    'lug': 'Luganda',
    'nyn': 'Runyankole',
    'rny': 'Runyoro',
    'teo': 'Ateso',
    'swa': 'Swahili'
}

# Google Translate supported subset (for comparison)
GOOGLE_SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']

# Google Translate language mapping
# Keys are the codes used in this project; values are the codes passed to
# Google Translate.
GOOGLE_LANG_MAP = {
    'lug': 'lg',
    'ach': 'ach',
    'swa': 'sw',
    'eng': 'en'
}

# Evaluation settings
MAX_TEST_SAMPLES = 500  # Per language pair
MIN_SAMPLES_PER_PAIR = 10  # Minimum samples to be valid

# UI settings
TITLE = "🏆 SALT Translation Leaderboard"
# NOTE(review): the wording below is unchanged, but the line breaks inside the
# string were reconstructed from a whitespace-collapsed source - confirm the
# intended paragraph layout.
DESCRIPTION = """
**Scientific evaluation of translation models on Ugandan languages**

Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
Compare against Google Translate baseline and other submitted models.
"""

# File format specifications
PREDICTION_FORMAT = {
    'required_columns': ['sample_id', 'prediction'],
    'optional_columns': ['model_name', 'confidence'],
    'file_types': ['.csv', '.tsv', '.json']
}

# Metrics configuration - Updated to match reference implementation
METRICS_CONFIG = {
    'primary_metrics': ['bleu', 'chrf', 'quality_score'],
    'secondary_metrics': ['rouge1', 'rouge2', 'rougeL', 'cer', 'wer', 'len_ratio'],
    'display_precision': 4,
    'quality_score_components': [
        'bleu',    # normalized to 0-1
        'chrf',    # already 0-1
        'cer',     # inverted (1-cer)
        'wer',     # inverted (1-wer)
        'rouge1',  # 0-1
        'rougeL'   # 0-1
    ],
    'error_metrics': ['cer', 'wer'],  # Lower is better
    'score_metrics': ['bleu', 'chrf', 'quality_score', 'rouge1', 'rouge2', 'rougeL']  # Higher is better
}

# Display settings for leaderboard
DISPLAY_CONFIG = {
    'max_models_radar': 8,
    'max_models_ranking': 15,
    'max_language_pairs_detail': 20,
    'decimal_places': {
        'quality_score': 4,
        'bleu': 2,
        'chrf': 4,
        'rouge1': 4,
        'rouge2': 4,
        'rougeL': 4,
        'cer': 4,
        'wer': 4,
        'len_ratio': 3,
        'coverage_rate': 1  # percentage
    }
}

# Chart colors and styling
CHART_CONFIG = {
    'google_comparable_color': '#1f77b4',
    'ug40_only_color': '#ff7f0e',
    'primary_colorscale': 'Viridis',
    'secondary_colorscale': 'Plasma',
    'bar_height_per_model': 30,
    'min_chart_height': 400,
    'max_chart_height': 1000
}