Spaces:
Sleeping
Sleeping
Update config.py
Browse files
config.py
CHANGED
|
@@ -2,41 +2,61 @@
|
|
| 2 |
import os
|
| 3 |
|
| 4 |
# HuggingFace settings
|
| 5 |
-
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 6 |
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
|
|
|
|
| 7 |
SALT_DATASET = "sunbird/salt"
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
"gemma", "qwen", "llama", "nllb", "google-translate"
|
| 13 |
]
|
| 14 |
|
| 15 |
-
# Evaluation settings
|
| 16 |
-
MAX_EVAL_SAMPLES = 200 # Limit for faster evaluation
|
| 17 |
-
BATCH_SIZE = 4
|
| 18 |
-
MAX_NEW_TOKENS = 100
|
| 19 |
-
|
| 20 |
-
# UI settings
|
| 21 |
-
TITLE = "🏆 SALT Translation Model Leaderboard"
|
| 22 |
-
DESCRIPTION = """
|
| 23 |
-
Evaluate your translation models on Ugandan languages!
|
| 24 |
-
Submit a HuggingFace model and see how it performs on Luganda, Acholi, and Swahili translation tasks.
|
| 25 |
-
"""
|
| 26 |
-
|
| 27 |
-
# Supported languages (Google Translate compatible subset)
|
| 28 |
-
SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']
|
| 29 |
LANGUAGE_NAMES = {
|
|
|
|
|
|
|
|
|
|
| 30 |
'lug': 'Luganda',
|
| 31 |
-
'
|
| 32 |
-
'
|
| 33 |
-
'
|
|
|
|
| 34 |
}
|
| 35 |
|
|
|
|
|
|
|
|
|
|
| 36 |
# Google Translate language mapping
|
| 37 |
GOOGLE_LANG_MAP = {
|
| 38 |
'lug': 'lg',
|
| 39 |
-
'ach': 'ach',
|
| 40 |
-
'swa': 'sw',
|
| 41 |
'eng': 'en'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
}
|
|
|
|
| 2 |
import os
|
| 3 |
|
| 4 |
# HuggingFace settings
|
| 5 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 6 |
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
|
| 7 |
+
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
|
| 8 |
SALT_DATASET = "sunbird/salt"
|
| 9 |
|
| 10 |
+
# Language settings - ALL UG40 LANGUAGES
|
| 11 |
+
ALL_UG40_LANGUAGES = [
|
| 12 |
+
'ach', 'eng', 'lgg', 'lug', 'nyn', 'rny', 'teo', 'swa' # Complete this with actual SALT languages
|
|
|
|
| 13 |
]
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
LANGUAGE_NAMES = {
|
| 16 |
+
'ach': 'Acholi',
|
| 17 |
+
'eng': 'English',
|
| 18 |
+
'lgg': 'Lugbara',
|
| 19 |
'lug': 'Luganda',
|
| 20 |
+
'nyn': 'Runyankole',
|
| 21 |
+
'rny': 'Runyoro',
|
| 22 |
+
'teo': 'Ateso',
|
| 23 |
+
'swa': 'Swahili'
|
| 24 |
}
|
| 25 |
|
| 26 |
+
# Google Translate supported subset (for comparison)
|
| 27 |
+
GOOGLE_SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']
|
| 28 |
+
|
| 29 |
# Google Translate language mapping
|
| 30 |
GOOGLE_LANG_MAP = {
|
| 31 |
'lug': 'lg',
|
| 32 |
+
'ach': 'ach',
|
| 33 |
+
'swa': 'sw',
|
| 34 |
'eng': 'en'
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
# Evaluation settings
|
| 38 |
+
MAX_TEST_SAMPLES = 500 # Per language pair
|
| 39 |
+
MIN_SAMPLES_PER_PAIR = 10 # Minimum samples to be valid
|
| 40 |
+
|
| 41 |
+
# UI settings
|
| 42 |
+
TITLE = "🏆 SALT Translation Leaderboard"
|
| 43 |
+
DESCRIPTION = """
|
| 44 |
+
**Scientific evaluation of translation models on Ugandan languages**
|
| 45 |
+
|
| 46 |
+
Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
|
| 47 |
+
Compare against Google Translate baseline and other submitted models.
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
# File format specifications
|
| 51 |
+
PREDICTION_FORMAT = {
|
| 52 |
+
'required_columns': ['sample_id', 'prediction'],
|
| 53 |
+
'optional_columns': ['model_name', 'confidence'],
|
| 54 |
+
'file_types': ['.csv', '.tsv', '.json']
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
# Metrics configuration
|
| 58 |
+
METRICS_CONFIG = {
|
| 59 |
+
'primary_metrics': ['bleu', 'chrf', 'quality_score'],
|
| 60 |
+
'secondary_metrics': ['rouge1', 'rougeL', 'cer', 'wer'],
|
| 61 |
+
'display_precision': 4
|
| 62 |
}
|