"""Configuration constants for the SALT Translation Leaderboard Space.

NOTE(review): this file was reconstructed from a corrupted scrape of the
commit diff ("Update config.py", hunk @@ -2,41 +2,61 @@); the content below
is the post-commit ("+") side of that diff. The pre-hunk line 1 was not
visible in the scrape — confirm against the repository.
"""
import os

# HuggingFace settings
HF_TOKEN = os.getenv("HF_TOKEN")  # read from the environment; None if unset
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
SALT_DATASET = "sunbird/salt"

# Language settings - ALL UG40 LANGUAGES
ALL_UG40_LANGUAGES = [
    'ach', 'eng', 'lgg', 'lug', 'nyn', 'rny', 'teo', 'swa'  # Complete this with actual SALT languages
]

# ISO-639-3-style code -> human-readable language name, for display in the UI.
LANGUAGE_NAMES = {
    'ach': 'Acholi',
    'eng': 'English',
    'lgg': 'Lugbara',
    'lug': 'Luganda',
    'nyn': 'Runyankole',
    'rny': 'Runyoro',
    'teo': 'Ateso',
    'swa': 'Swahili'
}

# Google Translate supported subset (for comparison)
GOOGLE_SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']

# Google Translate language mapping
# (SALT code -> Google Translate code; differs where Google uses ISO-639-1).
GOOGLE_LANG_MAP = {
    'lug': 'lg',
    'ach': 'ach',
    'swa': 'sw',
    'eng': 'en'
}

# Evaluation settings
MAX_TEST_SAMPLES = 500  # Per language pair
MIN_SAMPLES_PER_PAIR = 10  # Minimum samples to be valid

# UI settings
TITLE = "🏆 SALT Translation Leaderboard"
DESCRIPTION = """
**Scientific evaluation of translation models on Ugandan languages**

Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
Compare against Google Translate baseline and other submitted models.
"""

# File format specifications
# Schema expected of an uploaded predictions file.
PREDICTION_FORMAT = {
    'required_columns': ['sample_id', 'prediction'],
    'optional_columns': ['model_name', 'confidence'],
    'file_types': ['.csv', '.tsv', '.json']
}

# Metrics configuration
METRICS_CONFIG = {
    'primary_metrics': ['bleu', 'chrf', 'quality_score'],
    'secondary_metrics': ['rouge1', 'rougeL', 'cer', 'wer'],
    'display_precision': 4  # decimal places shown in the leaderboard table
}