akera committed on
Commit
aa99a22
·
verified ·
1 Parent(s): bf6309d

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +44 -24
config.py CHANGED
@@ -2,41 +2,61 @@
2
  import os
3
 
4
  # HuggingFace settings
5
- HF_TOKEN = os.getenv("HF_TOKEN") # Set in Space secrets
6
  LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
 
7
  SALT_DATASET = "sunbird/salt"
8
 
9
- # Model settings
10
- MAX_MODEL_SIZE_GB = 15 # Limit for HF Space
11
- SUPPORTED_MODEL_TYPES = [
12
- "gemma", "qwen", "llama", "nllb", "google-translate"
13
  ]
14
 
15
- # Evaluation settings
16
- MAX_EVAL_SAMPLES = 200 # Limit for faster evaluation
17
- BATCH_SIZE = 4
18
- MAX_NEW_TOKENS = 100
19
-
20
- # UI settings
21
- TITLE = "🏆 SALT Translation Model Leaderboard"
22
- DESCRIPTION = """
23
- Evaluate your translation models on Ugandan languages!
24
- Submit a HuggingFace model and see how it performs on Luganda, Acholi, and Swahili translation tasks.
25
- """
26
-
27
- # Supported languages (Google Translate compatible subset)
28
- SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']
29
  LANGUAGE_NAMES = {
 
 
 
30
  'lug': 'Luganda',
31
- 'ach': 'Acholi',
32
- 'swa': 'Swahili',
33
- 'eng': 'English'
 
34
  }
35
 
 
 
 
36
  # Google Translate language mapping
37
  GOOGLE_LANG_MAP = {
38
  'lug': 'lg',
39
- 'ach': 'ach',
40
- 'swa': 'sw',
41
  'eng': 'en'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
 
2
  import os
3
 
4
  # HuggingFace settings
5
+ HF_TOKEN = os.getenv("HF_TOKEN")
6
  LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
7
+ TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
8
  SALT_DATASET = "sunbird/salt"
9
 
10
+ # Language settings - ALL UG40 LANGUAGES
11
+ ALL_UG40_LANGUAGES = [
12
+ 'ach', 'eng', 'lgg', 'lug', 'nyn', 'rny', 'teo', 'swa' # TODO: complete this list with the actual SALT language codes
 
13
  ]
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  LANGUAGE_NAMES = {
16
+ 'ach': 'Acholi',
17
+ 'eng': 'English',
18
+ 'lgg': 'Lugbara',
19
  'lug': 'Luganda',
20
+ 'nyn': 'Runyankole',
21
+ 'rny': 'Runyoro',
22
+ 'teo': 'Ateso',
23
+ 'swa': 'Swahili'
24
  }
25
 
26
+ # Google Translate supported subset (for comparison)
27
+ GOOGLE_SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']
28
+
29
  # Google Translate language mapping
30
  GOOGLE_LANG_MAP = {
31
  'lug': 'lg',
32
+ 'ach': 'ach',
33
+ 'swa': 'sw',
34
  'eng': 'en'
35
+ }
36
+
37
+ # Evaluation settings
38
+ MAX_TEST_SAMPLES = 500 # Per language pair
39
+ MIN_SAMPLES_PER_PAIR = 10 # Minimum samples to be valid
40
+
41
+ # UI settings
42
+ TITLE = "🏆 SALT Translation Leaderboard"
43
+ DESCRIPTION = """
44
+ **Scientific evaluation of translation models on Ugandan languages**
45
+
46
+ Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
47
+ Compare against Google Translate baseline and other submitted models.
48
+ """
49
+
50
+ # File format specifications
51
+ PREDICTION_FORMAT = {
52
+ 'required_columns': ['sample_id', 'prediction'],
53
+ 'optional_columns': ['model_name', 'confidence'],
54
+ 'file_types': ['.csv', '.tsv', '.json']
55
+ }
56
+
57
+ # Metrics configuration
58
+ METRICS_CONFIG = {
59
+ 'primary_metrics': ['bleu', 'chrf', 'quality_score'],
60
+ 'secondary_metrics': ['rouge1', 'rougeL', 'cer', 'wer'],
61
+ 'display_precision': 4
62
  }