# leaderboard/config.py — SALT translation leaderboard configuration.
# (HuggingFace Hub page chrome from the scraped copy removed; last
# recorded revision: aa99a22, "Update config.py" by akera.)
# config.py — central configuration constants for the leaderboard app.
import os

# --- HuggingFace Hub settings ---
# Auth token for Hub API calls; resolves to None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
# NOTE(review): org casing differs from the two repos above ("sunbird" vs
# "Sunbird") — the Hub treats repo ids case-sensitively in some paths; confirm.
SALT_DATASET = "sunbird/salt"
# --- Language settings: all UG40 languages covered by the leaderboard ---
# LANGUAGE_NAMES is the single source of truth; the code list is derived
# from its keys so the two collections can never drift apart (the original
# file kept them as two parallel literals with a TODO to keep them in sync).
# NOTE(review): codes look ISO-639-3-like but 'rny' is non-standard for
# Runyoro (ISO 639-3 uses 'nyo') — these appear to be SALT-project codes;
# confirm against the SALT dataset before changing.
LANGUAGE_NAMES = {
    'ach': 'Acholi',
    'eng': 'English',
    'lgg': 'Lugbara',
    'lug': 'Luganda',
    'nyn': 'Runyankole',
    'rny': 'Runyoro',
    'teo': 'Ateso',
    'swa': 'Swahili',
}
# Language codes, in the same order as LANGUAGE_NAMES insertion order.
ALL_UG40_LANGUAGES = list(LANGUAGE_NAMES)
# --- Google Translate baseline settings ---
# Maps SALT language codes to Google Translate codes. This dict is the
# single source of truth; the supported-subset list is derived from its
# keys so the two can never disagree (the original kept them as two
# parallel literals).
GOOGLE_LANG_MAP = {
    'lug': 'lg',
    'ach': 'ach',
    'swa': 'sw',
    'eng': 'en',
}
# UG40 languages that can be compared against the Google Translate baseline.
GOOGLE_SUPPORTED_LANGUAGES = list(GOOGLE_LANG_MAP)
# Evaluation settings
MAX_TEST_SAMPLES = 500 # Per language pair
MIN_SAMPLES_PER_PAIR = 10 # Minimum samples to be valid
# UI settings
TITLE = "πŸ† SALT Translation Leaderboard"
DESCRIPTION = """
**Scientific evaluation of translation models on Ugandan languages**
Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
Compare against Google Translate baseline and other submitted models.
"""
# --- Submission file format ---
# Schema every uploaded predictions file must follow.
PREDICTION_FORMAT = dict(
    required_columns=['sample_id', 'prediction'],
    optional_columns=['model_name', 'confidence'],
    file_types=['.csv', '.tsv', '.json'],
)

# --- Metrics configuration ---
# Primary metrics drive the ranking; secondary ones are informational.
# Scores are rendered with the given number of decimal places.
METRICS_CONFIG = dict(
    primary_metrics=['bleu', 'chrf', 'quality_score'],
    secondary_metrics=['rouge1', 'rougeL', 'cer', 'wer'],
    display_precision=4,
)