Spaces:

akera
/

leaderboard

Sleeping

App Files Files Community

leaderboard / config.py

akera

Update config.py

a729bca verified 3 months ago

raw

history blame

2.84 kB

	# config.py
	import os

	# HuggingFace settings
	# Access token read from the environment; None when HF_TOKEN is not set.
	HF_TOKEN: "str | None" = os.environ.get("HF_TOKEN")

	# Dataset identifiers used by the leaderboard.
	LEADERBOARD_DATASET: str = "Sunbird/salt-translation-leaderboard"
	TEST_SET_DATASET: str = "Sunbird/salt-translation-test-set"
	SALT_DATASET: str = "sunbird/salt"

	# Language settings - ALL UG40 LANGUAGES (Updated from SALT constants)
	# Language code -> display name. The insertion order of this mapping also
	# fixes the order of ALL_UG40_LANGUAGES below.
	LANGUAGE_NAMES = {
	    "ach": "Acholi",
	    "eng": "English",
	    "lgg": "Lugbara",
	    "lug": "Luganda",
	    "nyn": "Runyankole",
	    "rny": "Runyoro",
	    "teo": "Ateso",
	    "swa": "Swahili",
	}

	# Language codes as a list, in the same order as the mapping's keys.
	ALL_UG40_LANGUAGES = list(LANGUAGE_NAMES)

	# Google Translate language mapping
	# Leaderboard language code -> Google Translate language code.
	GOOGLE_LANG_MAP = {
	    "lug": "lg",
	    "ach": "ach",
	    "swa": "sw",
	    "eng": "en",
	}

	# Google Translate supported subset (for comparison)
	# Exactly the codes that have a mapping above, in the same order.
	GOOGLE_SUPPORTED_LANGUAGES = list(GOOGLE_LANG_MAP)

	# Evaluation settings
	# Upper bound on test samples, counted per language pair.
	MAX_TEST_SAMPLES: int = 500
	# Minimum number of samples a language pair needs to be valid.
	MIN_SAMPLES_PER_PAIR: int = 10

	# UI settings
	# Heading text for the leaderboard; the code that displays it is not
	# visible in this file.
	TITLE: str = "🏆 SALT Translation Leaderboard"
	# Multi-line introduction text. The literal begins with a newline right
	# after the opening quotes, so consumers receive leading whitespace.
	# NOTE(review): presumably rendered as Markdown/HTML by the app - confirm.
	DESCRIPTION: str = """
	Evaluation of translation models on Ugandan languages

	Upload your model's predictions on our standardized test set to see how it performs across all UG40 language pairs.
	Compare against Google Translate baseline and other submitted models.
	"""

	# File format specifications
	# Describes a prediction upload: which columns are required or optional
	# and which file extensions are listed as accepted.
	PREDICTION_FORMAT = {
	    "required_columns": [
	        "sample_id",
	        "prediction",
	    ],
	    "optional_columns": [
	        "model_name",
	        "confidence",
	    ],
	    "file_types": [
	        ".csv",
	        ".tsv",
	        ".json",
	    ],
	}

	# Metrics configuration - Updated to match reference implementation
	METRICS_CONFIG = {
	    "primary_metrics": [
	        "bleu",
	        "chrf",
	        "quality_score",
	    ],
	    "secondary_metrics": [
	        "rouge1",
	        "rouge2",
	        "rougeL",
	        "cer",
	        "wer",
	        "len_ratio",
	    ],
	    "display_precision": 4,
	    # Metrics listed as components of quality_score; each note records
	    # the scale or inversion stated for that metric.
	    "quality_score_components": [
	        "bleu",  # normalized to 0-1
	        "chrf",  # already 0-1
	        "cer",  # inverted (1-cer)
	        "wer",  # inverted (1-wer)
	        "rouge1",  # 0-1
	        "rougeL",  # 0-1
	    ],
	    # Lower is better
	    "error_metrics": [
	        "cer",
	        "wer",
	    ],
	    # Higher is better
	    "score_metrics": [
	        "bleu",
	        "chrf",
	        "quality_score",
	        "rouge1",
	        "rouge2",
	        "rougeL",
	    ],
	}

	# Display settings for leaderboard
	DISPLAY_CONFIG = {
	    # Caps on how many entries each view lists.
	    "max_models_radar": 8,
	    "max_models_ranking": 15,
	    "max_language_pairs_detail": 20,
	    # Number of decimal places per metric.
	    "decimal_places": {
	        "quality_score": 4,
	        "bleu": 2,
	        "chrf": 4,
	        "rouge1": 4,
	        "rouge2": 4,
	        "rougeL": 4,
	        "cer": 4,
	        "wer": 4,
	        "len_ratio": 3,
	        "coverage_rate": 1,  # percentage
	    },
	}

	# Chart colors and styling
	CHART_CONFIG = {
	    # Hex colors keyed by model category.
	    "google_comparable_color": "#1f77b4",
	    "ug40_only_color": "#ff7f0e",
	    # Named colorscales.
	    "primary_colorscale": "Viridis",
	    "secondary_colorscale": "Plasma",
	    # Chart sizing: per-model bar height plus lower and upper height bounds.
	    "bar_height_per_model": 30,
	    "min_chart_height": 400,
	    "max_chart_height": 1000,
	}