leaderboard / src /utils.py
akera's picture
Create src/utils.py
1abade4 verified
raw
history blame
1.76 kB
# src/utils.py
import re
import datetime
from typing import Dict, List, Any
import salt.constants
def get_language_name(lang_code: str) -> str:
    """Resolve a language code to its display name.

    The code is looked up in ``salt.constants.SALT_LANGUAGE_NAMES``.
    ``None`` yields ``"Unknown"``; a code that is missing from the table
    is returned as-is, coerced to ``str``.
    """
    if lang_code is not None:
        known_names = salt.constants.SALT_LANGUAGE_NAMES
        fallback = str(lang_code)
        return known_names.get(lang_code, fallback)
    return "Unknown"
def format_model_name(model_path: str) -> str:
    """Return the short name displayed in the leaderboard for a model.

    The ``'google-translate'`` sentinel maps to ``'Google Translate'``.
    Any other value is reduced to the text after its last ``/``; a value
    without a slash is returned unchanged.
    """
    if model_path == 'google-translate':
        return 'Google Translate'

    # No slash: there is no "org/" prefix to strip.
    if '/' not in model_path:
        return model_path

    # Keep only the final path component (the repository name).
    return model_path.rsplit('/', 1)[-1]
def validate_model_path(model_path: str) -> bool:
    """Return whether ``model_path`` has a supported form.

    Accepted forms:

    * the ``'google-translate'`` sentinel;
    * an ``owner/name`` path where both parts consist only of ASCII
      letters, digits, ``.``, ``_`` and ``-``;
    * any non-empty string that contains no ``/`` (bare model name).

    Empty strings and non-string values are rejected rather than raising.
    """
    # Guard against None / non-str input and the empty string, neither of
    # which can name a model.
    if not isinstance(model_path, str) or not model_path:
        return False

    if model_path == 'google-translate':
        return True

    # Bare names (no owner prefix) are accepted without further checks.
    if '/' not in model_path:
        return True

    # fullmatch (rather than match with '$') so a trailing newline such as
    # "org/model\n" is not silently accepted.
    pattern = r'[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+'
    return re.fullmatch(pattern, model_path) is not None
def get_model_type(model_path: str) -> str:
    """Classify a model path into a model-family label.

    Returns ``'google-translate'`` for that exact sentinel; otherwise the
    first of ``'gemma'``, ``'qwen'``, ``'llama'``, ``'nllb'`` found as a
    case-insensitive substring of the path, or ``'other'`` when none match.
    """
    lowered = model_path.lower()

    # The sentinel is compared against the original (non-lowercased) value.
    if model_path == 'google-translate':
        return 'google-translate'

    # Order matters: the first family keyword found wins.
    for family in ('gemma', 'qwen', 'llama', 'nllb'):
        if family in lowered:
            return family
    return 'other'
def create_submission_id() -> str:
    """Create a unique submission ID.

    The ID has the form ``YYYYMMDD_HHMMSS_<8 hex chars>``: a local-time
    timestamp followed by a random suffix. The timestamp alone only has
    one-second resolution, so the suffix is what keeps two submissions
    made within the same second from sharing an ID.
    """
    # Function-scope import keeps this block self-contained.
    import uuid

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    suffix = uuid.uuid4().hex[:8]
    return f"{timestamp}_{suffix}"
def sanitize_input(text: str) -> str:
    """Strip disallowed characters from user-supplied text.

    Falsy input (``None``, ``""``) yields ``""``. Otherwise surrounding
    whitespace is trimmed and every character that is not a word
    character, ``-``, ``.`` or ``/`` is removed.
    """
    if text:
        trimmed = text.strip()
        # Negated class: anything outside the allowed set is dropped.
        return re.sub(r'[^\w\-./]', '', trimmed)
    return ""