# app.py
import subprocess
import sys
import os
from pathlib import Path


def setup_salt():
    """Clone and set up the SALT library, mirroring the Colab workflow."""
    try:
        # Check if salt is already available
        import salt.dataset
        print("✅ SALT library already available")
        return True
    except ImportError:
        pass

    print("📥 Setting up SALT library...")
    try:
        # Clone the SALT repo if it does not exist yet
        salt_dir = Path("salt")
        if not salt_dir.exists():
            print("📥 Cloning SALT repository...")
            subprocess.check_call([
                "git", "clone", "https://github.com/sunbirdai/salt.git"
            ])
        else:
            print("📁 SALT repository already exists")

        # Install SALT requirements
        salt_requirements = salt_dir / "requirements.txt"
        if salt_requirements.exists():
            print("📦 Installing SALT requirements...")
            subprocess.check_call([
                sys.executable, "-m", "pip", "install", "-q", "-r", str(salt_requirements)
            ])

        # Add the SALT directory to the Python path
        salt_path = str(salt_dir.absolute())
        if salt_path not in sys.path:
            sys.path.insert(0, salt_path)
            print(f"📍 Added {salt_path} to Python path")

        # Verify that the import now works
        import salt.dataset
        print("✅ SALT library setup completed successfully")
        return True
    except Exception as e:
        print(f"❌ Failed to set up SALT: {e}")
        return False


# Set up SALT on startup
print("🚀 Starting SALT Translation Leaderboard...")
if not setup_salt():
    print("❌ Cannot continue without SALT library")
    print("💡 Please check that git is available and GitHub is accessible")
    sys.exit(1)
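# For reference, a hypothetical Colab cell equivalent to setup_salt() above
# (an assumption based on the docstring's "Colab workflow"; `!` lines are
# notebook shell syntax, not part of this app):
#
#   !git clone https://github.com/sunbirdai/salt.git
#   !pip install -q -r salt/requirements.txt
#   import sys; sys.path.insert(0, "salt")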
import gradio as gr
import pandas as pd
import json
import traceback
from datetime import datetime
from typing import Optional, Dict, Tuple

# Import our modules
from src.test_set import get_public_test_set, get_complete_test_set, create_test_set_download, validate_test_set_integrity
from src.validation import validate_submission_complete
from src.evaluation import evaluate_predictions, generate_evaluation_report, get_google_translate_baseline
from src.leaderboard import (
    load_leaderboard, add_model_to_leaderboard, get_leaderboard_stats,
    filter_leaderboard, export_leaderboard, get_model_comparison
)
from src.plotting import (
    create_leaderboard_ranking_plot, create_metrics_comparison_plot,
    create_language_pair_heatmap, create_coverage_analysis_plot,
    create_model_performance_timeline, create_google_comparison_plot,
    create_detailed_model_analysis, create_submission_summary_plot
)
from src.utils import sanitize_model_name, get_all_language_pairs, get_google_comparable_pairs
from config import *

# Global variables for caching
current_leaderboard = None
public_test_set = None
complete_test_set = None


def initialize_data():
    """Initialize test sets and leaderboard data."""
    global public_test_set, complete_test_set, current_leaderboard
    try:
        print("🚀 Initializing SALT Translation Leaderboard...")

        # Load the test sets
        print("📥 Loading test sets...")
        public_test_set = get_public_test_set()
        complete_test_set = get_complete_test_set()

        # Load the leaderboard
        print("📊 Loading leaderboard...")
        current_leaderboard = load_leaderboard()

        print("✅ Initialization complete!")
        print(f"   - Test set: {len(public_test_set):,} samples")
        print(f"   - Language pairs: {len(get_all_language_pairs())}")
        print(f"   - Current models: {len(current_leaderboard)}")
        return True
    except Exception as e:
        print(f"❌ Initialization failed: {e}")
        traceback.print_exc()
        return False
def download_test_set() -> Tuple[str, str]:
    """Create a downloadable test set and return the file path and an info message."""
    try:
        global public_test_set
        if public_test_set is None:
            public_test_set = get_public_test_set()

        # Create the download file
        download_path, stats = create_test_set_download()

        # Build the info message
        info_msg = f"""
📥 **SALT Test Set Downloaded Successfully!**

**Dataset Statistics:**
- **Total Samples**: {stats['total_samples']:,}
- **Language Pairs**: {stats['language_pairs']}
- **Google Comparable**: {stats['google_comparable_samples']:,} samples
- **Languages**: {', '.join(stats['languages'])}

**File Format:**
- `sample_id`: Unique identifier for each sample
- `source_text`: Text to be translated
- `source_language`: Source language code
- `target_language`: Target language code
- `domain`: Content domain (if available)
- `google_comparable`: Whether this pair can be compared with Google Translate

**Next Steps:**
1. Run your model on the source texts
2. Create a CSV/JSON file with columns: `sample_id`, `prediction`
3. Upload your predictions using the "Submit Predictions" tab
"""
        return download_path, info_msg
    except Exception as e:
        error_msg = f"❌ Error creating test set download: {str(e)}"
        return None, error_msg
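# Note (inferred from the usage above, not from src/test_set itself):
# create_test_set_download() is assumed to return a (file_path, stats) pair,
# where stats is a dict with at least the keys 'total_samples',
# 'language_pairs', 'google_comparable_samples', and 'languages'.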
def validate_submission(file, model_name: str, author: str, description: str) -> Tuple[str, Optional[pd.DataFrame]]:
    """Validate an uploaded prediction file, supporting str paths, bytes, and Gradio wrappers."""
    try:
        if file is None:
            return "❌ Please upload a predictions file", None
        if not model_name.strip():
            return "❌ Please provide a model name", None

        # 1) Determine the raw bytes
        if isinstance(file, bytes):
            file_content = file
        elif isinstance(file, str):
            # Could be a path or raw text
            if os.path.exists(file):
                with open(file, "rb") as f:
                    file_content = f.read()
            else:
                file_content = file.encode("utf-8")
        elif hasattr(file, "name") and os.path.exists(file.name):
            # tempfile._TemporaryFileWrapper from Gradio
            with open(file.name, "rb") as f:
                file_content = f.read()
        else:
            return "❌ Could not read uploaded file", None

        # 2) Infer a filename for format sniffing
        filename = (
            getattr(file, "name", None)
            or getattr(file, "filename", None)
            or "predictions.csv"
        )

        # 3) Load the test set if needed
        global complete_test_set
        if complete_test_set is None:
            complete_test_set = get_complete_test_set()

        # 4) Run the existing validation pipeline
        validation_result = validate_submission_complete(
            file_content, filename, complete_test_set, model_name
        )
        if validation_result["valid"]:
            return validation_result["report"], validation_result["predictions"]
        else:
            return validation_result["report"], None
    except Exception as e:
        return (
            f"❌ Validation error: {e}\n\nTraceback:\n{traceback.format_exc()}",
            None,
        )
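# Usage sketch: validate_submission() accepts several input shapes. These
# calls are illustrative only (the paths and bytes are made up):
#
#   validate_submission("predictions.csv", "MyTranslator-v1.0", "Me", "")      # file path
#   validate_submission(b"sample_id,prediction\n...", "MyTranslator", "", "")  # raw bytes
#   # ...or the tempfile wrapper that gr.File passes to event handlers.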
def evaluate_submission(
    predictions_df: pd.DataFrame,
    model_name: str,
    author: str,
    description: str,
    validation_info: Dict
) -> Tuple[str, pd.DataFrame, object, object]:
    """Evaluate validated predictions and update the leaderboard."""
    try:
        if predictions_df is None:
            return "❌ No valid predictions to evaluate", None, None, None

        # Get the complete test set with targets
        global complete_test_set, current_leaderboard
        if complete_test_set is None:
            complete_test_set = get_complete_test_set()

        # Run the evaluation
        print(f"📊 Evaluating {model_name}...")
        evaluation_results = evaluate_predictions(predictions_df, complete_test_set)
        if evaluation_results.get('error'):
            return f"❌ Evaluation error: {evaluation_results['error']}", None, None, None

        # Add to the leaderboard
        print("🏆 Adding to leaderboard...")
        model_type = "user_submission"  # Could be enhanced to detect the model type
        updated_leaderboard = add_model_to_leaderboard(
            model_name=sanitize_model_name(model_name),
            author=author or "Anonymous",
            evaluation_results=evaluation_results,
            validation_info=validation_info,
            model_type=model_type,
            description=description or ""
        )

        # Update the global leaderboard
        current_leaderboard = updated_leaderboard

        # Generate the evaluation report
        report = generate_evaluation_report(evaluation_results, model_name)

        # Create visualization plots
        summary_plot = create_submission_summary_plot(validation_info, evaluation_results)
        ranking_plot = create_leaderboard_ranking_plot(updated_leaderboard)

        # Format the success message
        rank = updated_leaderboard[updated_leaderboard['model_name'] == sanitize_model_name(model_name)].index[0] + 1
        total_models = len(updated_leaderboard)
        success_msg = f"""
🎉 **Evaluation Complete!**

**Your Results:**
- **Model**: {model_name}
- **Rank**: #{rank} out of {total_models} models
- **Quality Score**: {evaluation_results['averages'].get('quality_score', 0):.4f}
- **BLEU**: {evaluation_results['averages'].get('bleu', 0):.2f}
- **ChrF**: {evaluation_results['averages'].get('chrf', 0):.4f}

**Coverage:**
- **Samples Evaluated**: {evaluation_results['evaluated_samples']:,}
- **Language Pairs**: {evaluation_results['summary']['language_pairs_covered']}
- **Google Comparable**: {evaluation_results['summary']['google_comparable_pairs']} pairs

{report}
"""
        return success_msg, updated_leaderboard, summary_plot, ranking_plot
    except Exception as e:
        error_msg = f"❌ Evaluation failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        return error_msg, None, None, None
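# Note (inferred from the usage above): evaluate_predictions() is assumed to
# return a dict shaped roughly like
#   {
#       'error': None or str,
#       'averages': {'quality_score': float, 'bleu': float, 'chrf': float, ...},
#       'evaluated_samples': int,
#       'summary': {'language_pairs_covered': int, 'google_comparable_pairs': int},
#   }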
def refresh_leaderboard_display(
    search_query: str = "",
    model_type_filter: str = "all",
    min_coverage: float = 0.0,
    google_only: bool = False
) -> Tuple[pd.DataFrame, object, object, str]:
    """Refresh and filter the leaderboard display."""
    try:
        global current_leaderboard
        if current_leaderboard is None:
            current_leaderboard = load_leaderboard()

        # Apply filters
        filtered_df = filter_leaderboard(
            current_leaderboard,
            search_query=search_query,
            model_type=model_type_filter,
            min_coverage=min_coverage,
            google_comparable_only=google_only
        )

        # Create plots
        ranking_plot = create_leaderboard_ranking_plot(filtered_df)
        comparison_plot = create_metrics_comparison_plot(filtered_df)

        # Get stats
        stats = get_leaderboard_stats(filtered_df)
        stats_text = f"""
📊 **Leaderboard Statistics**
- **Total Models**: {stats['total_models']}
- **Average Quality Score**: {stats['avg_quality_score']:.4f}
- **Google Comparable Models**: {stats['google_comparable_models']}

**Best Model**: {stats['best_model']['name'] if stats['best_model'] else 'None'}
**Latest Submission**: {stats['latest_submission'][:10] if stats['latest_submission'] else 'None'}
"""
        return filtered_df, ranking_plot, comparison_plot, stats_text
    except Exception as e:
        error_msg = f"Error loading leaderboard: {str(e)}"
        empty_df = pd.DataFrame()
        return empty_df, None, None, error_msg
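# Illustrative call (all filter values are made up): user submissions only,
# at least 50% coverage, matching "nllb" in the model name or author:
#
#   df, rank_plot, cmp_plot, stats = refresh_leaderboard_display(
#       search_query="nllb",
#       model_type_filter="user_submission",
#       min_coverage=0.5,
#       google_only=False,
#   )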
def get_model_details(model_name: str) -> Tuple[str, object]:
    """Get a detailed analysis for a specific model."""
    try:
        global current_leaderboard
        if current_leaderboard is None:
            return "Leaderboard not loaded", None

        # Find the model
        model_row = current_leaderboard[current_leaderboard['model_name'] == model_name]
        if model_row.empty:
            return f"Model '{model_name}' not found", None
        model_info = model_row.iloc[0]

        # Parse the detailed metrics, falling back to an empty dict
        try:
            detailed_results = json.loads(model_info['detailed_metrics'])
        except Exception:
            detailed_results = {}

        # Create the detailed plot
        detail_plot = create_detailed_model_analysis(detailed_results, model_name)

        # Format the model details
        details_text = f"""
# 🔍 Model Details: {model_name}

**Basic Information:**
- **Author**: {model_info['author']}
- **Submission Date**: {model_info['submission_date'][:10]}
- **Model Type**: {model_info['model_type']}
- **Description**: {model_info['description'] or 'No description provided'}

**Performance Metrics:**
- **Quality Score**: {model_info['quality_score']:.4f}
- **BLEU**: {model_info['bleu']:.2f}
- **ChrF**: {model_info['chrf']:.4f}
- **ROUGE-1**: {model_info['rouge1']:.4f}
- **ROUGE-L**: {model_info['rougeL']:.4f}

**Coverage Information:**
- **Total Samples**: {model_info['total_samples']:,}
- **Language Pairs Covered**: {model_info['language_pairs_covered']}
- **Google Comparable Pairs**: {model_info['google_pairs_covered']}
- **Coverage Rate**: {model_info['coverage_rate']:.1%}

**Google Translate Comparison:**
- **Google Quality Score**: {model_info['google_quality_score']:.4f}
- **Google BLEU**: {model_info['google_bleu']:.2f}
- **Google ChrF**: {model_info['google_chrf']:.4f}
"""
        return details_text, detail_plot
    except Exception as e:
        error_msg = f"Error getting model details: {str(e)}"
        return error_msg, None
# Initialize data on startup
print("🚀 Starting SALT Translation Leaderboard...")
initialization_success = initialize_data()

# Create the Gradio interface
with gr.Blocks(
    title=TITLE,
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1400px !important;
        margin: 0 auto;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
        padding: 2rem;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
    }
    .metric-box {
        background: #f8f9fa;
        padding: 1rem;
        border-radius: 8px;
        margin: 0.5rem 0;
        border-left: 4px solid #007bff;
    }
    .error-box {
        background: #f8d7da;
        color: #721c24;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #dc3545;
    }
    .success-box {
        background: #d4edda;
        color: #155724;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #28a745;
    }
    """
) as demo:
    # Header
    gr.Markdown(f"""
    <div class="main-header">

    # {TITLE}

    {DESCRIPTION}

    **Supported Languages**: {len(ALL_UG40_LANGUAGES)} Ugandan languages | **Google Comparable**: {len(GOOGLE_SUPPORTED_LANGUAGES)} languages

    </div>
    """)

    # Status indicator
    if initialization_success:
        status_msg = "✅ System initialized successfully"
    else:
        status_msg = "❌ System initialization failed - some features may not work"
    gr.Markdown(f"**Status**: {status_msg}")

    with gr.Tabs():
        # Tab 1: Get the test set
        with gr.Tab("📥 Download Test Set", id="download"):
            gr.Markdown("""
            ## 📋 Get the SALT Translation Test Set

            Download the standardized test set to evaluate your translation model.
            The test set contains source texts in multiple Ugandan languages for you to translate.
            """)

            with gr.Row():
                download_btn = gr.Button("📥 Download Test Set", variant="primary", size="lg")
            with gr.Row():
                with gr.Column():
                    download_file = gr.File(label="📁 Test Set File", interactive=False)
                with gr.Column():
                    download_info = gr.Markdown(label="ℹ️ Test Set Information")

            gr.Markdown("""
            ### 📝 Instructions
            1. **Download** the test set using the button above
            2. **Run your model** on the source texts to generate translations
            3. **Create a predictions file** with your model's outputs
            4. **Submit** your predictions using the "Submit Predictions" tab

            ### 📄 Required Prediction Format
            Your predictions file must be a CSV/TSV/JSON file with these columns:
            - `sample_id`: The unique identifier from the test set
            - `prediction`: Your model's translation for that sample

            **Example CSV:**
            ```
            sample_id,prediction
            salt_000001,Oli otya mukwano gwange?
            salt_000002,Webale nyo olukya
            ...
            ```
            """)
        # Tab 2: Submit predictions
        with gr.Tab("🚀 Submit Predictions", id="submit"):
            gr.Markdown("""
            ## 🎯 Submit Your Model's Predictions

            Upload your model's predictions on the SALT test set for evaluation.
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    # Model information
                    gr.Markdown("### 📋 Model Information")
                    model_name_input = gr.Textbox(
                        label="🤖 Model Name",
                        placeholder="e.g., MyTranslator-v1.0",
                        info="Unique name for your model"
                    )
                    author_input = gr.Textbox(
                        label="👤 Author/Organization",
                        placeholder="Your name or organization",
                        value="Anonymous"
                    )
                    description_input = gr.Textbox(
                        label="📝 Description (Optional)",
                        placeholder="Brief description of your model",
                        lines=3
                    )

                    # File upload
                    gr.Markdown("### 📤 Upload Predictions")
                    gr.Markdown("Upload a CSV/TSV/JSON file with your model's predictions")
                    predictions_file = gr.File(
                        label="📄 Predictions File",
                        file_types=[".csv", ".tsv", ".json"]
                    )
                    validate_btn = gr.Button("✅ Validate Submission", variant="secondary")
                    submit_btn = gr.Button("🚀 Submit for Evaluation", variant="primary", interactive=False)

                with gr.Column(scale=1):
                    gr.Markdown("### 📋 Validation Results")
                    validation_output = gr.Markdown()

            # Results section
            gr.Markdown("### 📊 Evaluation Results")
            with gr.Row():
                evaluation_output = gr.Markdown()
            with gr.Row():
                with gr.Column():
                    submission_plot = gr.Plot(label="📊 Your Submission Analysis")
                with gr.Column():
                    updated_leaderboard_plot = gr.Plot(label="🏆 Updated Leaderboard")
            with gr.Row():
                results_table = gr.Dataframe(label="🏆 Updated Leaderboard", interactive=False)
        # Tab 3: Leaderboard
        with gr.Tab("🏆 Leaderboard", id="leaderboard"):
            with gr.Row():
                with gr.Column(scale=3):
                    search_input = gr.Textbox(
                        label="🔍 Search Models",
                        placeholder="Search by model name, author...",
                    )
                with gr.Column(scale=1):
                    model_type_dropdown = gr.Dropdown(
                        label="🔧 Model Type",
                        choices=["all", "user_submission", "baseline"],
                        value="all"
                    )
                with gr.Column(scale=1):
                    min_coverage_slider = gr.Slider(
                        label="📊 Min Coverage",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.0,
                        step=0.1
                    )
                with gr.Column(scale=1):
                    google_only_checkbox = gr.Checkbox(
                        label="🤖 Google Comparable Only",
                        value=False
                    )
            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
            with gr.Row():
                leaderboard_stats = gr.Markdown()
            with gr.Row():
                with gr.Column():
                    leaderboard_plot = gr.Plot(label="🏆 Rankings")
                with gr.Column():
                    comparison_plot = gr.Plot(label="📊 Multi-Metric Comparison")
            with gr.Row():
                leaderboard_table = gr.Dataframe(
                    label="📋 Full Leaderboard",
                    interactive=False,
                    wrap=True
                )
        # Tab 4: Model analysis
        with gr.Tab("🔍 Model Analysis", id="analysis"):
            with gr.Row():
                model_select = gr.Dropdown(
                    label="🤖 Select Model",
                    choices=[],
                    value=None,
                    info="Choose a model for detailed analysis"
                )
                analyze_btn = gr.Button("🔍 Analyze", variant="primary")
            with gr.Row():
                model_details = gr.Markdown()
            with gr.Row():
                model_analysis_plot = gr.Plot(label="📊 Detailed Performance Analysis")
        # Tab 5: Documentation
        with gr.Tab("📚 Documentation", id="docs"):
            gr.Markdown(f"""
            # 📚 SALT Translation Leaderboard Documentation

            ## 🎯 Overview
            The SALT Translation Leaderboard is a scientific evaluation platform for translation models on Ugandan languages.
            Submit your model's predictions on our standardized test set to see how it compares with other models.

            ## 🗣️ Supported Languages
            **All UG40 Languages ({len(ALL_UG40_LANGUAGES)} total):**
            {', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in ALL_UG40_LANGUAGES])}

            **Google Translate Comparable ({len(GOOGLE_SUPPORTED_LANGUAGES)} languages):**
            {', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in GOOGLE_SUPPORTED_LANGUAGES])}

            ## 📊 Evaluation Metrics

            ### Primary Metrics
            - **Quality Score**: Composite metric (0-1, higher is better) combining the metrics below
            - **BLEU**: Translation quality score (0-100, higher is better)
            - **ChrF**: Character-level F-score (0-1, higher is better)

            ### Secondary Metrics
            - **ROUGE-1/ROUGE-L**: Recall-oriented metrics (0-1, higher is better)
            - **CER/WER**: Character/Word Error Rate (0-1, lower is better)
            - **Length Ratio**: Ratio of prediction length to reference length

            ## 🚀 Submission Process

            ### Step 1: Download the Test Set
            1. Go to the "Download Test Set" tab
            2. Click the "Download Test Set" button
            3. Save the `salt_test_set.csv` file

            ### Step 2: Generate Predictions
            1. Load the test set in your code
            2. For each row, translate `source_text` from `source_language` to `target_language`
            3. Save the results as a CSV with columns `sample_id`, `prediction` (see the sketch below)
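
            A minimal sketch of Step 2, assuming pandas and a `translate()` function you supply yourself (both are illustrative, not part of this repo):

            ```python
            import pandas as pd

            test = pd.read_csv("salt_test_set.csv")
            test["prediction"] = [
                translate(row.source_text, row.source_language, row.target_language)
                for row in test.itertuples()
            ]
            test[["sample_id", "prediction"]].to_csv("predictions.csv", index=False)
            ```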
            ### Step 3: Submit & Evaluate
            1. Go to the "Submit Predictions" tab
            2. Fill in the model information
            3. Upload your predictions file
            4. Validate and submit for evaluation

            ## 📄 File Formats

            ### Test Set Format
            ```csv
            sample_id,source_text,source_language,target_language,domain,google_comparable
            salt_000001,"Hello world",eng,lug,general,true
            salt_000002,"How are you?",eng,ach,conversation,true
            ```

            ### Predictions Format
            ```csv
            sample_id,prediction
            salt_000001,"Amakuru ensi"
            salt_000002,"Ibino nining?"
            ```

            ## 🏆 Leaderboard Types

            ### 1. Full UG40 Leaderboard
            - Includes all {len(get_all_language_pairs())} language pairs
            - Complete evaluation across all Ugandan languages
            - Primary ranking system

            ### 2. Google Translate Comparable
            - Limited to {len(get_google_comparable_pairs())} pairs
            - Only languages supported by Google Translate
            - Allows direct comparison with the Google Translate baseline

            ## 🔬 Scientific Rigor
            - **Standardized Evaluation**: The same test set for all models
            - **Multiple Metrics**: Comprehensive evaluation beyond BLEU alone
            - **Coverage Tracking**: Transparency about what each model covers
            - **Reproducible**: All evaluation code and data are available

            ## 🤝 Contributing
            This leaderboard is maintained by [Sunbird AI](https://sunbird.ai).

            **Contact**: [[email protected]](mailto:[email protected])
            **GitHub**: [Sunbird AI GitHub](https://github.com/sunbirdai)

            ## 📖 Citation
            If you use this leaderboard in your research, please cite:
            ```bibtex
            @misc{{salt_leaderboard_2024,
                title={{SALT Translation Leaderboard: Evaluation of Translation Models on Ugandan Languages}},
                author={{Sunbird AI}},
                year={{2024}},
                url={{https://huggingface.co/spaces/Sunbird/salt-translation-leaderboard}}
            }}
            ```

            ## 🔗 Related Resources
            - **SALT Dataset**: [sunbird/salt](https://huggingface.co/datasets/sunbird/salt)
            - **Sunbird AI Models**: [Sunbird Organization](https://huggingface.co/Sunbird)
            - **Research Papers**: [Sunbird AI Publications](https://sunbird.ai/research)
            """)
    # Event handlers with state management
    predictions_validated = gr.State(value=None)
    validation_info_state = gr.State(value=None)

    # Download test set
    download_btn.click(
        fn=download_test_set,
        outputs=[download_file, download_info]
    )

    # Validate predictions
    def handle_validation(file, model_name, author, description):
        report, predictions = validate_submission(file, model_name, author, description)
        is_valid = predictions is not None
        # Enable the submit button only after a successful validation
        return report, predictions, predictions, gr.Button(interactive=is_valid)

    validate_btn.click(
        fn=handle_validation,
        inputs=[predictions_file, model_name_input, author_input, description_input],
        outputs=[validation_output, predictions_validated, validation_info_state, submit_btn]
    )
    # Submit for evaluation
    def handle_submission(predictions, model_name, author, description, validation_info):
        if predictions is None:
            return "❌ Please validate your submission first", None, None, None
        # Extract a validation-info dict (default coverage if unavailable)
        validation_dict = {
            'coverage': getattr(validation_info, 'coverage', 0.8),
            'report': 'Validation passed'
        }
        return evaluate_submission(predictions, model_name, author, description, validation_dict)

    submit_btn.click(
        fn=handle_submission,
        inputs=[predictions_validated, model_name_input, author_input, description_input, validation_info_state],
        outputs=[evaluation_output, results_table, submission_plot, updated_leaderboard_plot]
    )
    # Refresh the leaderboard
    def update_leaderboard_and_dropdown(*args):
        table, plot1, plot2, stats = refresh_leaderboard_display(*args)
        # Update the model dropdown choices to match the filtered table
        model_choices = table['model_name'].tolist() if not table.empty else []
        return table, plot1, plot2, stats, gr.Dropdown(choices=model_choices)

    refresh_btn.click(
        fn=update_leaderboard_and_dropdown,
        inputs=[search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox],
        outputs=[leaderboard_table, leaderboard_plot, comparison_plot, leaderboard_stats, model_select]
    )

    # Auto-refresh on filter changes
    for input_component in [search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox]:
        input_component.change(
            fn=update_leaderboard_and_dropdown,
            inputs=[search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox],
            outputs=[leaderboard_table, leaderboard_plot, comparison_plot, leaderboard_stats, model_select]
        )

    # Model analysis
    analyze_btn.click(
        fn=get_model_details,
        inputs=[model_select],
        outputs=[model_details, model_analysis_plot]
    )

    # Load the initial data
    demo.load(
        fn=update_leaderboard_and_dropdown,
        inputs=[search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox],
        outputs=[leaderboard_table, leaderboard_plot, comparison_plot, leaderboard_stats, model_select]
    )
# Launch the application
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )