# app.py
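"""Gradio app for the SALT Translation Leaderboard.

Serves the SALT test set for download, validates and evaluates user-submitted
translation predictions, and maintains a public leaderboard for translation
models covering the UG40 Ugandan languages.
"""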
import subprocess
import sys
import os
from pathlib import Path


def setup_salt():
    """Clone and set up the SALT library, as in the Colab workflow."""
    try:
        # Check if salt is already available
        import salt.dataset
        print("SALT library already available")
        return True
    except ImportError:
        pass

    print("Setting up SALT library...")
    try:
        # Clone SALT repo if not exists
        salt_dir = Path("salt")
        if not salt_dir.exists():
            print("Cloning SALT repository...")
            subprocess.check_call([
                "git", "clone", "https://github.com/sunbirdai/salt.git"
            ])
        else:
            print("SALT repository already exists")

        # Install SALT requirements
        salt_requirements = salt_dir / "requirements.txt"
        if salt_requirements.exists():
            print("Installing SALT requirements...")
            subprocess.check_call([
                sys.executable, "-m", "pip", "install", "-q", "-r", str(salt_requirements)
            ])

        # Add SALT directory to Python path
        salt_path = str(salt_dir.absolute())
        if salt_path not in sys.path:
            sys.path.insert(0, salt_path)
            print(f"Added {salt_path} to Python path")

        # Test import
        import salt.dataset
        print("SALT library setup completed successfully")
        return True
    except Exception as e:
        print(f"Failed to set up SALT: {e}")
        return False


# Set up SALT on startup
print("Starting SALT Translation Leaderboard...")
if not setup_salt():
    print("Cannot continue without SALT library")
    print("Please check that git is available and GitHub is accessible")
    sys.exit(1)
import gradio as gr
import pandas as pd
import json
import traceback
from datetime import datetime
from typing import Optional, Dict, Tuple

# Import our modules
from src.test_set import get_public_test_set, get_complete_test_set, create_test_set_download, validate_test_set_integrity
from src.validation import validate_submission_complete
from src.evaluation import evaluate_predictions, generate_evaluation_report, get_google_translate_baseline
from src.leaderboard import (
    load_leaderboard, add_model_to_leaderboard, get_leaderboard_stats,
    filter_leaderboard, export_leaderboard, get_model_comparison, prepare_leaderboard_display
)
from src.plotting import (
    create_leaderboard_ranking_plot, create_metrics_comparison_plot,
    create_language_pair_heatmap, create_coverage_analysis_plot,
    create_model_performance_timeline, create_google_comparison_plot,
    create_detailed_model_analysis, create_submission_summary_plot
)
from src.utils import sanitize_model_name, get_all_language_pairs, get_google_comparable_pairs
from config import *

# Global variables for caching
current_leaderboard = None
public_test_set = None
complete_test_set = None
def initialize_data():
    """Initialize test sets and leaderboard data."""
    global public_test_set, complete_test_set, current_leaderboard
    try:
        print("Initializing SALT Translation Leaderboard...")

        # Load test sets
        print("Loading test sets...")
        public_test_set = get_public_test_set()
        complete_test_set = get_complete_test_set()

        # Load leaderboard
        print("Loading leaderboard...")
        current_leaderboard = load_leaderboard()

        print("Initialization complete!")
        print(f" - Test set: {len(public_test_set):,} samples")
        print(f" - Language pairs: {len(get_all_language_pairs())}")
        print(f" - Current models: {len(current_leaderboard)}")
        return True
    except Exception as e:
        print(f"Initialization failed: {e}")
        traceback.print_exc()
        return False
def download_test_set() -> Tuple[str, str]:
    """Create downloadable test set and return file path and info."""
    try:
        global public_test_set
        if public_test_set is None:
            public_test_set = get_public_test_set()

        # Create download file
        download_path, stats = create_test_set_download()

        # Create info message
        info_msg = f"""
## SALT Test Set Downloaded Successfully!

### Dataset Statistics:
- **Total Samples**: {stats['total_samples']:,}
- **Language Pairs**: {stats['language_pairs']}
- **Google Comparable**: {stats['google_comparable_samples']:,} samples
- **Languages**: {', '.join(stats['languages'])}

### File Format:
- `sample_id`: Unique identifier for each sample
- `source_text`: Text to be translated
- `source_language`: Source language code
- `target_language`: Target language code
- `domain`: Content domain (if available)
- `google_comparable`: Whether this pair can be compared with Google Translate

### Next Steps:
1. Run your model on the source texts
2. Create a CSV/JSON file with columns: `sample_id`, `prediction`
3. Upload your predictions using the "Submit Predictions" tab
"""
        return download_path, info_msg
    except Exception as e:
        error_msg = f"Error creating test set download: {str(e)}"
        return None, error_msg
def validate_submission(file, model_name: str, author: str, description: str) -> Tuple[str, Optional[pd.DataFrame]]:
    """Validate uploaded prediction file, supporting str paths, bytes, and Gradio wrappers."""
    try:
        if file is None:
            return "Please upload a predictions file", None
        if not model_name.strip():
            return "Please provide a model name", None

        # 1) Determine raw bytes
        if isinstance(file, bytes):
            file_content = file
        elif isinstance(file, str):
            # Could be a path or raw text
            if os.path.exists(file):
                with open(file, "rb") as f:
                    file_content = f.read()
            else:
                file_content = file.encode("utf-8")
        elif hasattr(file, "name") and os.path.exists(file.name):
            # tempfile._TemporaryFileWrapper from Gradio
            with open(file.name, "rb") as f:
                file_content = f.read()
        else:
            return "Could not read uploaded file", None

        # 2) Infer filename for format-sniffing
        filename = (
            getattr(file, "name", None)
            or getattr(file, "filename", None)
            or "predictions.csv"
        )

        # 3) Load test set if needed
        global complete_test_set
        if complete_test_set is None:
            complete_test_set = get_complete_test_set()

        # 4) Run existing validation pipeline
        validation_result = validate_submission_complete(
            file_content, filename, complete_test_set, model_name
        )

        if validation_result["valid"]:
            return validation_result["report"], validation_result["predictions"]
        else:
            return validation_result["report"], None
    except Exception as e:
        return (
            f"Validation error: {e}\n\nTraceback:\n{traceback.format_exc()}",
            None,
        )
def evaluate_submission(
    predictions_df: pd.DataFrame,
    model_name: str,
    author: str,
    description: str,
    validation_info: Dict
) -> Tuple[str, pd.DataFrame, object, object]:
    """Evaluate validated predictions and update leaderboard."""
    try:
        if predictions_df is None:
            return "No valid predictions to evaluate", None, None, None

        # Get complete test set with targets
        global complete_test_set, current_leaderboard
        if complete_test_set is None:
            complete_test_set = get_complete_test_set()

        # Run evaluation
        print(f"Evaluating {model_name}...")
        evaluation_results = evaluate_predictions(predictions_df, complete_test_set)

        if evaluation_results.get('error'):
            return f"Evaluation error: {evaluation_results['error']}", None, None, None

        # Add to leaderboard
        print("Adding to leaderboard...")
        model_type = "user_submission"  # Could be enhanced to detect model type
        updated_leaderboard = add_model_to_leaderboard(
            model_name=sanitize_model_name(model_name),
            author=author or "Anonymous",
            evaluation_results=evaluation_results,
            validation_info=validation_info,
            model_type=model_type,
            description=description or ""
        )

        # Update global leaderboard
        current_leaderboard = updated_leaderboard

        # Generate evaluation report
        report = generate_evaluation_report(evaluation_results, model_name)

        # Create visualization plots
        summary_plot = create_submission_summary_plot(validation_info, evaluation_results)
        ranking_plot = create_leaderboard_ranking_plot(updated_leaderboard)

        # Format success message
        rank = updated_leaderboard[updated_leaderboard['model_name'] == sanitize_model_name(model_name)].index[0] + 1
        total_models = len(updated_leaderboard)

        success_msg = f"""
## Evaluation Complete!

### Your Results:
- **Model**: {model_name}
- **Rank**: #{rank} out of {total_models} models
- **Quality Score**: {evaluation_results['averages'].get('quality_score', 0):.4f}
- **BLEU**: {evaluation_results['averages'].get('bleu', 0):.2f}
- **ChrF**: {evaluation_results['averages'].get('chrf', 0):.4f}

### Coverage:
- **Samples Evaluated**: {evaluation_results['evaluated_samples']:,}
- **Language Pairs**: {evaluation_results['summary']['language_pairs_covered']}
- **Google Comparable**: {evaluation_results['summary']['google_comparable_pairs']} pairs

{report}
"""
        return success_msg, prepare_leaderboard_display(updated_leaderboard), summary_plot, ranking_plot
    except Exception as e:
        error_msg = f"Evaluation failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        return error_msg, None, None, None
def refresh_leaderboard_display(
    search_query: str = "",
    model_type_filter: str = "all",
    min_coverage: float = 0.0,
    google_only: bool = False
) -> Tuple[pd.DataFrame, object, object, str]:
    """Refresh and filter leaderboard display."""
    try:
        global current_leaderboard
        if current_leaderboard is None:
            current_leaderboard = load_leaderboard()

        # Apply filters
        filtered_df = filter_leaderboard(
            current_leaderboard,
            search_query=search_query,
            model_type=model_type_filter,
            min_coverage=min_coverage,
            google_comparable_only=google_only
        )

        # Prepare for display (removes detailed_metrics column)
        display_df = prepare_leaderboard_display(filtered_df)

        # Create plots
        ranking_plot = create_leaderboard_ranking_plot(filtered_df)
        comparison_plot = create_metrics_comparison_plot(filtered_df)

        # Get stats
        stats = get_leaderboard_stats(filtered_df)
        stats_text = f"""
### Leaderboard Statistics
- **Total Models**: {stats['total_models']}
- **Average Quality Score**: {stats['avg_quality_score']:.4f}
- **Google Comparable Models**: {stats['google_comparable_models']}

**Best Model**: {stats['best_model']['name'] if stats['best_model'] else 'None'}
**Latest Submission**: {stats['latest_submission'][:10] if stats['latest_submission'] else 'None'}
"""
        return display_df, ranking_plot, comparison_plot, stats_text
    except Exception as e:
        error_msg = f"Error loading leaderboard: {str(e)}"
        empty_df = pd.DataFrame()
        return empty_df, None, None, error_msg
def get_model_details(model_name: str) -> Tuple[str, object]:
    """Get detailed analysis for a specific model."""
    try:
        global current_leaderboard
        if current_leaderboard is None:
            return "Leaderboard not loaded", None

        # Find model
        model_row = current_leaderboard[current_leaderboard['model_name'] == model_name]
        if model_row.empty:
            return f"Model '{model_name}' not found", None

        model_info = model_row.iloc[0]

        # Parse detailed metrics
        try:
            detailed_results = json.loads(model_info['detailed_metrics'])
        except Exception:
            detailed_results = {}

        # Create detailed plot
        detail_plot = create_detailed_model_analysis(detailed_results, model_name)

        # Format model details
        details_text = f"""
## Model Details: {model_name}

### Basic Information:
- **Author**: {model_info['author']}
- **Submission Date**: {model_info['submission_date'][:10]}
- **Model Type**: {model_info['model_type']}
- **Description**: {model_info['description'] or 'No description provided'}

### Performance Metrics:
- **Quality Score**: {model_info['quality_score']:.4f}
- **BLEU**: {model_info['bleu']:.2f}
- **ChrF**: {model_info['chrf']:.4f}
- **ROUGE-1**: {model_info['rouge1']:.4f}
- **ROUGE-L**: {model_info['rougeL']:.4f}

### Coverage Information:
- **Total Samples**: {model_info['total_samples']:,}
- **Language Pairs Covered**: {model_info['language_pairs_covered']}
- **Google Comparable Pairs**: {model_info['google_pairs_covered']}
- **Coverage Rate**: {model_info['coverage_rate']:.1%}

### Google Translate Comparison:
- **Google Quality Score**: {model_info['google_quality_score']:.4f}
- **Google BLEU**: {model_info['google_bleu']:.2f}
- **Google ChrF**: {model_info['google_chrf']:.4f}
"""
        return details_text, detail_plot
    except Exception as e:
        error_msg = f"Error getting model details: {str(e)}"
        return error_msg, None
# Initialize data on startup
print("Starting SALT Translation Leaderboard...")
initialization_success = initialize_data()

# Create Gradio interface
with gr.Blocks(
    title=TITLE,
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1400px !important;
        margin: 0 auto;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
        padding: 2rem;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
    }
    .metric-box {
        background: #f8f9fa;
        padding: 1rem;
        border-radius: 8px;
        margin: 0.5rem 0;
        border-left: 4px solid #007bff;
    }
    .error-box {
        background: #f8d7da;
        color: #721c24;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #dc3545;
    }
    .success-box {
        background: #d4edda;
        color: #155724;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #28a745;
    }
    """
) as demo:

    # Header
    gr.HTML(f"""
    <div class="main-header">
        <h1>{TITLE}</h1>
        <p>{DESCRIPTION}</p>
        <p><strong>Supported Languages</strong>: {len(ALL_UG40_LANGUAGES)} Ugandan languages | <strong>Google Comparable</strong>: {len(GOOGLE_SUPPORTED_LANGUAGES)} languages</p>
    </div>
    """)

    # Status indicator
    if initialization_success:
        status_msg = "System initialized successfully"
    else:
        status_msg = "System initialization failed - some features may not work"

    gr.Markdown(f"**Status**: {status_msg}")
    with gr.Tabs():
        # Tab 1: Get Test Set
        with gr.Tab("Download Test Set", id="download"):
            gr.Markdown("""
## Get the SALT Translation Test Set

Download the standardized test set to evaluate your translation model.
The test set contains source texts in multiple Ugandan languages that you need to translate.
            """)

            with gr.Row():
                download_btn = gr.Button("Download Test Set", variant="primary", size="lg")

            with gr.Row():
                with gr.Column():
                    download_file = gr.File(label="Test Set File", interactive=False)
                with gr.Column():
                    download_info = gr.Markdown(label="Test Set Information")

            gr.Markdown("""
### Instructions
1. **Download** the test set using the button above
2. **Run your model** on the source texts to generate translations
3. **Create a predictions file** with your model's outputs
4. **Submit** your predictions using the "Submit Predictions" tab

### Required Prediction Format
Your predictions file must be a CSV/TSV/JSON with these columns:
- `sample_id`: The unique identifier from the test set
- `prediction`: Your model's translation for that sample

**Example CSV:**
```
sample_id,prediction
salt_000001,Oli otya mukwano gwange?
salt_000002,Webale nyo olukya
...
```
            """)
        # Tab 2: Submit Predictions
        with gr.Tab("Submit Predictions", id="submit"):
            gr.Markdown("""
## Submit Your Model's Predictions

Upload your model's predictions on the SALT test set for evaluation.
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    # Model information
                    gr.Markdown("### Model Information")
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="e.g., MyTranslator-v1.0",
                        info="Unique name for your model"
                    )
                    author_input = gr.Textbox(
                        label="Author/Organization",
                        placeholder="Your name or organization",
                        value="Anonymous"
                    )
                    description_input = gr.Textbox(
                        label="Description (Optional)",
                        placeholder="Brief description of your model",
                        lines=3
                    )

                    # File upload
                    gr.Markdown("### Upload Predictions")
                    gr.Markdown("Upload a CSV/TSV/JSON file with your model's predictions")
                    predictions_file = gr.File(
                        label="Predictions File",
                        file_types=[".csv", ".tsv", ".json"]
                    )

                    validate_btn = gr.Button("Validate Submission", variant="secondary")
                    submit_btn = gr.Button("Submit for Evaluation", variant="primary", interactive=False)

                with gr.Column(scale=1):
                    gr.Markdown("### Validation Results")
                    validation_output = gr.Markdown()

            # Results section
            gr.Markdown("### Evaluation Results")
            with gr.Row():
                evaluation_output = gr.Markdown()

            with gr.Row():
                with gr.Column():
                    submission_plot = gr.Plot(label="Your Submission Analysis")
                with gr.Column():
                    updated_leaderboard_plot = gr.Plot(label="Updated Leaderboard")

            with gr.Row():
                results_table = gr.Dataframe(label="Updated Leaderboard", interactive=False)
        # Tab 3: Leaderboard
        with gr.Tab("Leaderboard", id="leaderboard"):
            with gr.Row():
                with gr.Column(scale=3):
                    search_input = gr.Textbox(
                        label="Search Models",
                        placeholder="Search by model name, author...",
                    )
                with gr.Column(scale=1):
                    model_type_dropdown = gr.Dropdown(
                        label="Model Type",
                        choices=["all", "user_submission", "baseline"],
                        value="all"
                    )
                with gr.Column(scale=1):
                    min_coverage_slider = gr.Slider(
                        label="Min Coverage",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.0,
                        step=0.1
                    )
                with gr.Column(scale=1):
                    google_only_checkbox = gr.Checkbox(
                        label="Google Comparable Only",
                        value=False
                    )

            with gr.Row():
                refresh_btn = gr.Button("Refresh", variant="secondary")

            with gr.Row():
                leaderboard_stats = gr.Markdown()

            with gr.Row():
                with gr.Column():
                    leaderboard_plot = gr.Plot(label="Rankings")
                with gr.Column():
                    comparison_plot = gr.Plot(label="Multi-Metric Comparison")

            with gr.Row():
                leaderboard_table = gr.Dataframe(
                    label="Full Leaderboard",
                    interactive=False,
                    wrap=True
                )
        # Tab 4: Model Analysis
        with gr.Tab("Model Analysis", id="analysis"):
            with gr.Row():
                model_select = gr.Dropdown(
                    label="Select Model",
                    choices=[],
                    value=None,
                    info="Choose a model for detailed analysis"
                )
                analyze_btn = gr.Button("Analyze", variant="primary")

            with gr.Row():
                model_details = gr.Markdown()

            with gr.Row():
                model_analysis_plot = gr.Plot(label="Detailed Performance Analysis")
        # Tab 5: Documentation
        with gr.Tab("Documentation", id="docs"):
            gr.Markdown(f"""
# SALT Translation Leaderboard Documentation

## Overview
The SALT Translation Leaderboard is a scientific evaluation platform for translation models on Ugandan languages.
Submit your model's predictions on our standardized test set to see how it compares with other models.

## Supported Languages
**All UG40 Languages ({len(ALL_UG40_LANGUAGES)} total):**
{', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in ALL_UG40_LANGUAGES])}

**Google Translate Comparable ({len(GOOGLE_SUPPORTED_LANGUAGES)} languages):**
{', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in GOOGLE_SUPPORTED_LANGUAGES])}

## Evaluation Metrics

### Primary Metrics
- **Quality Score**: Composite metric (0-1, higher is better) combining multiple metrics
- **BLEU**: Translation quality score (0-100, higher is better)
- **ChrF**: Character-level F-score (0-1, higher is better)

### Secondary Metrics
- **ROUGE-1/ROUGE-L**: Recall-oriented metrics (0-1, higher is better)
- **CER/WER**: Character/Word Error Rate (0-1, lower is better)
- **Length Ratio**: Prediction/reference length ratio
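
For a rough local sanity check before you submit, a library such as `sacrebleu` can approximate corpus-level BLEU and ChrF. This is a sketch only; the official numbers come from the leaderboard's own evaluation pipeline and may be scaled differently (for example, sacrebleu reports ChrF on a 0-100 scale):

```python
import sacrebleu

# Align your predictions with the references by sample_id first.
predictions = ["Oli otya mukwano gwange?"]      # model outputs, in sample order
references = [["Oli otya mukwano gwange?"]]     # one reference list aligned with predictions
bleu = sacrebleu.corpus_bleu(predictions, references)
chrf = sacrebleu.corpus_chrf(predictions, references)
print(bleu.score, chrf.score)
```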

## Submission Process

### Step 1: Download Test Set
1. Go to the "Download Test Set" tab
2. Click the "Download Test Set" button
3. Save the `salt_test_set.csv` file

### Step 2: Generate Predictions
1. Load the test set in your code
2. For each row, translate `source_text` from `source_language` to `target_language`
3. Save the results as a CSV with the columns `sample_id` and `prediction` (see the sketch below)
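
For reference, a minimal sketch of Step 2 using pandas; `translate` below is a placeholder for your own model's inference code, not something this leaderboard provides:

```python
import pandas as pd

test_df = pd.read_csv("salt_test_set.csv")

# Translate every row; replace translate() with your own model call.
test_df["prediction"] = test_df.apply(
    lambda row: translate(row["source_text"], row["source_language"], row["target_language"]),
    axis=1,
)

# Keep only the two required columns for submission.
test_df[["sample_id", "prediction"]].to_csv("predictions.csv", index=False)
```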

### Step 3: Submit & Evaluate
1. Go to "Submit Predictions" tab
2. Fill in model information
3. Upload your predictions file
4. Validate and submit for evaluation

## File Formats

### Test Set Format
```csv
sample_id,source_text,source_language,target_language,domain,google_comparable
salt_000001,"Hello world",eng,lug,general,true
salt_000002,"How are you?",eng,ach,conversation,true
```

### Predictions Format
```csv
sample_id,prediction
salt_000001,"Amakuru ensi"
salt_000002,"Ibino nining?"
```

## Leaderboard Types

### 1. Full UG40 Leaderboard
- Includes all {len(get_all_language_pairs())} language pairs
- Complete evaluation across all Ugandan languages
- Primary ranking system

### 2. Google Translate Comparable
- Limited to {len(get_google_comparable_pairs())} pairs
- Only languages supported by Google Translate
- Allows direct comparison with the Google Translate baseline

## Scientific Rigor
- **Standardized Evaluation**: Same test set for all models
- **Multiple Metrics**: Comprehensive evaluation beyond just BLEU
- **Coverage Tracking**: Transparency about what each model covers
- **Reproducible**: All evaluation code and data available

## Contributing
This leaderboard is maintained by [Sunbird AI](https://sunbird.ai).

**Contact**: [[email protected]](mailto:[email protected])
**GitHub**: [Sunbird AI GitHub](https://github.com/sunbirdai)

## Citation
If you use this leaderboard in your research, please cite:
```bibtex
@misc{{salt_leaderboard_2024,
  title={{SALT Translation Leaderboard: Evaluation of Translation Models on Ugandan Languages}},
  author={{Sunbird AI}},
  year={{2024}},
  url={{https://huggingface.co/spaces/Sunbird/salt-translation-leaderboard}}
}}
```

## Related Resources
- **SALT Dataset**: [sunbird/salt](https://huggingface.co/datasets/sunbird/salt)
- **Sunbird AI Models**: [Sunbird Organization](https://huggingface.co/Sunbird)
- **Research Papers**: [Sunbird AI Publications](https://sunbird.ai/research)
            """)
    # Event handlers with state management
    predictions_validated = gr.State(value=None)
    validation_info_state = gr.State(value=None)

    # Download test set
    download_btn.click(
        fn=download_test_set,
        outputs=[download_file, download_info]
    )

    # Validate predictions
    def handle_validation(file, model_name, author, description):
        report, predictions = validate_submission(file, model_name, author, description)
        valid = predictions is not None

        # Build the four return values expected by the outputs list
        if valid:
            return (
                report,
                predictions,  # predictions_validated state
                predictions,  # validation_info_state (validation metadata could be stored here instead)
                gr.update(interactive=True)
            )
        else:
            return (
                report,
                None,
                None,
                gr.update(interactive=False)  # disables the submit button until validation passes
            )

    validate_btn.click(
        fn=handle_validation,
        inputs=[predictions_file, model_name_input, author_input, description_input],
        outputs=[validation_output, predictions_validated, validation_info_state, submit_btn]
    )
    # Submit for evaluation
    def handle_submission(predictions, model_name, author, description, validation_info):
        if predictions is None:
            return "Please validate your submission first", None, None, None

        # Extract validation info dict
        validation_dict = {
            'coverage': getattr(validation_info, 'coverage', 0.8),
            'report': 'Validation passed'
        }

        return evaluate_submission(predictions, model_name, author, description, validation_dict)

    submit_btn.click(
        fn=handle_submission,
        inputs=[predictions_validated, model_name_input, author_input, description_input, validation_info_state],
        outputs=[evaluation_output, results_table, submission_plot, updated_leaderboard_plot]
    )
    # Refresh leaderboard
    def update_leaderboard_and_dropdown(*args):
        table, plot1, plot2, stats = refresh_leaderboard_display(*args)

        # Update model dropdown choices
        if current_leaderboard is not None and not current_leaderboard.empty:
            model_choices = current_leaderboard['model_name'].tolist()
        else:
            model_choices = []

        return table, plot1, plot2, stats, gr.Dropdown(choices=model_choices)

    refresh_btn.click(
        fn=update_leaderboard_and_dropdown,
        inputs=[search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox],
        outputs=[leaderboard_table, leaderboard_plot, comparison_plot, leaderboard_stats, model_select]
    )

    # Auto-refresh on filter changes
    for input_component in [search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox]:
        input_component.change(
            fn=update_leaderboard_and_dropdown,
            inputs=[search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox],
            outputs=[leaderboard_table, leaderboard_plot, comparison_plot, leaderboard_stats, model_select]
        )

    # Model analysis
    analyze_btn.click(
        fn=get_model_details,
        inputs=[model_select],
        outputs=[model_details, model_analysis_plot]
    )

    # Load initial data
    demo.load(
        fn=update_leaderboard_and_dropdown,
        inputs=[search_input, model_type_dropdown, min_coverage_slider, google_only_checkbox],
        outputs=[leaderboard_table, leaderboard_plot, comparison_plot, leaderboard_stats, model_select]
    )
# Launch the application
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )