sudoping01's picture
Update app.py
dce5a82 verified
raw
history blame
61.3 kB
# import gradio as gr
# import pandas as pd
# from datasets import load_dataset
# from jiwer import wer, cer
# import os
# from datetime import datetime
# import re
# from huggingface_hub import login
# # Login to Hugging Face Hub (if token is available)
# token = os.environ.get("HG_TOKEN")
# if token:
# login(token)
# try:
# dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
# references = {row["id"]: row["text"] for row in dataset}
# print(f"Loaded {len(references)} reference transcriptions")
# except Exception as e:
# print(f"Error loading dataset: {str(e)}")
# references = {}
# leaderboard_file = "leaderboard.csv"
# if not os.path.exists(leaderboard_file):
# sample_data = [
# ["test_1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
# ["test_2", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
# ]
# pd.DataFrame(sample_data,
# columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
# print(f"Created new leaderboard file with sample data")
# else:
# leaderboard_df = pd.read_csv(leaderboard_file)
# if "Combined_Score" not in leaderboard_df.columns:
# leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
# leaderboard_df.to_csv(leaderboard_file, index=False)
# print(f"Added Combined_Score column to existing leaderboard")
# print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
# def normalize_text(text):
# """Normalize text for WER/CER calculation"""
# if not isinstance(text, str):
# text = str(text)
# text = text.lower()
# text = re.sub(r'[^\w\s]', '', text)
# text = re.sub(r'\s+', ' ', text).strip()
# return text
# def calculate_metrics(predictions_df):
# """Calculate WER and CER for predictions."""
# results = []
# total_ref_words = 0
# total_ref_chars = 0
# for _, row in predictions_df.iterrows():
# id_val = row["id"]
# if id_val not in references:
# continue
# reference = normalize_text(references[id_val])
# hypothesis = normalize_text(row["text"])
# if not reference or not hypothesis:
# continue
# reference_words = reference.split()
# hypothesis_words = hypothesis.split()
# reference_chars = list(reference)
# try:
# sample_wer = wer(reference, hypothesis)
# sample_cer = cer(reference, hypothesis)
# sample_wer = min(sample_wer, 2.0)
# sample_cer = min(sample_cer, 2.0)
# total_ref_words += len(reference_words)
# total_ref_chars += len(reference_chars)
# results.append({
# "id": id_val,
# "reference": reference,
# "hypothesis": hypothesis,
# "ref_word_count": len(reference_words),
# "ref_char_count": len(reference_chars),
# "wer": sample_wer,
# "cer": sample_cer
# })
# except Exception as e:
# print(f"Error processing sample {id_val}: {str(e)}")
# pass
# if not results:
# raise ValueError("No valid samples for WER/CER calculation")
# avg_wer = sum(item["wer"] for item in results) / len(results)
# avg_cer = sum(item["cer"] for item in results) / len(results)
# weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
# weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
# return avg_wer, avg_cer, weighted_wer, weighted_cer, results
# def format_as_percentage(value):
# """Convert decimal to percentage with 2 decimal places"""
# return f"{value * 100:.2f}%"
# def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
# """Format leaderboard for display with ranking and percentages"""
# if df is None or len(df) == 0:
# return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
# display_df = df.copy()
# display_df = display_df.sort_values(sort_by)
# display_df.insert(0, "Rank", range(1, len(display_df) + 1))
# for col in ["WER", "CER", "Combined_Score"]:
# if col in display_df.columns:
# display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")
# return display_df
# def update_ranking(method):
# """Update leaderboard ranking based on selected method"""
# try:
# current_lb = pd.read_csv(leaderboard_file)
# if "Combined_Score" not in current_lb.columns:
# current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3
# sort_column = "Combined_Score"
# if method == "WER Only":
# sort_column = "WER"
# elif method == "CER Only":
# sort_column = "CER"
# return prepare_leaderboard_for_display(current_lb, sort_column)
# except Exception as e:
# print(f"Error updating ranking: {str(e)}")
# return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
# def process_submission(model_name, csv_file):
# """Process a new model submission"""
# if not model_name or not model_name.strip():
# return "Error: Please provide a model name.", None
# if not csv_file:
# return "Error: Please upload a CSV file.", None
# try:
# df = pd.read_csv(csv_file)
# if len(df) == 0:
# return "Error: Uploaded CSV is empty.", None
# if set(df.columns) != {"id", "text"}:
# return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None
# if df["id"].duplicated().any():
# dup_ids = df[df["id"].duplicated()]["id"].unique()
# return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None
# missing_ids = set(references.keys()) - set(df["id"])
# extra_ids = set(df["id"]) - set(references.keys())
# if missing_ids:
# return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None
# if extra_ids:
# return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None
# try:
# avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
# # Check for suspiciously low values
# if avg_wer < 0.001:
# return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
# except Exception as e:
# return f"Error calculating metrics: {str(e)}", None
# leaderboard = pd.read_csv(leaderboard_file)
# timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# combined_score = avg_wer * 0.7 + avg_cer * 0.3
# if model_name in leaderboard["Model_Name"].values:
# idx = leaderboard[leaderboard["Model_Name"] == model_name].index
# leaderboard.loc[idx, "WER"] = avg_wer
# leaderboard.loc[idx, "CER"] = avg_cer
# leaderboard.loc[idx, "Combined_Score"] = combined_score
# leaderboard.loc[idx, "timestamp"] = timestamp
# updated_leaderboard = leaderboard
# else:
# new_entry = pd.DataFrame(
# [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
# columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
# )
# updated_leaderboard = pd.concat([leaderboard, new_entry])
# updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
# updated_leaderboard.to_csv(leaderboard_file, index=False)
# display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
# return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard
# except Exception as e:
# return f"Error processing submission: {str(e)}", None
# def get_current_leaderboard():
# """Get the current leaderboard data for display"""
# try:
# if os.path.exists(leaderboard_file):
# current_leaderboard = pd.read_csv(leaderboard_file)
# if "Combined_Score" not in current_leaderboard.columns:
# current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
# current_leaderboard.to_csv(leaderboard_file, index=False)
# return current_leaderboard
# else:
# return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
# except Exception as e:
# print(f"Error getting leaderboard: {str(e)}")
# return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
# def create_leaderboard_table():
# """Create and format the leaderboard table for display"""
# leaderboard_data = get_current_leaderboard()
# return prepare_leaderboard_for_display(leaderboard_data)
# with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
# gr.Markdown(
# """
# # 🇲🇱 Bambara ASR Leaderboard
# This leaderboard tracks and evaluates speech recognition models for the Bambara language.
# Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score.
# ## Current Models Performance
# """
# )
# current_data = get_current_leaderboard()
# if len(current_data) > 0:
# best_model = current_data.sort_values("Combined_Score").iloc[0]
# gr.Markdown(f"""
# ### 🏆 Current Best Model: **{best_model['Model_Name']}**
# * WER: **{best_model['WER']*100:.2f}%**
# * CER: **{best_model['CER']*100:.2f}%**
# * Combined Score: **{best_model['Combined_Score']*100:.2f}%**
# """)
# with gr.Tabs() as tabs:
# with gr.TabItem("🏅 Model Rankings"):
# initial_leaderboard = create_leaderboard_table()
# ranking_method = gr.Radio(
# ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
# label="Ranking Method",
# value="Combined Score (WER 70%, CER 30%)"
# )
# leaderboard_view = gr.DataFrame(
# value=initial_leaderboard,
# interactive=False,
# label="Models are ranked by selected metric - lower is better"
# )
# ranking_method.change(
# fn=update_ranking,
# inputs=[ranking_method],
# outputs=[leaderboard_view]
# )
# with gr.Accordion("Metrics Explanation", open=False):
# gr.Markdown(
# """
# ## Understanding ASR Metrics
# ### Word Error Rate (WER)
# WER measures how accurately the ASR system recognizes whole words:
# * Lower values indicate better performance
# * Calculated as: (Substitutions + Insertions + Deletions) / Total Words
# * A WER of 0% means perfect transcription
# * A WER of 20% means approximately 1 in 5 words contains an error
# ### Character Error Rate (CER)
# CER measures accuracy at the character level:
# * More fine-grained than WER
# * Better at capturing partial word matches
# * Particularly useful for agglutinative languages like Bambara
# ### Combined Score
# * Weighted average: 70% WER + 30% CER
# * Provides a balanced evaluation of model performance
# * Used as the primary ranking metric
# """
# )
# with gr.TabItem("📊 Submit New Results"):
# gr.Markdown(
# """
# ### Submit a new model for evaluation
# Upload a CSV file with the following format:
# * Must contain exactly two columns: 'id' and 'text'
# * The 'id' column should match the reference dataset IDs
# * The 'text' column should contain your model's transcriptions
# """
# )
# with gr.Row():
# model_name_input = gr.Textbox(
# label="Model Name",
# placeholder="e.g., MALIBA-AI/bambara-asr"
# )
# gr.Markdown("*Use a descriptive name to identify your model*")
# with gr.Row():
# csv_upload = gr.File(
# label="Upload CSV File",
# file_types=[".csv"]
# )
# gr.Markdown("*CSV with columns: id, text*")
# submit_btn = gr.Button("Submit", variant="primary")
# output_msg = gr.Textbox(label="Status", interactive=False)
# leaderboard_display = gr.DataFrame(
# label="Updated Leaderboard",
# value=initial_leaderboard,
# interactive=False
# )
# submit_btn.click(
# fn=process_submission,
# inputs=[model_name_input, csv_upload],
# outputs=[output_msg, leaderboard_display]
# )
# with gr.TabItem("πŸ“ Benchmark Dataset"):
# gr.Markdown(
# """
# ## About the Benchmark Dataset
# This leaderboard uses the **[sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/MALIBA-AI/bambara-speech-recognition-leaderboard)** dataset:
# * Contains diverse Bambara speech samples
# * Includes various speakers, accents, and dialects
# * Covers different speech styles and recording conditions
# * Transcribed and validated
# ### How to Generate Predictions
# To submit results to this leaderboard:
# 1. Download the audio files from the benchmark dataset
# 2. Run your ASR model on the audio files
# 3. Generate a CSV file with 'id' and 'text' columns
# 4. Submit your results using the form in the "Submit New Results" tab
# ### Evaluation Guidelines
# * Text is normalized (lowercase, punctuation removed) before metrics calculation
# * Extreme outliers are capped to prevent skewing results
# * All submissions are validated for format and completeness
# NB: This work is a collaboration between MALIBA-AI, RobotsMali AI4D-LAB and Djelia
# """
# )
# gr.Markdown(
# """
# ---
# ### About MALIBA-AI
# **MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation**
# *"No Malian Language Left Behind"*
# This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
# For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI).
# """
# )
# if __name__ == "__main__":
# demo.launch()
import gradio as gr
import pandas as pd
from datasets import load_dataset
from jiwer import wer, cer
import os
from datetime import datetime
import re
import plotly.express as px
import plotly.graph_objects as go
from huggingface_hub import login
import numpy as np
# HTML fragment with <link> tags only (no CSS rules): it preconnects to the
# Google Fonts hosts and loads the Inter (400/600/700) and Rubik (400/600)
# font families.
# NOTE(review): presumably injected into the page <head> when the Gradio app
# is built -- the call site is not visible here; confirm.
custom_head_html = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Rubik:wght@400;600&display=swap" rel="stylesheet">
"""
# Page header with MALIBA-AI branding: Mali flag, title, tagline and a
# microphone glyph. The emoji and the bullet are written as real Unicode
# characters (they were previously stored as mis-decoded UTF-8 bytes and
# rendered as garbage in the browser).
new_header_html = """
<center>
<br><br>
<div style="display: flex; align-items: center; justify-content: center; gap: 20px; margin-bottom: 20px;">
<div style="font-size: 4em;">🇲🇱</div>
<div>
<h1 style="margin: 0; font-family: 'Rubik', sans-serif; color: #2f3b7d; font-size: 2.5em; font-weight: 700;">
Bambara ASR Leaderboard
</h1>
<p style="margin: 5px 0 0 0; font-size: 1.2em; color: #7d3561; font-weight: 600;">
Powered by MALIBA-AI • "No Malian Language Left Behind"
</p>
</div>
<div style="font-size: 4em;">🎙️</div>
</div>
</center>
"""
# Stylesheet for the whole app ("Sahara"-inspired look): page background and
# fonts, nav buttons, content cards, tabs, tables, badges, stat cards, forms,
# citation block, dropdowns, buttons and a <=768px responsive override.
# It relies on the 'Inter' and "Rubik" font families.
# NOTE(review): several selectors use hashed class names (.svelte-1nguped,
# .svelte-15jxnnn, .svelte-1tcem6n) -- presumably generated by Gradio's
# frontend build and therefore likely to change between Gradio versions;
# verify these rules still match after any Gradio upgrade.
sahara_style_css = """
/* Global Styles */
div[class*="gradio-container"] {
background: #FFFBF5 !important;
color: #000 !important;
font-family: 'Inter', sans-serif !important;
}
div.svelte-1nguped {
background: white !important;
}
.fillable.svelte-15jxnnn.svelte-15jxnnn:not(.fill_width) {
max-width: 1580px !important;
}
/* Navigation Buttons */
.nav-button {
background-color: #117b75 !important;
color: #fff !important;
font-weight: bold !important;
border-radius: 8px !important;
border: none !important;
box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
transition: all 0.3s ease !important;
}
.nav-button:hover {
background-color: #0f6b66 !important;
color: #e8850e !important;
transform: translateY(-1px) !important;
box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
}
/* Content Cards */
.content-section {
padding: 40px 0;
}
.content-card {
background-color: #fff !important;
border-radius: 16px !important;
box-shadow: 0 10px 25px -5px rgba(0,0,0,0.1), 0 8px 10px -6px rgba(0,0,0,0.1) !important;
padding: 40px !important;
margin-bottom: 30px !important;
border: 1px solid rgba(0,0,0,0.05) !important;
}
/* Typography */
.content-card h2 {
font-family: "Rubik", sans-serif !important;
font-size: 32px !important;
font-weight: 700 !important;
line-height: 1.25 !important;
letter-spacing: -1px !important;
color: #2f3b7d !important;
margin-bottom: 20px !important;
text-align: center !important;
}
.content-card h3 {
font-size: 22px !important;
color: #2f3b7d !important;
font-weight: 600 !important;
margin-bottom: 15px !important;
}
.content-card h4 {
font-family: "Rubik", sans-serif !important;
color: #7d3561 !important;
font-weight: 600 !important;
margin-bottom: 10px !important;
}
.title {
color: #7d3561 !important;
font-weight: 600 !important;
}
/* Tab Styling */
.tab-wrapper.svelte-1tcem6n.svelte-1tcem6n {
display: flex;
align-items: center;
justify-content: space-between;
position: relative;
height: auto !important;
padding-bottom: 0 !important;
}
.selected.svelte-1tcem6n.svelte-1tcem6n {
background-color: #7d3561 !important;
color: #fff !important;
border-radius: 8px 8px 0 0 !important;
}
button.svelte-1tcem6n.svelte-1tcem6n {
color: #7d3561 !important;
font-weight: 600 !important;
font-size: 16px !important;
padding: 12px 20px !important;
background-color: #fff !important;
border-radius: 8px 8px 0 0 !important;
border: 2px solid #e9ecef !important;
border-bottom: none !important;
transition: all 0.3s ease !important;
}
button.svelte-1tcem6n.svelte-1tcem6n:hover {
background-color: #f8f9fa !important;
border-color: #7d3561 !important;
}
.tab-container.svelte-1tcem6n.svelte-1tcem6n:after {
content: "";
position: absolute;
bottom: 0;
left: 0;
right: 0;
height: 3px;
background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
}
/* Table Styling */
div[class*="gradio-container"] .prose table {
color: #000 !important;
border: 2px solid #dca02a !important;
border-radius: 12px !important;
margin-bottom: 20px !important;
margin-left: auto !important;
margin-right: auto !important;
width: 100% !important;
border-collapse: separate !important;
border-spacing: 0 !important;
overflow: hidden !important;
box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
}
div[class*="gradio-container"] .prose thead tr {
background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
}
div[class*="gradio-container"] .prose th {
color: #fff !important;
font-weight: 700 !important;
font-size: 14px !important;
padding: 15px 10px !important;
text-align: center !important;
border: none !important;
}
div[class*="gradio-container"] .prose td {
font-size: 14px !important;
padding: 12px 10px !important;
border: none !important;
text-align: center !important;
color: #000 !important;
border-bottom: 1px solid #f8f9fa !important;
}
div[class*="gradio-container"] .prose tbody tr:nth-child(even) {
background-color: #f8f9fa !important;
}
div[class*="gradio-container"] .prose tbody tr:hover {
background-color: #e3f2fd !important;
transition: background-color 0.2s ease !important;
}
/* First column (model names) styling */
div[class*="gradio-container"] .prose th:first-child,
div[class*="gradio-container"] .prose td:first-child {
text-align: left !important;
min-width: 250px !important;
font-weight: 600 !important;
}
/* Performance badges */
.performance-badge {
display: inline-block;
padding: 4px 8px;
border-radius: 12px;
font-size: 12px;
font-weight: 600;
margin-left: 8px;
}
.badge-excellent {
background: #d4edda;
color: #155724;
}
.badge-good {
background: #fff3cd;
color: #856404;
}
.badge-fair {
background: #f8d7da;
color: #721c24;
}
/* Stats cards */
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin: 20px 0;
}
.stat-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 12px;
text-align: center;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.stat-number {
font-size: 2em;
font-weight: 700;
margin-bottom: 5px;
}
.stat-label {
font-size: 0.9em;
opacity: 0.9;
}
/* Form styling */
.form-section {
background: #f8f9fa;
border-radius: 12px;
padding: 25px;
margin: 20px 0;
border-left: 4px solid #7d3561;
}
/* Citation block */
.citation-block {
background-color: #FDF6E3 !important;
border-radius: 12px !important;
padding: 25px !important;
border-left: 4px solid #D97706 !important;
margin: 20px 0 !important;
}
/* Dropdown styling */
.gradio-dropdown {
border-radius: 8px !important;
border: 2px solid #e9ecef !important;
}
.gradio-dropdown:focus {
border-color: #7d3561 !important;
box-shadow: 0 0 0 3px rgba(125, 53, 97, 0.1) !important;
}
/* Button styling */
.gradio-button {
border-radius: 8px !important;
font-weight: 600 !important;
transition: all 0.3s ease !important;
}
.gradio-button.primary {
background: linear-gradient(135deg, #7d3561 0%, #2f3b7d 100%) !important;
border: none !important;
color: white !important;
}
.gradio-button.primary:hover {
transform: translateY(-2px) !important;
box-shadow: 0 4px 12px rgba(125, 53, 97, 0.3) !important;
}
/* Responsive design */
@media (max-width: 768px) {
.content-card {
padding: 20px !important;
margin-bottom: 20px !important;
}
.content-card h2 {
font-size: 24px !important;
}
.stats-grid {
grid-template-columns: 1fr !important;
}
}
"""
# Authenticate against the Hugging Face Hub, but only when a token has been
# provided through the HG_TOKEN environment variable; otherwise stay anonymous.
token = os.getenv("HG_TOKEN")
if token:
    login(token)
# Fetch the "eval" split of the benchmark and index its transcriptions by
# sample id. Any failure (network, auth, schema) is reported on stdout and
# leaves `references` as an empty mapping so the app can still start.
try:
    dataset = load_dataset(
        "sudoping01/bambara-speech-recognition-benchmark",
        name="default",
    )["eval"]
    references = dict((row["id"], row["text"]) for row in dataset)
    print(f"Loaded {len(references)} reference transcriptions")
except Exception as e:
    print(f"Error loading dataset: {str(e)}")
    references = {}
# Initialize leaderboard
# The leaderboard lives in a CSV next to the app. On first start it is seeded
# with two placeholder rows; on later starts, columns introduced by newer
# versions of the app are backfilled so older files keep working.
leaderboard_file = "leaderboard.csv"
if not os.path.exists(leaderboard_file):
    # Combined_Score follows the formula used everywhere else in this file:
    # 0.7 * WER + 0.3 * CER (rounded to 4 decimals).
    sample_data = [
        ["MALIBA-AI/bambara-whisper-small", 0.2264, 0.1094, 0.1913, "2025-03-15 10:30:45", "Whisper-based", "Mali", "ASR"],
        ["OpenAI/whisper-base", 0.3264, 0.1094, 0.2613, "2025-03-15 10:30:45", "Foundation", "USA", "ASR"],
    ]
    # Bind leaderboard_df in this branch too, so the name exists no matter
    # which branch ran.
    leaderboard_df = pd.DataFrame(
        sample_data,
        columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"],
    )
    leaderboard_df.to_csv(leaderboard_file, index=False)
    print("Created new leaderboard file with sample data")
else:
    leaderboard_df = pd.read_csv(leaderboard_file)
    # Add new columns if they don't exist
    required_columns = ["Combined_Score", "Type", "Origin", "Task"]
    for col in required_columns:
        if col not in leaderboard_df.columns:
            if col == "Combined_Score":
                leaderboard_df[col] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
            else:
                # Metadata unknown for legacy rows; every entry is an ASR model.
                default_val = "Unknown" if col != "Task" else "ASR"
                leaderboard_df[col] = default_val
    # Persist once, after all missing columns have been backfilled.
    leaderboard_df.to_csv(leaderboard_file, index=False)
    print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
def normalize_text(text):
    """Return *text* lower-cased, without punctuation, with whitespace collapsed.

    Non-string input is converted with ``str()`` first. Every character that
    is neither a word character nor whitespace is removed, runs of whitespace
    become a single space, and leading/trailing whitespace is stripped.
    """
    lowered = (text if isinstance(text, str) else str(text)).lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    return re.sub(r'\s+', ' ', without_punctuation).strip()
def calculate_metrics(predictions_df):
    """Score a predictions table against the reference transcriptions.

    Args:
        predictions_df: DataFrame with an ``id`` column and a ``text`` column
            holding the model's transcription for that id.

    Returns:
        A tuple ``(avg_wer, avg_cer, weighted_wer, weighted_cer, results)``.
        The averages are plain means over the scored samples; the weighted
        values weight each sample by its reference word / character count.
        ``results`` is a list with one dict per scored sample (id, normalized
        reference and hypothesis, reference word/char counts, wer, cer).

    Raises:
        ValueError: if no sample could be scored.
    """
    results = []

    for _, row in predictions_df.iterrows():
        id_val = row["id"]
        if id_val not in references:
            continue

        reference = normalize_text(references[id_val])
        if not reference:
            # Nothing to compare against; an empty reference cannot be scored.
            continue
        hypothesis = normalize_text(row["text"])

        if hypothesis:
            try:
                # Cap per-sample rates so one runaway hypothesis (many
                # insertions) cannot dominate the average.
                sample_wer = min(wer(reference, hypothesis), 2.0)
                sample_cer = min(cer(reference, hypothesis), 2.0)
            except Exception as e:
                # Best effort: report the broken sample and keep scoring the rest.
                print(f"Error processing sample {id_val}: {str(e)}")
                continue
        else:
            # A prediction that normalizes to nothing deletes every reference
            # token, i.e. a 100% error rate. It used to be skipped entirely,
            # which let blank predictions drop out of the average.
            sample_wer = 1.0
            sample_cer = 1.0

        results.append({
            "id": id_val,
            "reference": reference,
            "hypothesis": hypothesis,
            "ref_word_count": len(reference.split()),
            "ref_char_count": len(reference),
            "wer": sample_wer,
            "cer": sample_cer
        })

    if not results:
        raise ValueError("No valid samples for WER/CER calculation")

    # Every scored reference is non-empty, so both totals are > 0.
    total_ref_words = sum(item["ref_word_count"] for item in results)
    total_ref_chars = sum(item["ref_char_count"] for item in results)

    avg_wer = sum(item["wer"] for item in results) / len(results)
    avg_cer = sum(item["cer"] for item in results) / len(results)
    weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
    weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
    return avg_wer, avg_cer, weighted_wer, weighted_cer, results
def format_as_percentage(value):
    """Render a fraction (e.g. 0.1234) as a percent string with two decimals ("12.34%")."""
    percent = value * 100
    return "{:.2f}%".format(percent)
def get_performance_badge(score):
    """Map an error-rate fraction to a human-readable performance badge.

    Lower is better: below 0.15 is "Excellent", below 0.30 is "Good",
    anything else is "Fair". The emoji are written as real Unicode characters
    (they were previously stored as mis-decoded bytes and displayed as
    garbage).
    """
    if score < 0.15:
        return "🏆 Excellent"
    if score < 0.30:
        return "🥉 Good"
    return "📈 Fair"
def add_medals_to_models(df, score_col="Combined_Score"):
    """Return a copy of *df* sorted by *score_col* with medals on the top models.

    Scores are error rates, so lower is better. The three lowest distinct
    scores get a medal prefix in ``Model_Name``; models with equal scores
    share the same medal. Rows whose score is not numeric sort last and get
    no medal.

    Args:
        df: leaderboard frame with a ``Model_Name`` column.
        score_col: column to rank by.

    Returns:
        *df* itself (unchanged) when it is empty or lacks *score_col*;
        otherwise a new, re-indexed frame with the same columns.
    """
    if df.empty or score_col not in df.columns:
        return df

    medals = ["🏆", "🥈", "🥉"]
    float_col = f"{score_col}_float"

    ranked = df.copy()
    # Coerce to numbers so malformed cells become NaN instead of raising.
    ranked[float_col] = pd.to_numeric(ranked[score_col], errors="coerce")
    ranked = ranked.sort_values(
        by=float_col, ascending=True, na_position="last", kind="stable"
    ).reset_index(drop=True)

    # Dense rank: 1 for the best distinct score, 2 for the next, ...; ties
    # share a rank and NaN scores stay NaN.
    dense_rank = ranked[float_col].rank(method="dense")

    def medal_prefix(rank):
        if pd.isna(rank) or rank > len(medals):
            return ""
        return medals[int(rank) - 1] + " "

    ranked["Model_Name"] = dense_rank.map(medal_prefix) + ranked["Model_Name"].astype(str)
    return ranked.drop(columns=[float_col])
def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
    """Build the table shown in the UI from the raw leaderboard frame.

    Args:
        df: raw leaderboard (``Model_Name``, ``WER``, ``CER``,
            ``Combined_Score``, optionally ``timestamp`` and ``Type``), or
            ``None``.
        sort_by: score column used for ordering, ranking and medals; lower
            is better.

    Returns:
        A new DataFrame restricted to the display columns that could be
        built: Rank, Model, WER (%), CER (%), Combined Score (%),
        Performance, Type, Date. An empty frame with all of these columns is
        returned when *df* is ``None`` or has no rows.
    """
    display_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]
    if df is None or len(df) == 0:
        return pd.DataFrame(columns=display_columns)

    display_df = df.copy()

    # Strip the "org/" prefix BEFORE medals are added. Doing it afterwards
    # cut at the last "/" and threw the medal away together with the org name.
    display_df["Model_Name"] = display_df["Model_Name"].apply(
        lambda name: str(name).split("/")[-1]
    )
    display_df = add_medals_to_models(display_df, sort_by)

    # Sort on a numeric view of the score so malformed cells sort last.
    sort_key = f"{sort_by}_float"
    display_df[sort_key] = pd.to_numeric(display_df[sort_by], errors='coerce')
    display_df = display_df.sort_values(sort_key, ascending=True, na_position='last')
    display_df = display_df.drop(columns=[sort_key])

    display_df.insert(0, "Rank", range(1, len(display_df) + 1))

    # Source column -> display header. The combined score used to be written
    # to "Combined_Score (%)", which is not in display_columns, so the column
    # silently vanished from the table.
    percent_headers = {
        "WER": "WER (%)",
        "CER": "CER (%)",
        "Combined_Score": "Combined Score (%)",
    }
    for source_col, header in percent_headers.items():
        if source_col in display_df.columns:
            display_df[header] = display_df[source_col].apply(
                lambda x: f"{x * 100:.2f}" if pd.notna(x) else "---"
            )

    # The badge always reflects the combined score, whatever the sort column.
    if "Combined_Score" in display_df.columns:
        display_df["Performance"] = display_df["Combined_Score"].apply(
            lambda x: get_performance_badge(x) if pd.notna(x) else "---"
        )
    else:
        display_df["Performance"] = "---"

    display_df["Model"] = display_df["Model_Name"]

    if "timestamp" in display_df.columns:
        dates = pd.to_datetime(display_df["timestamp"], errors='coerce').dt.strftime("%Y-%m-%d")
        # Unparseable timestamps get the same placeholder as other gaps.
        display_df["Date"] = dates.fillna("---")
    else:
        display_df["Date"] = "---"

    available_columns = [col for col in display_columns if col in display_df.columns]
    return display_df[available_columns]
def create_performance_chart():
    """Build the WER / CER / combined-score comparison chart.

    Reads the leaderboard CSV, orders models by ``Combined_Score`` and draws
    WER and CER as bars plus the combined score as a line, all in percent.

    Returns:
        A plotly ``Figure``, or ``None`` when the leaderboard is empty or the
        chart could not be built (the error is printed).
    """
    try:
        df = pd.read_csv(leaderboard_file)
        if len(df) == 0:
            return None

        # Sort by Combined_Score
        df = df.sort_values("Combined_Score")

        # Short display names ("org/model" -> "model"), computed once and
        # shared by all three traces; str() keeps non-string names from
        # breaking the chart.
        model_labels = df["Model_Name"].apply(lambda name: str(name).split("/")[-1])

        fig = go.Figure()
        # Add WER bars
        fig.add_trace(go.Bar(
            name="WER",
            x=model_labels,
            y=df["WER"] * 100,
            marker_color='#ff7f0e',
            hovertemplate='<b>%{x}</b><br>WER: %{y:.2f}%<extra></extra>'
        ))
        # Add CER bars
        fig.add_trace(go.Bar(
            name="CER",
            x=model_labels,
            y=df["CER"] * 100,
            marker_color='#2ca02c',
            hovertemplate='<b>%{x}</b><br>CER: %{y:.2f}%<extra></extra>'
        ))
        # Add Combined Score line
        fig.add_trace(go.Scatter(
            name="Combined Score",
            x=model_labels,
            y=df["Combined_Score"] * 100,
            mode='lines+markers',
            line=dict(color='#d62728', width=3),
            marker=dict(size=8),
            hovertemplate='<b>%{x}</b><br>Combined Score: %{y:.2f}%<extra></extra>'
        ))
        fig.update_layout(
            title={
                'text': "📊 Model Performance Comparison",
                'x': 0.5,
                'font': {'size': 18, 'family': 'Rubik'}
            },
            xaxis_title="Model",
            yaxis_title="Error Rate (%)",
            hovermode='x unified',
            height=500,
            showlegend=True,
            plot_bgcolor='rgba(0,0,0,0)',
            paper_bgcolor='rgba(0,0,0,0)',
            font=dict(family="Inter", size=12),
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )
        return fig
    except Exception as e:
        print(f"Error creating chart: {str(e)}")
        return None
def get_leaderboard_stats():
    """Return an HTML snippet with summary statistics for the leaderboard.

    Reads the leaderboard CSV and renders stat cards for the number of
    models, the best combined score and the mean WER / CER, followed by the
    name of the best model. With an empty leaderboard a single "0 models"
    card is returned; on any failure a ``<p>`` with the error text is
    returned instead of raising.

    Model names come from user submissions and are HTML-escaped before being
    embedded in the markup, as is the exception text.
    """
    import html  # local import: only this function builds HTML from data

    try:
        df = pd.read_csv(leaderboard_file)
        if len(df) == 0:
            return """
<div class="stats-grid">
<div class="stat-card">
<div class="stat-number">0</div>
<div class="stat-label">Models Submitted</div>
</div>
</div>
"""
        # Lower combined score is better, so the minimum is the champion.
        best_model = df.loc[df["Combined_Score"].idxmin()]
        total_models = len(df)
        avg_wer = df["WER"].mean()
        avg_cer = df["CER"].mean()
        champion_name = html.escape(str(best_model['Model_Name']))
        return f"""
<div class="stats-grid">
<div class="stat-card">
<div class="stat-number">{total_models}</div>
<div class="stat-label">Models Evaluated</div>
</div>
<div class="stat-card">
<div class="stat-number">{format_as_percentage(best_model['Combined_Score'])}</div>
<div class="stat-label">Best Combined Score</div>
</div>
<div class="stat-card">
<div class="stat-number">{format_as_percentage(avg_wer)}</div>
<div class="stat-label">Average WER</div>
</div>
<div class="stat-card">
<div class="stat-number">{format_as_percentage(avg_cer)}</div>
<div class="stat-label">Average CER</div>
</div>
</div>
<div style="text-align: center; margin-top: 20px;">
<h4>🏆 Current Champion: {champion_name}</h4>
</div>
"""
    except Exception as e:
        return f"<p>Error loading stats: {html.escape(str(e))}</p>"
def update_ranking(method):
    """Re-rank the leaderboard for the ranking method chosen in the UI.

    "WER Only" and "CER Only" sort by that single metric; every other value
    sorts by the combined score. The leaderboard is re-read from disk on each
    call. If anything fails, the error is printed and an empty table with the
    display columns is returned.
    """
    empty_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]
    try:
        board = pd.read_csv(leaderboard_file)
        has_combined = "Combined_Score" in board.columns
        if not has_combined:
            # Older files predate the combined score; derive it on the fly.
            board["Combined_Score"] = board["WER"] * 0.7 + board["CER"] * 0.3

        if method == "WER Only":
            sort_column = "WER"
        else:
            sort_column = "CER" if method == "CER Only" else "Combined_Score"

        return prepare_leaderboard_for_display(board, sort_column)
    except Exception as e:
        print(f"Error updating ranking: {str(e)}")
        return pd.DataFrame(columns=empty_columns)
def compare_models(model_1_name, model_2_name):
    """Build a side-by-side comparison table for two leaderboard models.

    Args:
        model_1_name: full ``Model_Name`` of the first model.
        model_2_name: full ``Model_Name`` of the second model.

    Returns:
        A DataFrame with a ``Metric`` column, one column per model and a
        ``Difference`` column (model 1 minus model 2, in percentage points).
        When the request cannot be served, a one-cell frame with an ``Info``
        or ``Error`` message is returned instead; this function never raises.
    """
    try:
        df = pd.read_csv(leaderboard_file)
        if model_1_name == model_2_name:
            return pd.DataFrame([{"Info": "Please select two different models to compare."}])
        model_1 = df[df["Model_Name"] == model_1_name]
        model_2 = df[df["Model_Name"] == model_2_name]
        if model_1.empty or model_2.empty:
            return pd.DataFrame([{"Info": "One or both models not found in leaderboard."}])
        m1 = model_1.iloc[0]
        m2 = model_2.iloc[0]

        # Column headers use the short name ("org/model" -> "model").
        label_1 = str(model_1_name).split("/")[-1]
        label_2 = str(model_2_name).split("/")[-1]
        if label_1 == label_2:
            # "org1/x" vs "org2/x": identical short names would collide as
            # dict keys and one model's column would be lost, so fall back to
            # the full names.
            label_1, label_2 = str(model_1_name), str(model_2_name)

        # (row title, leaderboard column)
        metrics = [("WER", "WER"), ("CER", "CER"), ("Combined Score", "Combined_Score")]
        comparison_data = {
            "Metric": [title for title, _ in metrics],
            label_1: [f"{m1[column] * 100:.2f}%" for _, column in metrics],
            label_2: [f"{m2[column] * 100:.2f}%" for _, column in metrics],
            "Difference": [f"{(m1[column] - m2[column]) * 100:+.2f}%" for _, column in metrics],
        }
        return pd.DataFrame(comparison_data)
    except Exception as e:
        return pd.DataFrame([{"Error": f"Error comparing models: {str(e)}"}])
def process_submission(model_name, csv_file, model_type, origin_country):
    """Validate an uploaded predictions CSV, score it and update the leaderboard.

    Args:
        model_name: Name under which the model is listed. Surrounding
            whitespace is ignored.
        csv_file: The uploaded predictions file, in any form ``pd.read_csv``
            accepts. It must hold exactly the columns ``id`` and ``text``,
            with one row per reference ID and no duplicates.
        model_type: Stored in the leaderboard's ``Type`` column.
        origin_country: Stored in the leaderboard's ``Origin`` column.

    Returns:
        A 3-tuple ``(status_markdown, display_leaderboard, chart)``. On any
        validation or processing failure the status describes the problem
        and the other two items are ``None``. Exceptions are reported through
        the status message rather than raised.
    """
    if not model_name or not model_name.strip():
        return "❌ **Error:** Please provide a model name.", None, None
    if not csv_file:
        return "❌ **Error:** Please upload a CSV file.", None, None

    # The name was validated in stripped form, so store it that way too;
    # otherwise "foo" and "foo " become two separate leaderboard entries.
    model_name = model_name.strip()

    try:
        df = pd.read_csv(csv_file)
        if len(df) == 0:
            return "❌ **Error:** Uploaded CSV is empty.", None, None
        if set(df.columns) != {"id", "text"}:
            return f"❌ **Error:** CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None, None
        if df["id"].duplicated().any():
            dup_ids = df[df["id"].duplicated()]["id"].unique()
            return f"❌ **Error:** Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None, None

        reference_ids = set(references.keys())
        submitted_ids = set(df["id"])
        missing_ids = reference_ids - submitted_ids
        extra_ids = submitted_ids - reference_ids
        # Sets have no stable order; sort the preview so the same upload
        # always produces the same message.
        if missing_ids:
            preview = ", ".join(sorted(map(str, missing_ids))[:5])
            return f"❌ **Error:** Missing {len(missing_ids)} IDs in submission. First few missing: {preview}", None, None
        if extra_ids:
            preview = ", ".join(sorted(map(str, extra_ids))[:5])
            return f"❌ **Error:** Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {preview}", None, None

        try:
            avg_wer, avg_cer, _weighted_wer, _weighted_cer, _detailed_results = calculate_metrics(df)
            # A near-zero WER is rejected as suspicious rather than listed.
            if avg_wer < 0.001:
                return "❌ **Error:** WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None, None
        except Exception as e:
            return f"❌ **Error calculating metrics:** {str(e)}", None, None

        # Update leaderboard
        leaderboard = pd.read_csv(leaderboard_file)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        combined_score = avg_wer * 0.7 + avg_cer * 0.3

        existing = leaderboard["Model_Name"] == model_name
        if existing.any():
            # Resubmission: overwrite the existing row(s) in place.
            leaderboard.loc[existing, "WER"] = avg_wer
            leaderboard.loc[existing, "CER"] = avg_cer
            leaderboard.loc[existing, "Combined_Score"] = combined_score
            leaderboard.loc[existing, "timestamp"] = timestamp
            leaderboard.loc[existing, "Type"] = model_type
            leaderboard.loc[existing, "Origin"] = origin_country
            updated_leaderboard = leaderboard
        else:
            new_entry = pd.DataFrame(
                [[model_name, avg_wer, avg_cer, combined_score, timestamp, model_type, origin_country, "ASR"]],
                columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"],
            )
            # ignore_index avoids a duplicated row label 0 in the result.
            updated_leaderboard = pd.concat([leaderboard, new_entry], ignore_index=True)

        updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
        updated_leaderboard.to_csv(leaderboard_file, index=False)

        display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
        chart = create_performance_chart()
        badge = get_performance_badge(combined_score)

        success_msg = (
            "\n"
            "✅ **Submission processed successfully!**\n"
            f"**{model_name}** ({model_type} from {origin_country})\n"
            f"- **WER:** {format_as_percentage(avg_wer)}\n"
            f"- **CER:** {format_as_percentage(avg_cer)}\n"
            f"- **Combined Score:** {format_as_percentage(combined_score)}\n"
            f"- **Performance:** {badge}\n"
        )
        return success_msg, display_leaderboard, chart
    except Exception as e:
        return f"❌ **Error processing submission:** {str(e)}", None, None
def get_current_leaderboard():
    """Load the leaderboard CSV, backfilling any columns it lacks.

    Missing ``Combined_Score`` is computed as ``0.7 * WER + 0.3 * CER``;
    missing ``Type`` and ``Origin`` are filled with "Unknown" and missing
    ``Task`` with "ASR". The file is written back only when at least one
    column had to be added, so a plain read does not rewrite the CSV.

    Returns:
        The leaderboard frame, or an empty frame with the expected columns
        when the file does not exist or cannot be loaded (the error is
        printed, not raised).
    """
    expected_columns = ["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]
    try:
        if not os.path.exists(leaderboard_file):
            return pd.DataFrame(columns=expected_columns)

        current_leaderboard = pd.read_csv(leaderboard_file)

        # Ensure all required columns exist
        backfilled = False
        if "Combined_Score" not in current_leaderboard.columns:
            current_leaderboard["Combined_Score"] = (
                current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
            )
            backfilled = True
        for col, default in (("Type", "Unknown"), ("Origin", "Unknown"), ("Task", "ASR")):
            if col not in current_leaderboard.columns:
                current_leaderboard[col] = default
                backfilled = True

        # Persist only an actual migration; rewriting on every read is
        # needless I/O and can clobber a submission saved in between.
        if backfilled:
            current_leaderboard.to_csv(leaderboard_file, index=False)
        return current_leaderboard
    except Exception as e:
        print(f"Error getting leaderboard: {str(e)}")
        return pd.DataFrame(columns=expected_columns)
def create_leaderboard_table():
    """Return the current leaderboard, formatted for the UI table."""
    return prepare_leaderboard_for_display(get_current_leaderboard())
def df_to_html(df):
    """Render a DataFrame as an HTML table with the leaderboard styling.

    Args:
        df: Frame to render.

    Returns:
        An HTML string: a centered "No data available" paragraph for an
        empty frame, otherwise a ``<table>`` that carries the
        ``leaderboard-table`` class and an inline full-width style.
    """
    if df.empty:
        return "<p style='text-align: center; color: #666;'>No data available</p>"

    # escape=False emits cell text as raw HTML.
    # NOTE(review): if cells can hold user-supplied text (e.g. model names),
    # it must be escaped before it reaches this function - confirm.
    html = df.to_html(index=False, escape=False, classes="leaderboard-table")

    # pandas writes `<table border="1" class="dataframe leaderboard-table">`,
    # so matching on `<table class="leaderboard-table"` never fired and the
    # style was never applied. Anchor on the opening tag itself instead.
    return html.replace("<table ", '<table style="width: 100%; margin: 0 auto;" ', 1)
# Main Gradio Interface
#
# The long Markdown/HTML bodies live in module-level constants so the layout
# code below stays readable. Their text starts at column 0 on purpose: the
# content of a triple-quoted string is passed to the component verbatim.

_METRICS_HELP_MD = """
## 🎯 Automatic Speech Recognition Evaluation Metrics
### Word Error Rate (WER)
**WER** measures transcription accuracy at the word level:
- **Formula:** `(Substitutions + Insertions + Deletions) / Total Reference Words`
- **Range:** 0% (perfect) to 100%+ (very poor)
- **Interpretation:**
- 0-5%: 🏆 Excellent performance
- 5-15%: 🥉 Good performance
- 15-30%: 📈 Fair performance
- 30%+: Poor performance
### Character Error Rate (CER)
**CER** measures transcription accuracy at the character level:
- **Advantage:** More granular than WER, captures partial matches
- **Benefit for Bambara:** Particularly valuable for agglutinative languages
- **Typical Range:** Usually lower than WER values
### Combined Score (Primary Ranking Metric)
**Formula:** `Combined Score = 0.7 × WER + 0.3 × CER`
- **Rationale:** Balanced evaluation emphasizing word-level accuracy
- **Usage:** Primary metric for model ranking
### 🎯 Performance Categories
- 🏆 **Excellent**: < 15% Combined Score
- 🥉 **Good**: 15-30% Combined Score
- 📈 **Fair**: > 30% Combined Score
"""

_SUBMIT_INTRO_MD = """
### 🚀 Ready to benchmark your model? Submit your results and join the leaderboard!
Follow these steps to submit your Bambara ASR model for evaluation.
"""

_SUBMISSION_REQUIREMENTS_MD = """
#### 📋 Submission Requirements
**CSV Format:**
- Columns: `id`, `text`
- Match all reference dataset IDs
- No duplicate IDs
- Text transcriptions in Bambara
**Data Quality:**
- Clean, normalized text
- Consistent formatting
- Complete coverage of test set
"""

# The Difference column is Model 1 minus Model 2 and every metric is an error
# rate, so a NEGATIVE value is the one that favours Model 1.
_COMPARISON_NOTE_MD = """
**Note on Comparison Results:**
- Difference is computed as Model 1 minus Model 2
- Negative difference values (🟢) indicate Model 1 performed better
- Positive difference values (🔴) indicate Model 2 performed better
- Lower error rates indicate better performance
"""

_METHODOLOGY_MD = """
## 🎯 About the Bambara Speech Recognition Benchmark
### 📈 Dataset Overview
Our benchmark is built on the **`sudoping01/bambara-speech-recognition-benchmark`** dataset, featuring:
- **🎙️ Diverse Audio Samples:** Various speakers, dialects, and recording conditions
- **🗣️ Speaker Variety:** Multiple native Bambara speakers from different regions
- **🎵 Acoustic Diversity:** Different recording environments and quality levels
- **✅ Quality Assurance:** Manually validated transcriptions
- **📚 Content Variety:** Multiple domains and speaking styles
### 🔬 Evaluation Methodology
#### Text Normalization Process
1. **Lowercase conversion** for consistency
2. **Punctuation removal** to focus on linguistic content
3. **Whitespace normalization** for standardized formatting
4. **Unicode normalization** for proper character handling
#### Quality Controls
- **Outlier Detection:** Extreme error rates are capped to prevent skewing
- **Data Validation:** Comprehensive format and completeness checks
- **Duplicate Prevention:** Automatic detection of duplicate submissions
- **Missing Data Handling:** Identification of incomplete submissions
### 🚀 How to Participate
#### Step 1: Access the Dataset
```python
from datasets import load_dataset
dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark")
```
#### Step 2: Generate Predictions
- Process the audio files with your ASR model
- Generate transcriptions for each audio sample
- Ensure your model outputs text in Bambara language
#### Step 3: Format Results
Create a CSV file with exactly these columns:
- **`id`**: Sample identifier (must match dataset IDs)
- **`text`**: Your model's transcription
#### Step 4: Submit & Evaluate
- Upload your CSV using the submission form
- Your model will be automatically evaluated
- Results appear on the leaderboard immediately
### 🏆 Recognition & Impact
**Top-performing models will be:**
- Featured prominently on our leaderboard
- Highlighted in MALIBA-AI communications
- Considered for inclusion in production systems
- Invited to present at community events
### 🤝 Community Guidelines
- **Reproducibility:** Please provide model details and methodology
- **Fair Play:** No data leakage or unfair advantages
- **Collaboration:** Share insights and learnings with the community
- **Attribution:** Properly cite the benchmark in publications
### 📚 Technical Specifications
| Aspect | Details |
|--------|---------|
| **Audio Format** | WAV, various sample rates |
| **Language** | Bambara (bam) |
| **Evaluation Metrics** | WER, CER, Combined Score |
| **Text Encoding** | UTF-8 |
| **Submission Format** | CSV with id, text columns |
"""

_CITATION_HTML = """
<div class="citation-block">
<h2>📚 Citation</h2>
<p>If you use the Bambara ASR Leaderboard for your scientific publication, or if you find the resources useful, please cite our work:</p>
<pre>
@misc{bambara_asr_leaderboard_2025,
title={Bambara Speech Recognition Leaderboard},
author={MALIBA-AI Team},
year={2025},
url={https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard},
note={A community initiative for advancing Bambara speech recognition technology}
}
</pre>
</div>
"""

_FOOTER_HTML = """
<div style="text-align: center; margin-top: 30px; padding-top: 20px; border-top: 2px solid #e9ecef;">
<h3 style="color: #7d3561; margin-bottom: 15px;">About MALIBA-AI</h3>
<p style="font-size: 16px; line-height: 1.6; max-width: 800px; margin: 0 auto;">
<strong>MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation</strong><br>
<em>"No Malian Language Left Behind"</em>
</p>
<p style="margin-top: 15px;">
This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
For more information, visit <a href="https://maliba-ai.org/" style="color: #7d3561; font-weight: 600;">MALIBA-AI</a> or
<a href="https://huggingface.co/MALIBA-AI" style="color: #7d3561; font-weight: 600;">our Hugging Face page</a>.
</p>
<div style="margin-top: 20px;">
<span style="font-size: 2em;">🇲🇱</span>
<span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
<span style="font-size: 2em;">🤝</span>
<span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
<span style="font-size: 2em;">🚀</span>
</div>
</div>
"""

with gr.Blocks(
    title="🇲🇱 Bambara ASR Leaderboard | MALIBA-AI",
    css=sahara_style_css,
    head=custom_head_html,
    theme=gr.themes.Soft()
) as demo:
    # Header Section
    gr.HTML(new_header_html)

    # Navigation Buttons
    with gr.Row():
        gr.Button("🌐 MALIBA-AI Website", link="https://maliba-ai.org/", elem_classes=['nav-button'])
        gr.Button("📊 HF Dataset Repo", link="https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark", elem_classes=['nav-button'])
        gr.Button("🤗 MALIBA-AI Hub", link="https://huggingface.co/MALIBA-AI", elem_classes=['nav-button'])
        gr.Button("📚 Documentation", link="https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard", elem_classes=['nav-button'])

    with gr.Group(elem_classes="content-card"):
        # Stats display
        stats_html = gr.HTML(get_leaderboard_stats())

        with gr.Tabs() as tabs:
            with gr.TabItem("🏅 Main Leaderboard", id="main"):
                gr.HTML("<h2>Main Leaderboard</h2>")
                # Computed once at build time; also seeds the table on the
                # submission tab.
                initial_leaderboard = create_leaderboard_table()
                with gr.Row():
                    ranking_method = gr.Radio(
                        ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
                        label="🔄 Ranking Method",
                        value="Combined Score (WER 70%, CER 30%)",
                        info="Choose how to rank the models"
                    )
                leaderboard_view = gr.DataFrame(
                    value=initial_leaderboard,
                    interactive=False,
                    label="📋 Leaderboard Rankings - Lower scores indicate better performance",
                    wrap=True,
                    height=400
                )
                # Performance chart
                gr.Markdown("### 📊 Visual Performance Comparison")
                performance_chart = gr.Plot(
                    value=create_performance_chart(),
                    label="Model Performance Visualization"
                )
                ranking_method.change(
                    fn=update_ranking,
                    inputs=[ranking_method],
                    outputs=[leaderboard_view]
                )
                with gr.Accordion("📖 Understanding ASR Metrics", open=False):
                    gr.Markdown(_METRICS_HELP_MD)

            with gr.TabItem("📤 Submit New Model", id="submit"):
                gr.HTML("<h2>Submit Your Bambara ASR Model</h2>")
                gr.Markdown(_SUBMIT_INTRO_MD)
                with gr.Group(elem_classes="form-section"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            model_name_input = gr.Textbox(
                                label="🤖 Model Name",
                                placeholder="e.g., MALIBA-AI/bambara-whisper-large",
                                info="Use a descriptive name (organization/model format preferred)"
                            )
                            model_type = gr.Dropdown(
                                label="🏷️ Model Type",
                                choices=["Whisper-based", "Wav2Vec2", "Foundation", "Custom", "Fine-tuned", "Multilingual", "Other"],
                                value="Custom",
                                info="Select the type/architecture of your model"
                            )
                            origin_country = gr.Dropdown(
                                label="🌍 Origin/Institution",
                                choices=["Mali", "Senegal", "Burkina Faso", "Niger", "Guinea", "Ivory Coast", "USA", "France", "Canada", "UK", "Other"],
                                value="Mali",
                                info="Country or region of the developing institution"
                            )
                        with gr.Column(scale=1):
                            gr.Markdown(_SUBMISSION_REQUIREMENTS_MD)
                    csv_upload = gr.File(
                        label="📁 Upload Predictions CSV",
                        file_types=[".csv"],
                        info="Upload your model's transcriptions in the required CSV format"
                    )
                    submit_btn = gr.Button("🚀 Submit Model", variant="primary", size="lg", elem_classes=['gradio-button', 'primary'])
                output_msg = gr.Markdown(label="📢 Submission Status")
                with gr.Row():
                    leaderboard_display = gr.DataFrame(
                        label="📊 Updated Leaderboard",
                        value=initial_leaderboard,
                        interactive=False,
                        wrap=True,
                        height=400
                    )
                    updated_chart = gr.Plot(
                        label="📈 Updated Performance Chart"
                    )

            with gr.TabItem("🔍 Compare Models", id="compare"):
                gr.HTML("<h2>Compare Two Models</h2>")
                gr.Markdown("### Select two models to compare their performance side-by-side")
                with gr.Row():
                    current_data = get_current_leaderboard()
                    model_names = current_data["Model_Name"].tolist() if not current_data.empty else []
                    model_1_dropdown = gr.Dropdown(
                        choices=model_names,
                        label="🤖 Model 1",
                        info="Select the first model for comparison"
                    )
                    model_2_dropdown = gr.Dropdown(
                        choices=model_names,
                        label="🤖 Model 2",
                        info="Select the second model for comparison"
                    )
                compare_btn = gr.Button("⚡ Compare Models", variant="primary", elem_classes=['gradio-button', 'primary'])
                comparison_note = gr.Markdown(_COMPARISON_NOTE_MD, visible=False)
                comparison_output = gr.DataFrame(
                    label="📊 Model Comparison Results",
                    value=pd.DataFrame([{"Info": "Select two models and click Compare to see the results."}]),
                    interactive=False
                )

                def update_comparison_table(m1, m2):
                    """Validate the two selections, then show the comparison and its note."""
                    if not m1 or not m2:
                        return gr.update(visible=False), pd.DataFrame([{"Info": "Please select both models before clicking Compare."}])
                    if m1 == m2:
                        return gr.update(visible=False), pd.DataFrame([{"Info": "Please select two different models to compare."}])
                    df = compare_models(m1, m2)
                    return gr.update(visible=True), df

                compare_btn.click(
                    fn=update_comparison_table,
                    inputs=[model_1_dropdown, model_2_dropdown],
                    outputs=[comparison_note, comparison_output]
                )

            with gr.TabItem("📊 Dataset & Methodology", id="dataset"):
                gr.HTML("<h2>Dataset & Methodology</h2>")
                gr.Markdown(_METHODOLOGY_MD)

        def submit_and_refresh(name, predictions_file, kind, origin):
            """Run process_submission and keep the dependent widgets in sync.

            The model pickers on the compare tab are filled once when the page
            is built, so a newly submitted model could not be selected until
            a restart; refresh their choices after a successful submission.
            On failure process_submission returns None for the table and the
            chart, which would blank what is currently shown, so those
            outputs are left untouched instead.
            """
            status, board, chart = process_submission(name, predictions_file, kind, origin)
            if board is None:
                return status, gr.update(), gr.update(), gr.update(), gr.update()
            names = get_current_leaderboard()["Model_Name"].tolist()
            return status, board, chart, gr.update(choices=names), gr.update(choices=names)

        # Wired here, after the compare tab, because the handler also
        # updates the two dropdowns defined there.
        submit_btn.click(
            fn=submit_and_refresh,
            inputs=[model_name_input, csv_upload, model_type, origin_country],
            outputs=[output_msg, leaderboard_display, updated_chart, model_1_dropdown, model_2_dropdown]
        )

    # Citation and Footer
    with gr.Group(elem_classes="content-card"):
        gr.HTML(_CITATION_HTML)
        gr.HTML(_FOOTER_HTML)
# Launch the Gradio app only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()