|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
import pandas as pd |
|
from datasets import load_dataset |
|
from jiwer import wer, cer |
|
import os |
|
from datetime import datetime |
|
import re

import unicodedata
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from huggingface_hub import login |
|
import numpy as np |
|
|
|
|
|
custom_head_html = """ |
|
<link rel="preconnect" href="https://fonts.googleapis.com"> |
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
|
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Rubik:wght@400;600&display=swap" rel="stylesheet"> |
|
""" |
|
|
|
|
|
new_header_html = """ |
|
<center> |
|
<br><br> |
|
<div style="display: flex; align-items: center; justify-content: center; gap: 20px; margin-bottom: 20px;"> |
|
        <div style="font-size: 4em;">🇲🇱</div>
|
<div> |
|
<h1 style="margin: 0; font-family: 'Rubik', sans-serif; color: #2f3b7d; font-size: 2.5em; font-weight: 700;"> |
|
Bambara ASR Leaderboard |
|
</h1> |
|
<p style="margin: 5px 0 0 0; font-size: 1.2em; color: #7d3561; font-weight: 600;"> |
|
            Powered by MALIBA-AI • "No Malian Language Left Behind"
|
</p> |
|
</div> |
|
        <div style="font-size: 4em;">🎙️</div>
|
</div> |
|
</center> |
|
""" |
|
|
|
|
|
sahara_style_css = """ |
|
/* Global Styles */ |
|
div[class*="gradio-container"] { |
|
background: #FFFBF5 !important; |
|
color: #000 !important; |
|
font-family: 'Inter', sans-serif !important; |
|
} |
|
|
|
div.svelte-1nguped { |
|
background: white !important; |
|
} |
|
|
|
.fillable.svelte-15jxnnn.svelte-15jxnnn:not(.fill_width) { |
|
max-width: 1580px !important; |
|
} |
|
|
|
/* Navigation Buttons */ |
|
.nav-button { |
|
background-color: #117b75 !important; |
|
color: #fff !important; |
|
font-weight: bold !important; |
|
border-radius: 8px !important; |
|
border: none !important; |
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important; |
|
transition: all 0.3s ease !important; |
|
} |
|
|
|
.nav-button:hover { |
|
background-color: #0f6b66 !important; |
|
color: #e8850e !important; |
|
transform: translateY(-1px) !important; |
|
box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important; |
|
} |
|
|
|
/* Content Cards */ |
|
.content-section { |
|
padding: 40px 0; |
|
} |
|
|
|
.content-card { |
|
background-color: #fff !important; |
|
border-radius: 16px !important; |
|
box-shadow: 0 10px 25px -5px rgba(0,0,0,0.1), 0 8px 10px -6px rgba(0,0,0,0.1) !important; |
|
padding: 40px !important; |
|
margin-bottom: 30px !important; |
|
border: 1px solid rgba(0,0,0,0.05) !important; |
|
} |
|
|
|
/* Typography */ |
|
.content-card h2 { |
|
font-family: "Rubik", sans-serif !important; |
|
font-size: 32px !important; |
|
font-weight: 700 !important; |
|
line-height: 1.25 !important; |
|
letter-spacing: -1px !important; |
|
color: #2f3b7d !important; |
|
margin-bottom: 20px !important; |
|
text-align: center !important; |
|
} |
|
|
|
.content-card h3 { |
|
font-size: 22px !important; |
|
color: #2f3b7d !important; |
|
font-weight: 600 !important; |
|
margin-bottom: 15px !important; |
|
} |
|
|
|
.content-card h4 { |
|
font-family: "Rubik", sans-serif !important; |
|
color: #7d3561 !important; |
|
font-weight: 600 !important; |
|
margin-bottom: 10px !important; |
|
} |
|
|
|
.title { |
|
color: #7d3561 !important; |
|
font-weight: 600 !important; |
|
} |
|
|
|
/* Tab Styling */ |
|
.tab-wrapper.svelte-1tcem6n.svelte-1tcem6n { |
|
display: flex; |
|
align-items: center; |
|
justify-content: space-between; |
|
position: relative; |
|
height: auto !important; |
|
padding-bottom: 0 !important; |
|
} |
|
|
|
.selected.svelte-1tcem6n.svelte-1tcem6n { |
|
background-color: #7d3561 !important; |
|
color: #fff !important; |
|
border-radius: 8px 8px 0 0 !important; |
|
} |
|
|
|
button.svelte-1tcem6n.svelte-1tcem6n { |
|
color: #7d3561 !important; |
|
font-weight: 600 !important; |
|
font-size: 16px !important; |
|
padding: 12px 20px !important; |
|
background-color: #fff !important; |
|
border-radius: 8px 8px 0 0 !important; |
|
border: 2px solid #e9ecef !important; |
|
border-bottom: none !important; |
|
transition: all 0.3s ease !important; |
|
} |
|
|
|
button.svelte-1tcem6n.svelte-1tcem6n:hover { |
|
background-color: #f8f9fa !important; |
|
border-color: #7d3561 !important; |
|
} |
|
|
|
.tab-container.svelte-1tcem6n.svelte-1tcem6n:after { |
|
content: ""; |
|
position: absolute; |
|
bottom: 0; |
|
left: 0; |
|
right: 0; |
|
height: 3px; |
|
background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important; |
|
} |
|
|
|
/* Table Styling */ |
|
div[class*="gradio-container"] .prose table { |
|
color: #000 !important; |
|
border: 2px solid #dca02a !important; |
|
border-radius: 12px !important; |
|
margin-bottom: 20px !important; |
|
margin-left: auto !important; |
|
margin-right: auto !important; |
|
width: 100% !important; |
|
border-collapse: separate !important; |
|
border-spacing: 0 !important; |
|
overflow: hidden !important; |
|
box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important; |
|
} |
|
|
|
div[class*="gradio-container"] .prose thead tr { |
|
background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important; |
|
} |
|
|
|
div[class*="gradio-container"] .prose th { |
|
color: #fff !important; |
|
font-weight: 700 !important; |
|
font-size: 14px !important; |
|
padding: 15px 10px !important; |
|
text-align: center !important; |
|
border: none !important; |
|
} |
|
|
|
div[class*="gradio-container"] .prose td { |
|
font-size: 14px !important; |
|
padding: 12px 10px !important; |
|
border: none !important; |
|
text-align: center !important; |
|
color: #000 !important; |
|
border-bottom: 1px solid #f8f9fa !important; |
|
} |
|
|
|
div[class*="gradio-container"] .prose tbody tr:nth-child(even) { |
|
background-color: #f8f9fa !important; |
|
} |
|
|
|
div[class*="gradio-container"] .prose tbody tr:hover { |
|
background-color: #e3f2fd !important; |
|
transition: background-color 0.2s ease !important; |
|
} |
|
|
|
/* First column (model names) styling */ |
|
div[class*="gradio-container"] .prose th:first-child, |
|
div[class*="gradio-container"] .prose td:first-child { |
|
text-align: left !important; |
|
min-width: 250px !important; |
|
font-weight: 600 !important; |
|
} |
|
|
|
/* Performance badges */ |
|
.performance-badge { |
|
display: inline-block; |
|
padding: 4px 8px; |
|
border-radius: 12px; |
|
font-size: 12px; |
|
font-weight: 600; |
|
margin-left: 8px; |
|
} |
|
|
|
.badge-excellent { |
|
background: #d4edda; |
|
color: #155724; |
|
} |
|
|
|
.badge-good { |
|
background: #fff3cd; |
|
color: #856404; |
|
} |
|
|
|
.badge-fair { |
|
background: #f8d7da; |
|
color: #721c24; |
|
} |
|
|
|
/* Stats cards */ |
|
.stats-grid { |
|
display: grid; |
|
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); |
|
gap: 20px; |
|
margin: 20px 0; |
|
} |
|
|
|
.stat-card { |
|
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); |
|
color: white; |
|
padding: 20px; |
|
border-radius: 12px; |
|
text-align: center; |
|
box-shadow: 0 4px 6px rgba(0,0,0,0.1); |
|
} |
|
|
|
.stat-number { |
|
font-size: 2em; |
|
font-weight: 700; |
|
margin-bottom: 5px; |
|
} |
|
|
|
.stat-label { |
|
font-size: 0.9em; |
|
opacity: 0.9; |
|
} |
|
|
|
/* Form styling */ |
|
.form-section { |
|
background: #f8f9fa; |
|
border-radius: 12px; |
|
padding: 25px; |
|
margin: 20px 0; |
|
border-left: 4px solid #7d3561; |
|
} |
|
|
|
/* Citation block */ |
|
.citation-block { |
|
background-color: #FDF6E3 !important; |
|
border-radius: 12px !important; |
|
padding: 25px !important; |
|
border-left: 4px solid #D97706 !important; |
|
margin: 20px 0 !important; |
|
} |
|
|
|
/* Dropdown styling */ |
|
.gradio-dropdown { |
|
border-radius: 8px !important; |
|
border: 2px solid #e9ecef !important; |
|
} |
|
|
|
.gradio-dropdown:focus { |
|
border-color: #7d3561 !important; |
|
box-shadow: 0 0 0 3px rgba(125, 53, 97, 0.1) !important; |
|
} |
|
|
|
/* Button styling */ |
|
.gradio-button { |
|
border-radius: 8px !important; |
|
font-weight: 600 !important; |
|
transition: all 0.3s ease !important; |
|
} |
|
|
|
.gradio-button.primary { |
|
background: linear-gradient(135deg, #7d3561 0%, #2f3b7d 100%) !important; |
|
border: none !important; |
|
color: white !important; |
|
} |
|
|
|
.gradio-button.primary:hover { |
|
transform: translateY(-2px) !important; |
|
box-shadow: 0 4px 12px rgba(125, 53, 97, 0.3) !important; |
|
} |
|
|
|
/* Responsive design */ |
|
@media (max-width: 768px) { |
|
.content-card { |
|
padding: 20px !important; |
|
margin-bottom: 20px !important; |
|
} |
|
|
|
.content-card h2 { |
|
font-size: 24px !important; |
|
} |
|
|
|
.stats-grid { |
|
grid-template-columns: 1fr !important; |
|
} |
|
} |
|
""" |
|
|
|
|
|
token = os.environ.get("HG_TOKEN") |
|
if token: |
|
login(token) |
|
|
|
|
|
try: |
|
dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"] |
|
references = {row["id"]: row["text"] for row in dataset} |
|
print(f"Loaded {len(references)} reference transcriptions") |
|
except Exception as e: |
|
print(f"Error loading dataset: {str(e)}") |
|
references = {} |
|
|
|
|
|
leaderboard_file = "leaderboard.csv" |
|
if not os.path.exists(leaderboard_file): |
|
    sample_data = [

        # Combined_Score follows the 0.7 * WER + 0.3 * CER weighting used throughout.

        ["MALIBA-AI/bambara-whisper-small", 0.2264, 0.1094, 0.1913, "2025-03-15 10:30:45", "Whisper-based", "Mali", "ASR"],

        ["OpenAI/whisper-base", 0.3264, 0.1094, 0.2613, "2025-03-15 10:30:45", "Foundation", "USA", "ASR"],

    ]

    pd.DataFrame(sample_data,

                 columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]).to_csv(leaderboard_file, index=False)

    print("Created new leaderboard file with sample data")
|
else: |
|
leaderboard_df = pd.read_csv(leaderboard_file) |
|
|
|
|
|
required_columns = ["Combined_Score", "Type", "Origin", "Task"] |
|
for col in required_columns: |
|
if col not in leaderboard_df.columns: |
|
if col == "Combined_Score": |
|
leaderboard_df[col] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3 |
|
else: |
|
default_val = "Unknown" if col != "Task" else "ASR" |
|
leaderboard_df[col] = default_val |
|
|
|
leaderboard_df.to_csv(leaderboard_file, index=False) |
|
print(f"Loaded leaderboard with {len(leaderboard_df)} entries") |
|
|
|
def normalize_text(text):

    """Normalize text for WER/CER calculation"""

    if not isinstance(text, str):

        text = str(text)

    # NFC unicode normalization so canonically equivalent characters compare equal,

    # matching the normalization steps documented in the Dataset & Methodology tab.

    text = unicodedata.normalize("NFC", text)

    text = text.lower()

    text = re.sub(r'[^\w\s]', '', text)

    text = re.sub(r'\s+', ' ', text).strip()

    return text
|
|
|
def calculate_metrics(predictions_df): |
|
"""Calculate WER and CER for predictions.""" |
|
results = [] |
|
total_ref_words = 0 |
|
total_ref_chars = 0 |
|
|
|
for _, row in predictions_df.iterrows(): |
|
id_val = row["id"] |
|
if id_val not in references: |
|
continue |
|
|
|
reference = normalize_text(references[id_val]) |
|
hypothesis = normalize_text(row["text"]) |
|
|
|
if not reference or not hypothesis: |
|
continue |
|
|
|
reference_words = reference.split() |
|
hypothesis_words = hypothesis.split() |
|
reference_chars = list(reference) |
|
|
|
try: |
|
sample_wer = wer(reference, hypothesis) |
|
sample_cer = cer(reference, hypothesis) |
|
|
|
sample_wer = min(sample_wer, 2.0) |
|
sample_cer = min(sample_cer, 2.0) |
|
|
|
total_ref_words += len(reference_words) |
|
total_ref_chars += len(reference_chars) |
|
|
|
results.append({ |
|
"id": id_val, |
|
"reference": reference, |
|
"hypothesis": hypothesis, |
|
"ref_word_count": len(reference_words), |
|
"ref_char_count": len(reference_chars), |
|
"wer": sample_wer, |
|
"cer": sample_cer |
|
}) |
|
        except Exception as e:

            print(f"Error processing sample {id_val}: {str(e)}")

            continue
|
|
|
if not results: |
|
raise ValueError("No valid samples for WER/CER calculation") |
|
|
|
avg_wer = sum(item["wer"] for item in results) / len(results) |
|
avg_cer = sum(item["cer"] for item in results) / len(results) |
|
|
|
weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words |
|
weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars |
|
|
|
return avg_wer, avg_cer, weighted_wer, weighted_cer, results |
|
|
|
def format_as_percentage(value): |
|
"""Convert decimal to percentage with 2 decimal places""" |
|
return f"{value * 100:.2f}%" |
|
|
|
def get_performance_badge(score): |
|
"""Get performance badge based on score""" |
|
    if score < 0.15:

        return "🏆 Excellent"

    elif score < 0.30:

        return "🥈 Good"

    else:

        return "📊 Fair"
|
|
|
def add_medals_to_models(df, score_col="Combined_Score"): |
|
"""Add medals to top-performing models""" |
|
if df.empty or score_col not in df.columns: |
|
return df |
|
|
|
df_copy = df.copy() |
|
|
|
|
|
df_copy[f"{score_col}_float"] = pd.to_numeric(df_copy[score_col], errors='coerce') |
|
|
|
|
|
df_copy = df_copy.sort_values(by=f"{score_col}_float", ascending=True, na_position='last').reset_index(drop=True) |
|
|
|
|
|
valid_scores = df_copy[f"{score_col}_float"].dropna().unique() |
|
valid_scores.sort() |
|
|
|
|
|
    medals = ["🏆", "🥈", "🥉"]
|
|
|
def get_medal(score): |
|
if pd.isna(score): |
|
return "" |
|
rank = np.where(valid_scores == score)[0] |
|
if len(rank) > 0 and rank[0] < len(medals): |
|
return medals[rank[0]] + " " |
|
return "" |
|
|
|
df_copy["Medal"] = df_copy[f"{score_col}_float"].apply(get_medal) |
|
df_copy["Model_Name"] = df_copy["Medal"] + df_copy["Model_Name"].astype(str) |
|
|
|
|
|
df_copy = df_copy.drop(columns=[f"{score_col}_float", "Medal"]) |
|
|
|
return df_copy |
|
|
|
def prepare_leaderboard_for_display(df, sort_by="Combined_Score"): |
|
"""Format leaderboard for display with ranking and percentages""" |
|
if df is None or len(df) == 0: |
|
return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]) |
|
|
|
display_df = df.copy() |
|
|
|
|
|
display_df = add_medals_to_models(display_df, sort_by) |
|
|
|
|
|
display_df[f"{sort_by}_float"] = pd.to_numeric(display_df[sort_by], errors='coerce') |
|
display_df = display_df.sort_values(f"{sort_by}_float", ascending=True, na_position='last') |
|
|
|
|
|
display_df.insert(0, "Rank", range(1, len(display_df) + 1)) |
|
|
|
|
|
    # Map raw metric columns to display names; "Combined_Score" must become

    # "Combined Score (%)" so it matches the display_columns list below.

    for col, display_name in [("WER", "WER (%)"), ("CER", "CER (%)"), ("Combined_Score", "Combined Score (%)")]:

        if col in display_df.columns:

            display_df[display_name] = display_df[col].apply(lambda x: f"{x * 100:.2f}" if pd.notna(x) else "---")
|
|
|
|
|
display_df["Performance"] = display_df["Combined_Score"].apply(lambda x: get_performance_badge(x) if pd.notna(x) else "---") |
|
|
|
|
|
    # Drop the "org/" namespace but keep any medal prefix added by add_medals_to_models.

    display_df["Model"] = display_df["Model_Name"].apply(lambda x: re.sub(r'\S*/', '', str(x)))
|
|
|
|
|
if "timestamp" in display_df.columns: |
|
display_df["Date"] = pd.to_datetime(display_df["timestamp"], errors='coerce').dt.strftime("%Y-%m-%d") |
|
else: |
|
display_df["Date"] = "---" |
|
|
|
|
|
display_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"] |
|
available_columns = [col for col in display_columns if col in display_df.columns] |
|
|
|
|
|
temp_cols = [col for col in display_df.columns if col.endswith("_float")] |
|
display_df = display_df.drop(columns=temp_cols, errors='ignore') |
|
|
|
return display_df[available_columns] |
|
|
|
def create_performance_chart(): |
|
"""Create performance visualization chart""" |
|
try: |
|
df = pd.read_csv(leaderboard_file) |
|
if len(df) == 0: |
|
return None |
|
|
|
|
|
df = df.sort_values("Combined_Score") |
|
|
|
fig = go.Figure() |
|
|
|
|
|
fig.add_trace(go.Bar( |
|
name="WER", |
|
x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x), |
|
y=df["WER"] * 100, |
|
marker_color='#ff7f0e', |
|
hovertemplate='<b>%{x}</b><br>WER: %{y:.2f}%<extra></extra>' |
|
)) |
|
|
|
|
|
fig.add_trace(go.Bar( |
|
name="CER", |
|
x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x), |
|
y=df["CER"] * 100, |
|
marker_color='#2ca02c', |
|
hovertemplate='<b>%{x}</b><br>CER: %{y:.2f}%<extra></extra>' |
|
)) |
|
|
|
|
|
fig.add_trace(go.Scatter( |
|
name="Combined Score", |
|
x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x), |
|
y=df["Combined_Score"] * 100, |
|
mode='lines+markers', |
|
line=dict(color='#d62728', width=3), |
|
marker=dict(size=8), |
|
hovertemplate='<b>%{x}</b><br>Combined Score: %{y:.2f}%<extra></extra>' |
|
)) |
|
|
|
fig.update_layout( |
|
title={ |
|
                'text': "📊 Model Performance Comparison",
|
'x': 0.5, |
|
'font': {'size': 18, 'family': 'Rubik'} |
|
}, |
|
xaxis_title="Model", |
|
yaxis_title="Error Rate (%)", |
|
hovermode='x unified', |
|
height=500, |
|
showlegend=True, |
|
plot_bgcolor='rgba(0,0,0,0)', |
|
paper_bgcolor='rgba(0,0,0,0)', |
|
font=dict(family="Inter", size=12), |
|
legend=dict( |
|
orientation="h", |
|
yanchor="bottom", |
|
y=1.02, |
|
xanchor="right", |
|
x=1 |
|
) |
|
) |
|
|
|
return fig |
|
except Exception as e: |
|
print(f"Error creating chart: {str(e)}") |
|
return None |
|
|
|
def get_leaderboard_stats(): |
|
"""Get summary statistics for the leaderboard""" |
|
try: |
|
df = pd.read_csv(leaderboard_file) |
|
if len(df) == 0: |
|
return """ |
|
<div class="stats-grid"> |
|
<div class="stat-card"> |
|
<div class="stat-number">0</div> |
|
<div class="stat-label">Models Submitted</div> |
|
</div> |
|
</div> |
|
""" |
|
|
|
best_model = df.loc[df["Combined_Score"].idxmin()] |
|
total_models = len(df) |
|
avg_wer = df["WER"].mean() |
|
avg_cer = df["CER"].mean() |
|
|
|
return f""" |
|
<div class="stats-grid"> |
|
<div class="stat-card"> |
|
<div class="stat-number">{total_models}</div> |
|
<div class="stat-label">Models Evaluated</div> |
|
</div> |
|
<div class="stat-card"> |
|
<div class="stat-number">{format_as_percentage(best_model['Combined_Score'])}</div> |
|
<div class="stat-label">Best Combined Score</div> |
|
</div> |
|
<div class="stat-card"> |
|
<div class="stat-number">{format_as_percentage(avg_wer)}</div> |
|
<div class="stat-label">Average WER</div> |
|
</div> |
|
<div class="stat-card"> |
|
<div class="stat-number">{format_as_percentage(avg_cer)}</div> |
|
<div class="stat-label">Average CER</div> |
|
</div> |
|
</div> |
|
|
|
<div style="text-align: center; margin-top: 20px;"> |
|
            <h4>🏆 Current Champion: {best_model['Model_Name']}</h4>
|
</div> |
|
""" |
|
except Exception as e: |
|
return f"<p>Error loading stats: {str(e)}</p>" |
|
|
|
def update_ranking(method): |
|
"""Update leaderboard ranking based on selected method""" |
|
try: |
|
current_lb = pd.read_csv(leaderboard_file) |
|
|
|
if "Combined_Score" not in current_lb.columns: |
|
current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3 |
|
|
|
sort_column = "Combined_Score" |
|
if method == "WER Only": |
|
sort_column = "WER" |
|
elif method == "CER Only": |
|
sort_column = "CER" |
|
|
|
return prepare_leaderboard_for_display(current_lb, sort_column) |
|
|
|
except Exception as e: |
|
print(f"Error updating ranking: {str(e)}") |
|
return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]) |
|
|
|
def compare_models(model_1_name, model_2_name): |
|
"""Compare two models performance""" |
|
try: |
|
df = pd.read_csv(leaderboard_file) |
|
|
|
if model_1_name == model_2_name: |
|
return pd.DataFrame([{"Info": "Please select two different models to compare."}]) |
|
|
|
model_1 = df[df["Model_Name"] == model_1_name] |
|
model_2 = df[df["Model_Name"] == model_2_name] |
|
|
|
if model_1.empty or model_2.empty: |
|
return pd.DataFrame([{"Info": "One or both models not found in leaderboard."}]) |
|
|
|
m1 = model_1.iloc[0] |
|
m2 = model_2.iloc[0] |
|
|
|
comparison_data = { |
|
"Metric": ["WER", "CER", "Combined Score"], |
|
model_1_name.split("/")[-1]: [ |
|
f"{m1['WER']*100:.2f}%", |
|
f"{m1['CER']*100:.2f}%", |
|
f"{m1['Combined_Score']*100:.2f}%" |
|
], |
|
model_2_name.split("/")[-1]: [ |
|
f"{m2['WER']*100:.2f}%", |
|
f"{m2['CER']*100:.2f}%", |
|
f"{m2['Combined_Score']*100:.2f}%" |
|
], |
|
"Difference": [ |
|
f"{(m1['WER'] - m2['WER'])*100:+.2f}%", |
|
f"{(m1['CER'] - m2['CER'])*100:+.2f}%", |
|
f"{(m1['Combined_Score'] - m2['Combined_Score'])*100:+.2f}%" |
|
] |
|
} |
|
|
|
return pd.DataFrame(comparison_data) |
|
|
|
except Exception as e: |
|
return pd.DataFrame([{"Error": f"Error comparing models: {str(e)}"}]) |
|
|
|
def process_submission(model_name, csv_file, model_type, origin_country): |
|
"""Process a new model submission with enhanced metadata""" |
|
if not model_name or not model_name.strip(): |
|
return "β **Error:** Please provide a model name.", None, None |
|
|
|
if not csv_file: |
|
return "β **Error:** Please upload a CSV file.", None, None |
|
|
|
try: |
|
df = pd.read_csv(csv_file) |
|
|
|
if len(df) == 0: |
|
return "β **Error:** Uploaded CSV is empty.", None, None |
|
|
|
if set(df.columns) != {"id", "text"}: |
|
return f"β **Error:** CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None, None |
|
|
|
if df["id"].duplicated().any(): |
|
dup_ids = df[df["id"].duplicated()]["id"].unique() |
|
return f"β **Error:** Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None, None |
|
|
|
missing_ids = set(references.keys()) - set(df["id"]) |
|
extra_ids = set(df["id"]) - set(references.keys()) |
|
|
|
if missing_ids: |
|
return f"β **Error:** Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None, None |
|
|
|
if extra_ids: |
|
return f"β **Error:** Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None, None |
|
|
|
try: |
|
avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df) |
|
|
|
if avg_wer < 0.001: |
|
return "β **Error:** WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None, None |
|
|
|
except Exception as e: |
|
return f"β **Error calculating metrics:** {str(e)}", None, None |
|
|
|
|
|
leaderboard = pd.read_csv(leaderboard_file) |
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") |
|
combined_score = avg_wer * 0.7 + avg_cer * 0.3 |
|
|
|
if model_name in leaderboard["Model_Name"].values: |
|
idx = leaderboard[leaderboard["Model_Name"] == model_name].index |
|
leaderboard.loc[idx, "WER"] = avg_wer |
|
leaderboard.loc[idx, "CER"] = avg_cer |
|
leaderboard.loc[idx, "Combined_Score"] = combined_score |
|
leaderboard.loc[idx, "timestamp"] = timestamp |
|
leaderboard.loc[idx, "Type"] = model_type |
|
leaderboard.loc[idx, "Origin"] = origin_country |
|
updated_leaderboard = leaderboard |
|
else: |
|
new_entry = pd.DataFrame( |
|
[[model_name, avg_wer, avg_cer, combined_score, timestamp, model_type, origin_country, "ASR"]], |
|
columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"] |
|
) |
|
            updated_leaderboard = pd.concat([leaderboard, new_entry], ignore_index=True)
|
|
|
updated_leaderboard = updated_leaderboard.sort_values("Combined_Score") |
|
updated_leaderboard.to_csv(leaderboard_file, index=False) |
|
|
|
display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard) |
|
chart = create_performance_chart() |
|
|
|
badge = get_performance_badge(combined_score) |
|
|
|
success_msg = f""" |
|
        ✅ **Submission processed successfully!**
|
|
|
**{model_name}** ({model_type} from {origin_country}) |
|
- **WER:** {format_as_percentage(avg_wer)} |
|
- **CER:** {format_as_percentage(avg_cer)} |
|
- **Combined Score:** {format_as_percentage(combined_score)} |
|
- **Performance:** {badge} |
|
""" |
|
|
|
return success_msg, display_leaderboard, chart |
|
|
|
except Exception as e: |
|
return f"β **Error processing submission:** {str(e)}", None, None |
|
|
|
def get_current_leaderboard(): |
|
"""Get the current leaderboard data for display""" |
|
try: |
|
if os.path.exists(leaderboard_file): |
|
current_leaderboard = pd.read_csv(leaderboard_file) |
|
|
|
|
|
required_columns = ["Combined_Score", "Type", "Origin", "Task"] |
|
for col in required_columns: |
|
if col not in current_leaderboard.columns: |
|
if col == "Combined_Score": |
|
current_leaderboard[col] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3 |
|
else: |
|
current_leaderboard[col] = "Unknown" if col != "Task" else "ASR" |
|
|
|
current_leaderboard.to_csv(leaderboard_file, index=False) |
|
return current_leaderboard |
|
else: |
|
return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]) |
|
except Exception as e: |
|
print(f"Error getting leaderboard: {str(e)}") |
|
return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]) |
|
|
|
def create_leaderboard_table(): |
|
"""Create and format the leaderboard table for display""" |
|
leaderboard_data = get_current_leaderboard() |
|
return prepare_leaderboard_for_display(leaderboard_data) |
|
|
|
def df_to_html(df): |
|
"""Convert DataFrame to HTML with custom styling""" |
|
if df.empty: |
|
return "<p style='text-align: center; color: #666;'>No data available</p>" |
|
|
|
|
|
html = df.to_html(index=False, escape=False, classes="leaderboard-table") |
|
|
|
|
|
html = html.replace('<table class="leaderboard-table"', |
|
'<table class="leaderboard-table" style="width: 100%; margin: 0 auto;"') |
|
|
|
return html |
|
|
|
|
|
with gr.Blocks( |
|
    title="🇲🇱 Bambara ASR Leaderboard | MALIBA-AI",
|
css=sahara_style_css, |
|
head=custom_head_html, |
|
theme=gr.themes.Soft() |
|
) as demo: |
|
|
|
|
|
gr.HTML(new_header_html) |
|
|
|
|
|
with gr.Row(): |
|
        gr.Button("🌍 MALIBA-AI Website", link="https://maliba-ai.org/", elem_classes=['nav-button'])

        gr.Button("📊 HF Dataset Repo", link="https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark", elem_classes=['nav-button'])

        gr.Button("🤗 MALIBA-AI Hub", link="https://huggingface.co/MALIBA-AI", elem_classes=['nav-button'])

        gr.Button("📚 Documentation", link="https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard", elem_classes=['nav-button'])
|
|
|
with gr.Group(elem_classes="content-card"): |
|
|
|
stats_html = gr.HTML(get_leaderboard_stats()) |
|
|
|
with gr.Tabs() as tabs: |
|
        with gr.TabItem("🏅 Main Leaderboard", id="main"):
|
gr.HTML("<h2>Main Leaderboard</h2>") |
|
|
|
initial_leaderboard = create_leaderboard_table() |
|
|
|
with gr.Row(): |
|
ranking_method = gr.Radio( |
|
["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"], |
|
                    label="📊 Ranking Method",
|
value="Combined Score (WER 70%, CER 30%)", |
|
info="Choose how to rank the models" |
|
) |
|
|
|
leaderboard_view = gr.DataFrame( |
|
value=initial_leaderboard, |
|
interactive=False, |
|
                label="🏆 Leaderboard Rankings - Lower scores indicate better performance",
|
wrap=True, |
|
height=400 |
|
) |
|
|
|
|
|
            gr.Markdown("### 📊 Visual Performance Comparison")
|
performance_chart = gr.Plot( |
|
value=create_performance_chart(), |
|
label="Model Performance Visualization" |
|
) |
|
|
|
ranking_method.change( |
|
fn=update_ranking, |
|
inputs=[ranking_method], |
|
outputs=[leaderboard_view] |
|
) |
|
|
|
            with gr.Accordion("📊 Understanding ASR Metrics", open=False):
|
gr.Markdown(""" |
|
                ## 🎯 Automatic Speech Recognition Evaluation Metrics
|
|
|
### Word Error Rate (WER) |
|
**WER** measures transcription accuracy at the word level: |
|
- **Formula:** `(Substitutions + Insertions + Deletions) / Total Reference Words` |
|
- **Range:** 0% (perfect) to 100%+ (very poor) |
|
- **Interpretation:** |
|
                  - 0-5%: 🏆 Excellent performance

                  - 5-15%: 🥈 Good performance

                  - 15-30%: 📊 Fair performance
|
- 30%+: Poor performance |
|
|
|
### Character Error Rate (CER) |
|
**CER** measures transcription accuracy at the character level: |
|
- **Advantage:** More granular than WER, captures partial matches |
|
                - **Benefit for Bambara:** Rewards near-miss transcriptions, useful given Bambara's heavy compounding and spelling variation
|
- **Typical Range:** Usually lower than WER values |
|
|
|
### Combined Score (Primary Ranking Metric) |
|
**Formula:** `Combined Score = 0.7 Γ WER + 0.3 Γ CER` |
|
- **Rationale:** Balanced evaluation emphasizing word-level accuracy |
|
- **Usage:** Primary metric for model ranking |
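
                As a minimal sketch, the leaderboard's weighting can be reproduced with `jiwer` (the Bambara strings below are purely illustrative):

                ```python

                from jiwer import wer, cer

                reference = "aw ni ce"   # ground-truth transcription (illustrative)

                hypothesis = "aw ni se"  # model output (illustrative)

                w = wer(reference, hypothesis)  # word error rate

                c = cer(reference, hypothesis)  # character error rate

                combined = 0.7 * w + 0.3 * c    # primary ranking metric

                print(f"WER={w:.2%}  CER={c:.2%}  Combined={combined:.2%}")

                ```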
|
|
|
                ### 🎯 Performance Categories

                - 🏆 **Excellent**: < 15% Combined Score

                - 🥈 **Good**: 15-30% Combined Score

                - 📊 **Fair**: > 30% Combined Score
|
""") |
|
|
|
        with gr.TabItem("🤖 Submit New Model", id="submit"):
|
gr.HTML("<h2>Submit Your Bambara ASR Model</h2>") |
|
|
|
gr.Markdown(""" |
|
            ### 🚀 Ready to benchmark your model? Submit your results and join the leaderboard!
|
|
|
Follow these steps to submit your Bambara ASR model for evaluation. |
|
""") |
|
|
|
with gr.Group(elem_classes="form-section"): |
|
with gr.Row(): |
|
with gr.Column(scale=2): |
|
model_name_input = gr.Textbox( |
|
                            label="🤖 Model Name",
|
placeholder="e.g., MALIBA-AI/bambara-whisper-large", |
|
info="Use a descriptive name (organization/model format preferred)" |
|
) |
|
|
|
model_type = gr.Dropdown( |
|
                            label="🏷️ Model Type",
|
choices=["Whisper-based", "Wav2Vec2", "Foundation", "Custom", "Fine-tuned", "Multilingual", "Other"], |
|
value="Custom", |
|
info="Select the type/architecture of your model" |
|
) |
|
|
|
origin_country = gr.Dropdown( |
|
                            label="🌍 Origin/Institution",
|
choices=["Mali", "Senegal", "Burkina Faso", "Niger", "Guinea", "Ivory Coast", "USA", "France", "Canada", "UK", "Other"], |
|
value="Mali", |
|
info="Country or region of the developing institution" |
|
) |
|
|
|
with gr.Column(scale=1): |
|
gr.Markdown(""" |
|
                        #### 📋 Submission Requirements
|
|
|
**CSV Format:** |
|
- Columns: `id`, `text` |
|
- Match all reference dataset IDs |
|
- No duplicate IDs |
|
- Text transcriptions in Bambara |
|
|
|
**Data Quality:** |
|
- Clean, normalized text |
|
- Consistent formatting |
|
- Complete coverage of test set |
|
""") |
|
|
|
csv_upload = gr.File( |
|
                label="📁 Upload Predictions CSV",
|
file_types=[".csv"], |
|
info="Upload your model's transcriptions in the required CSV format" |
|
) |
|
|
|
            submit_btn = gr.Button("🚀 Submit Model", variant="primary", size="lg", elem_classes=['gradio-button', 'primary'])
|
|
|
            output_msg = gr.Markdown(label="📢 Submission Status")
|
|
|
with gr.Row(): |
|
leaderboard_display = gr.DataFrame( |
|
                    label="🏆 Updated Leaderboard",
|
value=initial_leaderboard, |
|
interactive=False, |
|
wrap=True, |
|
height=400 |
|
) |
|
|
|
updated_chart = gr.Plot( |
|
                label="📈 Updated Performance Chart"
|
) |
|
|
|
submit_btn.click( |
|
fn=process_submission, |
|
inputs=[model_name_input, csv_upload, model_type, origin_country], |
|
outputs=[output_msg, leaderboard_display, updated_chart] |
|
) |
|
|
|
        with gr.TabItem("🔍 Compare Models", id="compare"):
|
gr.HTML("<h2>Compare Two Models</h2>") |
|
|
|
gr.Markdown("### Select two models to compare their performance side-by-side") |
|
|
|
with gr.Row(): |
|
current_data = get_current_leaderboard() |
|
model_names = current_data["Model_Name"].tolist() if not current_data.empty else [] |
|
|
|
model_1_dropdown = gr.Dropdown( |
|
choices=model_names, |
|
                label="🤖 Model 1",
|
info="Select the first model for comparison" |
|
) |
|
model_2_dropdown = gr.Dropdown( |
|
choices=model_names, |
|
                label="🤖 Model 2",
|
info="Select the second model for comparison" |
|
) |
|
|
|
            compare_btn = gr.Button("⚡ Compare Models", variant="primary", elem_classes=['gradio-button', 'primary'])
|
|
|
comparison_note = gr.Markdown(""" |
|
**Note on Comparison Results:** |
|
            - Negative difference values (🟢) indicate Model 1 performed better (differences are Model 1 minus Model 2, and lower error rates win)

            - Positive difference values (🔴) indicate Model 2 performed better
|
- Lower error rates indicate better performance |
|
""", visible=False) |
|
|
|
comparison_output = gr.DataFrame( |
|
                label="📊 Model Comparison Results",
|
value=pd.DataFrame([{"Info": "Select two models and click Compare to see the results."}]), |
|
interactive=False |
|
) |
|
|
|
def update_comparison_table(m1, m2): |
|
if not m1 or not m2: |
|
return gr.update(visible=False), pd.DataFrame([{"Info": "Please select both models before clicking Compare."}]) |
|
|
|
if m1 == m2: |
|
return gr.update(visible=False), pd.DataFrame([{"Info": "Please select two different models to compare."}]) |
|
|
|
df = compare_models(m1, m2) |
|
return gr.update(visible=True), df |
|
|
|
compare_btn.click( |
|
fn=update_comparison_table, |
|
inputs=[model_1_dropdown, model_2_dropdown], |
|
outputs=[comparison_note, comparison_output] |
|
) |
|
|
|
        with gr.TabItem("📚 Dataset & Methodology", id="dataset"):
|
gr.HTML("<h2>Dataset & Methodology</h2>") |
|
|
|
gr.Markdown(""" |
|
            ## 🎯 About the Bambara Speech Recognition Benchmark
|
|
|
            ### 📊 Dataset Overview
|
|
|
Our benchmark is built on the **`sudoping01/bambara-speech-recognition-benchmark`** dataset, featuring: |
|
|
|
            - **🎙️ Diverse Audio Samples:** Various speakers, dialects, and recording conditions

            - **🗣️ Speaker Variety:** Multiple native Bambara speakers from different regions

            - **🎵 Acoustic Diversity:** Different recording environments and quality levels

            - **✅ Quality Assurance:** Manually validated transcriptions

            - **📚 Content Variety:** Multiple domains and speaking styles
|
|
|
            ### 🔬 Evaluation Methodology
|
|
|
#### Text Normalization Process |
|
1. **Lowercase conversion** for consistency |
|
2. **Punctuation removal** to focus on linguistic content |
|
3. **Whitespace normalization** for standardized formatting |
|
4. **Unicode normalization** for proper character handling |
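
            These steps mirror what this app's `normalize_text` helper applies before scoring:

            ```python

            import re

            import unicodedata

            def normalize_text(text: str) -> str:

                text = unicodedata.normalize("NFC", str(text))  # unicode normalization

                text = text.lower()                             # lowercase conversion

                text = re.sub(r'[^\w\s]', '', text)             # punctuation removal

                return re.sub(r'\s+', ' ', text).strip()        # whitespace normalization

            ```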
|
|
|
#### Quality Controls |
|
- **Outlier Detection:** Extreme error rates are capped to prevent skewing |
|
- **Data Validation:** Comprehensive format and completeness checks |
|
- **Duplicate Prevention:** Automatic detection of duplicate submissions |
|
- **Missing Data Handling:** Identification of incomplete submissions |
|
|
|
            ### 🚀 How to Participate
|
|
|
#### Step 1: Access the Dataset |
|
```python |
|
from datasets import load_dataset |
|
            dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
|
``` |
|
|
|
#### Step 2: Generate Predictions |
|
- Process the audio files with your ASR model |
|
- Generate transcriptions for each audio sample |
|
- Ensure your model outputs text in Bambara language |
|
|
|
#### Step 3: Format Results |
|
Create a CSV file with exactly these columns: |
|
- **`id`**: Sample identifier (must match dataset IDs) |
|
- **`text`**: Your model's transcription |
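
            For instance, a hedged end-to-end sketch covering Steps 2 and 3 (the checkpoint name is a placeholder, and the `audio` column is assumed to follow the standard Hugging Face Datasets audio format):

            ```python

            import pandas as pd

            from datasets import load_dataset

            from transformers import pipeline

            dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]

            # Placeholder checkpoint: substitute your own Bambara ASR model.

            asr = pipeline("automatic-speech-recognition", model="your-org/your-bambara-asr")

            rows = []

            for sample in dataset:

                audio = sample["audio"]

                result = asr({"array": audio["array"], "sampling_rate": audio["sampling_rate"]})

                rows.append({"id": sample["id"], "text": result["text"]})

            pd.DataFrame(rows).to_csv("predictions.csv", index=False)

            ```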
|
|
|
#### Step 4: Submit & Evaluate |
|
- Upload your CSV using the submission form |
|
- Your model will be automatically evaluated |
|
- Results appear on the leaderboard immediately |
|
|
|
            ### 🏆 Recognition & Impact
|
|
|
**Top-performing models will be:** |
|
- Featured prominently on our leaderboard |
|
- Highlighted in MALIBA-AI communications |
|
- Considered for inclusion in production systems |
|
- Invited to present at community events |
|
|
|
            ### 🤝 Community Guidelines
|
|
|
- **Reproducibility:** Please provide model details and methodology |
|
- **Fair Play:** No data leakage or unfair advantages |
|
- **Collaboration:** Share insights and learnings with the community |
|
- **Attribution:** Properly cite the benchmark in publications |
|
|
|
            ### 📋 Technical Specifications
|
|
|
| Aspect | Details | |
|
|--------|---------| |
|
| **Audio Format** | WAV, various sample rates | |
|
| **Language** | Bambara (bam) | |
|
| **Evaluation Metrics** | WER, CER, Combined Score | |
|
| **Text Encoding** | UTF-8 | |
|
| **Submission Format** | CSV with id, text columns | |
|
""") |
|
|
|
|
|
with gr.Group(elem_classes="content-card"): |
|
gr.HTML(""" |
|
<div class="citation-block"> |
|
            <h2>📚 Citation</h2>
|
<p>If you use the Bambara ASR Leaderboard for your scientific publication, or if you find the resources useful, please cite our work:</p> |
|
<pre> |
|
@misc{bambara_asr_leaderboard_2025, |
|
title={Bambara Speech Recognition Leaderboard}, |
|
author={MALIBA-AI Team}, |
|
year={2025}, |
|
url={https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard}, |
|
note={A community initiative for advancing Bambara speech recognition technology} |
|
} |
|
</pre> |
|
</div> |
|
""") |
|
|
|
gr.HTML(""" |
|
<div style="text-align: center; margin-top: 30px; padding-top: 20px; border-top: 2px solid #e9ecef;"> |
|
<h3 style="color: #7d3561; margin-bottom: 15px;">About MALIBA-AI</h3> |
|
<p style="font-size: 16px; line-height: 1.6; max-width: 800px; margin: 0 auto;"> |
|
<strong>MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation</strong><br> |
|
<em>"No Malian Language Left Behind"</em> |
|
</p> |
|
<p style="margin-top: 15px;"> |
|
This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology. |
|
For more information, visit <a href="https://maliba-ai.org/" style="color: #7d3561; font-weight: 600;">MALIBA-AI</a> or |
|
<a href="https://huggingface.co/MALIBA-AI" style="color: #7d3561; font-weight: 600;">our Hugging Face page</a>. |
|
</p> |
|
<div style="margin-top: 20px;"> |
|
            <span style="font-size: 2em;">🇲🇱</span>

            <span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>

            <span style="font-size: 2em;">🤝</span>

            <span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>

            <span style="font-size: 2em;">🌍</span>
|
</div> |
|
</div> |
|
""") |
|
|
|
if __name__ == "__main__": |
|
demo.launch() |