Update app.py
app.py
CHANGED
[Side-by-side diff gutter and the removed-lines column omitted: the previous app.py (407 lines) was commented out wholesale and a new implementation appended; the resulting file is shown in full below.]
# import gradio as gr
# import pandas as pd
# from datasets import load_dataset
# from jiwer import wer, cer
# import os
# from datetime import datetime
# import re
#
# from huggingface_hub import login
#
# # Login to Hugging Face Hub (if token is available)
# token = os.environ.get("HG_TOKEN")
# if token:
#     login(token)
#
#
# try:
#     dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
#     references = {row["id"]: row["text"] for row in dataset}
#     print(f"Loaded {len(references)} reference transcriptions")
# except Exception as e:
#     print(f"Error loading dataset: {str(e)}")
#     references = {}
#
#
# leaderboard_file = "leaderboard.csv"
# if not os.path.exists(leaderboard_file):
#     sample_data = [
#         ["test_1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
#         ["test_2", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
#     ]
#     pd.DataFrame(sample_data,
#                  columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
#     print(f"Created new leaderboard file with sample data")
# else:
#     leaderboard_df = pd.read_csv(leaderboard_file)
#
#     if "Combined_Score" not in leaderboard_df.columns:
#         leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
#         leaderboard_df.to_csv(leaderboard_file, index=False)
#         print(f"Added Combined_Score column to existing leaderboard")
#     print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
#
# def normalize_text(text):
#     """Normalize text for WER/CER calculation"""
#     if not isinstance(text, str):
#         text = str(text)
#
#     text = text.lower()
#     text = re.sub(r'[^\w\s]', '', text)
#     text = re.sub(r'\s+', ' ', text).strip()
#     return text
#
# def calculate_metrics(predictions_df):
#     """Calculate WER and CER for predictions."""
#     results = []
#     total_ref_words = 0
#     total_ref_chars = 0
#
#     for _, row in predictions_df.iterrows():
#         id_val = row["id"]
#         if id_val not in references:
#             continue
#
#         reference = normalize_text(references[id_val])
#         hypothesis = normalize_text(row["text"])
#
#         if not reference or not hypothesis:
#             continue
#
#         reference_words = reference.split()
#         hypothesis_words = hypothesis.split()
#         reference_chars = list(reference)
#
#         try:
#             sample_wer = wer(reference, hypothesis)
#             sample_cer = cer(reference, hypothesis)
#
#             sample_wer = min(sample_wer, 2.0)
#             sample_cer = min(sample_cer, 2.0)
#
#             total_ref_words += len(reference_words)
#             total_ref_chars += len(reference_chars)
#
#             results.append({
#                 "id": id_val,
#                 "reference": reference,
#                 "hypothesis": hypothesis,
#                 "ref_word_count": len(reference_words),
#                 "ref_char_count": len(reference_chars),
#                 "wer": sample_wer,
#                 "cer": sample_cer
#             })
#         except Exception as e:
#             print(f"Error processing sample {id_val}: {str(e)}")
#             pass
#
#     if not results:
#         raise ValueError("No valid samples for WER/CER calculation")
#
#     avg_wer = sum(item["wer"] for item in results) / len(results)
#     avg_cer = sum(item["cer"] for item in results) / len(results)
#
#     weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
#     weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
#
#     return avg_wer, avg_cer, weighted_wer, weighted_cer, results
#
# def format_as_percentage(value):
#     """Convert decimal to percentage with 2 decimal places"""
#     return f"{value * 100:.2f}%"
#
# def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
#     """Format leaderboard for display with ranking and percentages"""
#     if df is None or len(df) == 0:
#         return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
#
#     display_df = df.copy()
#
#     display_df = display_df.sort_values(sort_by)
#
#     display_df.insert(0, "Rank", range(1, len(display_df) + 1))
#
#     for col in ["WER", "CER", "Combined_Score"]:
#         if col in display_df.columns:
#             display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")
#
#     return display_df
#
# def update_ranking(method):
#     """Update leaderboard ranking based on selected method"""
#     try:
#         current_lb = pd.read_csv(leaderboard_file)
#
#         if "Combined_Score" not in current_lb.columns:
#             current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3
#
#         sort_column = "Combined_Score"
#         if method == "WER Only":
#             sort_column = "WER"
#         elif method == "CER Only":
#             sort_column = "CER"
#
#         return prepare_leaderboard_for_display(current_lb, sort_column)
#
#     except Exception as e:
#         print(f"Error updating ranking: {str(e)}")
#         return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
#
# def process_submission(model_name, csv_file):
#     """Process a new model submission"""
#     if not model_name or not model_name.strip():
#         return "Error: Please provide a model name.", None
#
#     if not csv_file:
#         return "Error: Please upload a CSV file.", None
#
#     try:
#         df = pd.read_csv(csv_file)
#
#         if len(df) == 0:
#             return "Error: Uploaded CSV is empty.", None
#
#         if set(df.columns) != {"id", "text"}:
#             return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None
#
#         if df["id"].duplicated().any():
#             dup_ids = df[df["id"].duplicated()]["id"].unique()
#             return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None
#
#         missing_ids = set(references.keys()) - set(df["id"])
#         extra_ids = set(df["id"]) - set(references.keys())
#
#         if missing_ids:
#             return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None
#
#         if extra_ids:
#             return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None
#
#         try:
#             avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
#
#             # Check for suspiciously low values
#             if avg_wer < 0.001:
#                 return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
#
#         except Exception as e:
#             return f"Error calculating metrics: {str(e)}", None
#
#         leaderboard = pd.read_csv(leaderboard_file)
#         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
#         combined_score = avg_wer * 0.7 + avg_cer * 0.3
#
#         if model_name in leaderboard["Model_Name"].values:
#             idx = leaderboard[leaderboard["Model_Name"] == model_name].index
#             leaderboard.loc[idx, "WER"] = avg_wer
#             leaderboard.loc[idx, "CER"] = avg_cer
#             leaderboard.loc[idx, "Combined_Score"] = combined_score
#             leaderboard.loc[idx, "timestamp"] = timestamp
#             updated_leaderboard = leaderboard
#         else:
#             new_entry = pd.DataFrame(
#                 [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
#                 columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
#             )
#             updated_leaderboard = pd.concat([leaderboard, new_entry])
#
#         updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
#         updated_leaderboard.to_csv(leaderboard_file, index=False)
#
#         display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
#
#         return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard
#
#     except Exception as e:
#         return f"Error processing submission: {str(e)}", None
#
# def get_current_leaderboard():
#     """Get the current leaderboard data for display"""
#     try:
#         if os.path.exists(leaderboard_file):
#             current_leaderboard = pd.read_csv(leaderboard_file)
#
#             if "Combined_Score" not in current_leaderboard.columns:
#                 current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
#                 current_leaderboard.to_csv(leaderboard_file, index=False)
#
#             return current_leaderboard
#         else:
#             return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
#     except Exception as e:
#         print(f"Error getting leaderboard: {str(e)}")
#         return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
#
# def create_leaderboard_table():
#     """Create and format the leaderboard table for display"""
#     leaderboard_data = get_current_leaderboard()
#     return prepare_leaderboard_for_display(leaderboard_data)
#
# with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
#     gr.Markdown(
#         """
#         # 🇲🇱 Bambara ASR Leaderboard
#
#         This leaderboard tracks and evaluates speech recognition models for the Bambara language.
#         Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score.
#
#         ## Current Models Performance
#         """
#     )
#
#     current_data = get_current_leaderboard()
#
#     if len(current_data) > 0:
#         best_model = current_data.sort_values("Combined_Score").iloc[0]
#         gr.Markdown(f"""
#         ### 🏆 Current Best Model: **{best_model['Model_Name']}**
#         * WER: **{best_model['WER']*100:.2f}%**
#         * CER: **{best_model['CER']*100:.2f}%**
#         * Combined Score: **{best_model['Combined_Score']*100:.2f}%**
#         """)
#
#     with gr.Tabs() as tabs:
#         with gr.TabItem("🏅 Model Rankings"):
#             initial_leaderboard = create_leaderboard_table()
#
#             ranking_method = gr.Radio(
#                 ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
#                 label="Ranking Method",
#                 value="Combined Score (WER 70%, CER 30%)"
#             )
#
#             leaderboard_view = gr.DataFrame(
#                 value=initial_leaderboard,
#                 interactive=False,
#                 label="Models are ranked by selected metric - lower is better"
#             )
#
#             ranking_method.change(
#                 fn=update_ranking,
#                 inputs=[ranking_method],
#                 outputs=[leaderboard_view]
#             )
#
#             with gr.Accordion("Metrics Explanation", open=False):
#                 gr.Markdown(
#                     """
#                     ## Understanding ASR Metrics
#
#                     ### Word Error Rate (WER)
#                     WER measures how accurately the ASR system recognizes whole words:
#                     * Lower values indicate better performance
#                     * Calculated as: (Substitutions + Insertions + Deletions) / Total Words
#                     * A WER of 0% means perfect transcription
#                     * A WER of 20% means approximately 1 in 5 words contains an error
#
#                     ### Character Error Rate (CER)
#                     CER measures accuracy at the character level:
#                     * More fine-grained than WER
#                     * Better at capturing partial word matches
#                     * Particularly useful for agglutinative languages like Bambara
#
#                     ### Combined Score
#                     * Weighted average: 70% WER + 30% CER
#                     * Provides a balanced evaluation of model performance
#                     * Used as the primary ranking metric
#                     """
#                 )
#
#         with gr.TabItem("🚀 Submit New Results"):
#             gr.Markdown(
#                 """
#                 ### Submit a new model for evaluation
#
#                 Upload a CSV file with the following format:
#                 * Must contain exactly two columns: 'id' and 'text'
#                 * The 'id' column should match the reference dataset IDs
#                 * The 'text' column should contain your model's transcriptions
#                 """
#             )
#
#             with gr.Row():
#                 model_name_input = gr.Textbox(
#                     label="Model Name",
#                     placeholder="e.g., MALIBA-AI/bambara-asr"
#                 )
#                 gr.Markdown("*Use a descriptive name to identify your model*")
#
#             with gr.Row():
#                 csv_upload = gr.File(
#                     label="Upload CSV File",
#                     file_types=[".csv"]
#                 )
#                 gr.Markdown("*CSV with columns: id, text*")
#
#             submit_btn = gr.Button("Submit", variant="primary")
#             output_msg = gr.Textbox(label="Status", interactive=False)
#             leaderboard_display = gr.DataFrame(
#                 label="Updated Leaderboard",
#                 value=initial_leaderboard,
#                 interactive=False
#             )
#
#             submit_btn.click(
#                 fn=process_submission,
#                 inputs=[model_name_input, csv_upload],
#                 outputs=[output_msg, leaderboard_display]
#             )
#
#         with gr.TabItem("📊 Benchmark Dataset"):
#             gr.Markdown(
#                 """
#                 ## About the Benchmark Dataset
#
#                 This leaderboard uses the **[sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/MALIBA-AI/bambara-speech-recognition-leaderboard)** dataset:
#
#                 * Contains diverse Bambara speech samples
#                 * Includes various speakers, accents, and dialects
#                 * Covers different speech styles and recording conditions
#                 * Transcribed and validated
#
#                 ### How to Generate Predictions
#
#                 To submit results to this leaderboard:
#
#                 1. Download the audio files from the benchmark dataset
#                 2. Run your ASR model on the audio files
#                 3. Generate a CSV file with 'id' and 'text' columns
#                 4. Submit your results using the form in the "Submit New Results" tab
#
#                 ### Evaluation Guidelines
#
#                 * Text is normalized (lowercase, punctuation removed) before metrics calculation
#                 * Extreme outliers are capped to prevent skewing results
#                 * All submissions are validated for format and completeness
#
#                 NB: This work is a collaboration between MALIBA-AI, RobotsMali AI4D-LAB and Djelia
#                 """
#             )
#
#     gr.Markdown(
#         """
#         ---
#         ### About MALIBA-AI
#
#         **MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation**
#
#         *"No Malian Language Left Behind"*
#
#         This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
#         For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI).
#         """
#     )
#
# if __name__ == "__main__":
#     demo.launch()

import gradio as gr
import pandas as pd
from datasets import load_dataset
from jiwer import wer, cer
import os
from datetime import datetime
import re
import plotly.express as px
import plotly.graph_objects as go
from huggingface_hub import login
import numpy as np

# Font tags injected into the page head (styling inspired by the Sahara leaderboard)
custom_head_html = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Rubik:wght@400;600&display=swap" rel="stylesheet">
"""

# Header with MALIBA-AI branding
new_header_html = """
<center>
<br><br>
<div style="display: flex; align-items: center; justify-content: center; gap: 20px; margin-bottom: 20px;">
    <div style="font-size: 4em;">🇲🇱</div>
    <div>
        <h1 style="margin: 0; font-family: 'Rubik', sans-serif; color: #2f3b7d; font-size: 2.5em; font-weight: 700;">
            Bambara ASR Leaderboard
        </h1>
        <p style="margin: 5px 0 0 0; font-size: 1.2em; color: #7d3561; font-weight: 600;">
            Powered by MALIBA-AI • "No Malian Language Left Behind"
        </p>
    </div>
    <div style="font-size: 4em;">🎙️</div>
</div>
</center>
"""

# Advanced CSS styling inspired by Sahara
sahara_style_css = """
/* Global Styles */
div[class*="gradio-container"] {
    background: #FFFBF5 !important;
    color: #000 !important;
    font-family: 'Inter', sans-serif !important;
}

div.svelte-1nguped {
    background: white !important;
}

.fillable.svelte-15jxnnn.svelte-15jxnnn:not(.fill_width) {
    max-width: 1580px !important;
}

/* Navigation Buttons */
.nav-button {
    background-color: #117b75 !important;
    color: #fff !important;
    font-weight: bold !important;
    border-radius: 8px !important;
    border: none !important;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
    transition: all 0.3s ease !important;
}

.nav-button:hover {
    background-color: #0f6b66 !important;
    color: #e8850e !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
}

/* Content Cards */
.content-section {
    padding: 40px 0;
}

.content-card {
    background-color: #fff !important;
    border-radius: 16px !important;
    box-shadow: 0 10px 25px -5px rgba(0,0,0,0.1), 0 8px 10px -6px rgba(0,0,0,0.1) !important;
    padding: 40px !important;
    margin-bottom: 30px !important;
    border: 1px solid rgba(0,0,0,0.05) !important;
}

/* Typography */
.content-card h2 {
    font-family: "Rubik", sans-serif !important;
    font-size: 32px !important;
    font-weight: 700 !important;
    line-height: 1.25 !important;
    letter-spacing: -1px !important;
    color: #2f3b7d !important;
    margin-bottom: 20px !important;
    text-align: center !important;
}

.content-card h3 {
    font-size: 22px !important;
    color: #2f3b7d !important;
    font-weight: 600 !important;
    margin-bottom: 15px !important;
}

.content-card h4 {
    font-family: "Rubik", sans-serif !important;
    color: #7d3561 !important;
    font-weight: 600 !important;
    margin-bottom: 10px !important;
}

.title {
    color: #7d3561 !important;
    font-weight: 600 !important;
}

/* Tab Styling */
.tab-wrapper.svelte-1tcem6n.svelte-1tcem6n {
    display: flex;
    align-items: center;
    justify-content: space-between;
    position: relative;
    height: auto !important;
    padding-bottom: 0 !important;
}

.selected.svelte-1tcem6n.svelte-1tcem6n {
    background-color: #7d3561 !important;
    color: #fff !important;
    border-radius: 8px 8px 0 0 !important;
}

button.svelte-1tcem6n.svelte-1tcem6n {
    color: #7d3561 !important;
    font-weight: 600 !important;
    font-size: 16px !important;
    padding: 12px 20px !important;
    background-color: #fff !important;
    border-radius: 8px 8px 0 0 !important;
    border: 2px solid #e9ecef !important;
    border-bottom: none !important;
    transition: all 0.3s ease !important;
}

button.svelte-1tcem6n.svelte-1tcem6n:hover {
    background-color: #f8f9fa !important;
    border-color: #7d3561 !important;
}

.tab-container.svelte-1tcem6n.svelte-1tcem6n:after {
    content: "";
    position: absolute;
    bottom: 0;
    left: 0;
    right: 0;
    height: 3px;
    background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
}

/* Table Styling */
div[class*="gradio-container"] .prose table {
    color: #000 !important;
    border: 2px solid #dca02a !important;
    border-radius: 12px !important;
    margin-bottom: 20px !important;
    margin-left: auto !important;
    margin-right: auto !important;
    width: 100% !important;
    border-collapse: separate !important;
    border-spacing: 0 !important;
    overflow: hidden !important;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
}

div[class*="gradio-container"] .prose thead tr {
    background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
}

div[class*="gradio-container"] .prose th {
    color: #fff !important;
    font-weight: 700 !important;
    font-size: 14px !important;
    padding: 15px 10px !important;
    text-align: center !important;
    border: none !important;
}

div[class*="gradio-container"] .prose td {
    font-size: 14px !important;
    padding: 12px 10px !important;
    border: none !important;
    text-align: center !important;
    color: #000 !important;
    border-bottom: 1px solid #f8f9fa !important;
}

div[class*="gradio-container"] .prose tbody tr:nth-child(even) {
    background-color: #f8f9fa !important;
}

div[class*="gradio-container"] .prose tbody tr:hover {
    background-color: #e3f2fd !important;
    transition: background-color 0.2s ease !important;
}

/* First column (model names) styling */
div[class*="gradio-container"] .prose th:first-child,
div[class*="gradio-container"] .prose td:first-child {
    text-align: left !important;
    min-width: 250px !important;
    font-weight: 600 !important;
}

/* Performance badges */
.performance-badge {
    display: inline-block;
    padding: 4px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: 600;
    margin-left: 8px;
}

.badge-excellent {
    background: #d4edda;
    color: #155724;
}

.badge-good {
    background: #fff3cd;
    color: #856404;
}

.badge-fair {
    background: #f8d7da;
    color: #721c24;
}

/* Stats cards */
.stats-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 20px;
    margin: 20px 0;
}

.stat-card {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 12px;
    text-align: center;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

.stat-number {
    font-size: 2em;
    font-weight: 700;
    margin-bottom: 5px;
}

.stat-label {
    font-size: 0.9em;
    opacity: 0.9;
}

/* Form styling */
.form-section {
    background: #f8f9fa;
    border-radius: 12px;
    padding: 25px;
    margin: 20px 0;
    border-left: 4px solid #7d3561;
}

/* Citation block */
.citation-block {
    background-color: #FDF6E3 !important;
    border-radius: 12px !important;
    padding: 25px !important;
    border-left: 4px solid #D97706 !important;
    margin: 20px 0 !important;
}

/* Dropdown styling */
.gradio-dropdown {
    border-radius: 8px !important;
    border: 2px solid #e9ecef !important;
}

.gradio-dropdown:focus {
    border-color: #7d3561 !important;
    box-shadow: 0 0 0 3px rgba(125, 53, 97, 0.1) !important;
}

/* Button styling */
.gradio-button {
    border-radius: 8px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
}

.gradio-button.primary {
    background: linear-gradient(135deg, #7d3561 0%, #2f3b7d 100%) !important;
    border: none !important;
    color: white !important;
}

.gradio-button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 4px 12px rgba(125, 53, 97, 0.3) !important;
}

/* Responsive design */
@media (max-width: 768px) {
    .content-card {
        padding: 20px !important;
        margin-bottom: 20px !important;
    }

    .content-card h2 {
        font-size: 24px !important;
    }

    .stats-grid {
        grid-template-columns: 1fr !important;
    }
}
"""

# Login to Hugging Face Hub (if token is available)
token = os.environ.get("HG_TOKEN")
if token:
    login(token)

# Load dataset
try:
    dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
    references = {row["id"]: row["text"] for row in dataset}
    print(f"Loaded {len(references)} reference transcriptions")
except Exception as e:
    print(f"Error loading dataset: {str(e)}")
    references = {}

# Initialize leaderboard
leaderboard_file = "leaderboard.csv"
if not os.path.exists(leaderboard_file):
    sample_data = [
        ["MALIBA-AI/bambara-whisper-small", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45", "Whisper-based", "Mali", "ASR"],
        ["OpenAI/whisper-base", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45", "Foundation", "USA", "ASR"],
    ]
    pd.DataFrame(sample_data,
                 columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]).to_csv(leaderboard_file, index=False)
    print("Created new leaderboard file with sample data")
else:
    leaderboard_df = pd.read_csv(leaderboard_file)

    # Add new columns if they don't exist
    required_columns = ["Combined_Score", "Type", "Origin", "Task"]
    for col in required_columns:
        if col not in leaderboard_df.columns:
            if col == "Combined_Score":
                leaderboard_df[col] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
            else:
                default_val = "Unknown" if col != "Task" else "ASR"
                leaderboard_df[col] = default_val

    leaderboard_df.to_csv(leaderboard_file, index=False)
    print(f"Loaded leaderboard with {len(leaderboard_df)} entries")

def normalize_text(text):
    """Normalize text for WER/CER calculation"""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def calculate_metrics(predictions_df):
    """Calculate WER and CER for predictions."""
    results = []
    total_ref_words = 0
    total_ref_chars = 0

    for _, row in predictions_df.iterrows():
        id_val = row["id"]
        if id_val not in references:
            continue

        reference = normalize_text(references[id_val])
        hypothesis = normalize_text(row["text"])

        if not reference or not hypothesis:
            continue

        reference_words = reference.split()
        hypothesis_words = hypothesis.split()
        reference_chars = list(reference)

        try:
            sample_wer = wer(reference, hypothesis)
            sample_cer = cer(reference, hypothesis)

            # Cap extreme outliers so one bad sample cannot skew the averages
            sample_wer = min(sample_wer, 2.0)
            sample_cer = min(sample_cer, 2.0)

            total_ref_words += len(reference_words)
            total_ref_chars += len(reference_chars)

            results.append({
                "id": id_val,
                "reference": reference,
                "hypothesis": hypothesis,
                "ref_word_count": len(reference_words),
                "ref_char_count": len(reference_chars),
                "wer": sample_wer,
                "cer": sample_cer
            })
        except Exception as e:
            print(f"Error processing sample {id_val}: {str(e)}")

    if not results:
        raise ValueError("No valid samples for WER/CER calculation")

    avg_wer = sum(item["wer"] for item in results) / len(results)
    avg_cer = sum(item["cer"] for item in results) / len(results)

    weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
    weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars

    return avg_wer, avg_cer, weighted_wer, weighted_cer, results

def format_as_percentage(value):
    """Convert decimal to percentage with 2 decimal places"""
    return f"{value * 100:.2f}%"

def get_performance_badge(score):
    """Get performance badge based on score"""
    if score < 0.15:
        return "🏆 Excellent"
    elif score < 0.30:
        return "🥈 Good"
    else:
        return "📊 Fair"
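# For instance (hypothetical scores): get_performance_badge(0.12) -> "🏆 Excellent",
# get_performance_badge(0.22) -> "🥈 Good", get_performance_badge(0.40) -> "📊 Fair".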

def add_medals_to_models(df, score_col="Combined_Score"):
    """Add medals to top-performing models"""
    if df.empty or score_col not in df.columns:
        return df

    df_copy = df.copy()

    # Convert score to float for sorting
    df_copy[f"{score_col}_float"] = pd.to_numeric(df_copy[score_col], errors='coerce')

    # Sort by score (ascending - lower is better for error rates)
    df_copy = df_copy.sort_values(by=f"{score_col}_float", ascending=True, na_position='last').reset_index(drop=True)

    # Get unique scores for ranking
    valid_scores = df_copy[f"{score_col}_float"].dropna().unique()
    valid_scores.sort()

    # Assign medals
    medals = ["🥇", "🥈", "🥉"]

    def get_medal(score):
        if pd.isna(score):
            return ""
        rank = np.where(valid_scores == score)[0]
        if len(rank) > 0 and rank[0] < len(medals):
            return medals[rank[0]] + " "
        return ""

    df_copy["Medal"] = df_copy[f"{score_col}_float"].apply(get_medal)
    df_copy["Model_Name"] = df_copy["Medal"] + df_copy["Model_Name"].astype(str)

    # Clean up temporary columns
    df_copy = df_copy.drop(columns=[f"{score_col}_float", "Medal"])

    return df_copy
895 |
+
|
896 |
def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
|
897 |
"""Format leaderboard for display with ranking and percentages"""
|
898 |
if df is None or len(df) == 0:
|
899 |
+
return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"])
|
900 |
|
|
|
901 |
display_df = df.copy()
|
902 |
|
903 |
+
# Add medals first
|
904 |
+
display_df = add_medals_to_models(display_df, sort_by)
|
905 |
+
|
906 |
+
# Sort by the specified column
|
907 |
+
display_df[f"{sort_by}_float"] = pd.to_numeric(display_df[sort_by], errors='coerce')
|
908 |
+
display_df = display_df.sort_values(f"{sort_by}_float", ascending=True, na_position='last')
|
909 |
|
910 |
+
# Add rank
|
911 |
display_df.insert(0, "Rank", range(1, len(display_df) + 1))
|
912 |
|
913 |
+
# Format percentages
|
914 |
for col in ["WER", "CER", "Combined_Score"]:
|
915 |
if col in display_df.columns:
|
916 |
+
display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}" if pd.notna(x) else "---")
|
917 |
|
918 |
+
# Add performance badges
|
919 |
+
display_df["Performance"] = display_df["Combined_Score"].apply(lambda x: get_performance_badge(x) if pd.notna(x) else "---")
|
920 |
+
|
921 |
+
# Shorten model names for display
|
922 |
+
display_df["Model"] = display_df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in str(x) else str(x))
|
923 |
+
|
924 |
+
# Format date
|
925 |
+
if "timestamp" in display_df.columns:
|
926 |
+
display_df["Date"] = pd.to_datetime(display_df["timestamp"], errors='coerce').dt.strftime("%Y-%m-%d")
|
927 |
+
else:
|
928 |
+
display_df["Date"] = "---"
|
929 |
+
|
930 |
+
# Select and reorder columns
|
931 |
+
display_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]
|
932 |
+
available_columns = [col for col in display_columns if col in display_df.columns]
|
933 |
|
934 |
+
# Clean up temporary columns
|
935 |
+
temp_cols = [col for col in display_df.columns if col.endswith("_float")]
|
936 |
+
display_df = display_df.drop(columns=temp_cols, errors='ignore')
|
937 |
+
|
938 |
+
return display_df[available_columns]
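# Usage note: update_ranking() below feeds this function a re-read CSV, e.g.
#   prepare_leaderboard_for_display(pd.read_csv(leaderboard_file), "WER")
# ranks the same rows by WER instead of the combined score.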

def create_performance_chart():
    """Create performance visualization chart"""
    try:
        df = pd.read_csv(leaderboard_file)
        if len(df) == 0:
            return None

        # Sort by Combined_Score
        df = df.sort_values("Combined_Score")

        fig = go.Figure()

        # Add WER bars
        fig.add_trace(go.Bar(
            name="WER",
            x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
            y=df["WER"] * 100,
            marker_color='#ff7f0e',
            hovertemplate='<b>%{x}</b><br>WER: %{y:.2f}%<extra></extra>'
        ))

        # Add CER bars
        fig.add_trace(go.Bar(
            name="CER",
            x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
            y=df["CER"] * 100,
            marker_color='#2ca02c',
            hovertemplate='<b>%{x}</b><br>CER: %{y:.2f}%<extra></extra>'
        ))

        # Add Combined Score line
        fig.add_trace(go.Scatter(
            name="Combined Score",
            x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
            y=df["Combined_Score"] * 100,
            mode='lines+markers',
            line=dict(color='#d62728', width=3),
            marker=dict(size=8),
            hovertemplate='<b>%{x}</b><br>Combined Score: %{y:.2f}%<extra></extra>'
        ))

        fig.update_layout(
            title={
                'text': "📊 Model Performance Comparison",
                'x': 0.5,
                'font': {'size': 18, 'family': 'Rubik'}
            },
            xaxis_title="Model",
            yaxis_title="Error Rate (%)",
            hovermode='x unified',
            height=500,
            showlegend=True,
            plot_bgcolor='rgba(0,0,0,0)',
            paper_bgcolor='rgba(0,0,0,0)',
            font=dict(family="Inter", size=12),
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )

        return fig
    except Exception as e:
        print(f"Error creating chart: {str(e)}")
        return None
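# Note: the UI capture below is truncated; presumably the figure is rendered by
# a plot component, e.g. gr.Plot(value=create_performance_chart()) (assumption).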

def get_leaderboard_stats():
    """Get summary statistics for the leaderboard"""
    try:
        df = pd.read_csv(leaderboard_file)
        if len(df) == 0:
            return """
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-number">0</div>
                    <div class="stat-label">Models Submitted</div>
                </div>
            </div>
            """

        best_model = df.loc[df["Combined_Score"].idxmin()]
        total_models = len(df)
        avg_wer = df["WER"].mean()
        avg_cer = df["CER"].mean()

        return f"""
        <div class="stats-grid">
            <div class="stat-card">
                <div class="stat-number">{total_models}</div>
                <div class="stat-label">Models Evaluated</div>
            </div>
            <div class="stat-card">
                <div class="stat-number">{format_as_percentage(best_model['Combined_Score'])}</div>
                <div class="stat-label">Best Combined Score</div>
            </div>
            <div class="stat-card">
                <div class="stat-number">{format_as_percentage(avg_wer)}</div>
                <div class="stat-label">Average WER</div>
            </div>
            <div class="stat-card">
                <div class="stat-number">{format_as_percentage(avg_cer)}</div>
                <div class="stat-label">Average CER</div>
            </div>
        </div>

        <div style="text-align: center; margin-top: 20px;">
            <h4>🏆 Current Champion: {best_model['Model_Name']}</h4>
        </div>
        """
    except Exception as e:
        return f"<p>Error loading stats: {str(e)}</p>"
1054 |
|
1055 |
def update_ranking(method):
|
1056 |
"""Update leaderboard ranking based on selected method"""
|
|
|
1070 |
|
1071 |
except Exception as e:
|
1072 |
print(f"Error updating ranking: {str(e)}")
|
1073 |
+
return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"])

def compare_models(model_1_name, model_2_name):
    """Compare two models' performance"""
    try:
        df = pd.read_csv(leaderboard_file)

        if model_1_name == model_2_name:
            return pd.DataFrame([{"Info": "Please select two different models to compare."}])

        model_1 = df[df["Model_Name"] == model_1_name]
        model_2 = df[df["Model_Name"] == model_2_name]

        if model_1.empty or model_2.empty:
            return pd.DataFrame([{"Info": "One or both models not found in leaderboard."}])

        m1 = model_1.iloc[0]
        m2 = model_2.iloc[0]

        comparison_data = {
            "Metric": ["WER", "CER", "Combined Score"],
            model_1_name.split("/")[-1]: [
                f"{m1['WER']*100:.2f}%",
                f"{m1['CER']*100:.2f}%",
                f"{m1['Combined_Score']*100:.2f}%"
            ],
            model_2_name.split("/")[-1]: [
                f"{m2['WER']*100:.2f}%",
                f"{m2['CER']*100:.2f}%",
                f"{m2['Combined_Score']*100:.2f}%"
            ],
            "Difference": [
                f"{(m1['WER'] - m2['WER'])*100:+.2f}%",
                f"{(m1['CER'] - m2['CER'])*100:+.2f}%",
                f"{(m1['Combined_Score'] - m2['Combined_Score'])*100:+.2f}%"
            ]
        }

        return pd.DataFrame(comparison_data)

    except Exception as e:
        return pd.DataFrame([{"Error": f"Error comparing models: {str(e)}"}])

def process_submission(model_name, csv_file, model_type, origin_country):
    """Process a new model submission with enhanced metadata"""
    if not model_name or not model_name.strip():
        return "❌ **Error:** Please provide a model name.", None, None

    if not csv_file:
        return "❌ **Error:** Please upload a CSV file.", None, None

    try:
        df = pd.read_csv(csv_file)

        if len(df) == 0:
            return "❌ **Error:** Uploaded CSV is empty.", None, None

        if set(df.columns) != {"id", "text"}:
            return f"❌ **Error:** CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None, None

        if df["id"].duplicated().any():
            dup_ids = df[df["id"].duplicated()]["id"].unique()
            return f"❌ **Error:** Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None, None

        missing_ids = set(references.keys()) - set(df["id"])
        extra_ids = set(df["id"]) - set(references.keys())

        if missing_ids:
            return f"❌ **Error:** Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None, None

        if extra_ids:
            return f"❌ **Error:** Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None, None

        try:
            avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)

            # Check for suspiciously low values
            if avg_wer < 0.001:
                return "❌ **Error:** WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None, None

        except Exception as e:
            return f"❌ **Error calculating metrics:** {str(e)}", None, None

        # Update leaderboard
        leaderboard = pd.read_csv(leaderboard_file)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        combined_score = avg_wer * 0.7 + avg_cer * 0.3

        if model_name in leaderboard["Model_Name"].values:
            idx = leaderboard[leaderboard["Model_Name"] == model_name].index
            leaderboard.loc[idx, "WER"] = avg_wer
            leaderboard.loc[idx, "CER"] = avg_cer
            leaderboard.loc[idx, "Combined_Score"] = combined_score
            leaderboard.loc[idx, "timestamp"] = timestamp
            leaderboard.loc[idx, "Type"] = model_type
            leaderboard.loc[idx, "Origin"] = origin_country
            updated_leaderboard = leaderboard
        else:
            new_entry = pd.DataFrame(
                [[model_name, avg_wer, avg_cer, combined_score, timestamp, model_type, origin_country, "ASR"]],
                columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]
            )
            updated_leaderboard = pd.concat([leaderboard, new_entry])

        updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
        updated_leaderboard.to_csv(leaderboard_file, index=False)

        display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
        chart = create_performance_chart()

        badge = get_performance_badge(combined_score)

        success_msg = f"""
✅ **Submission processed successfully!**

**{model_name}** ({model_type} from {origin_country})
- **WER:** {format_as_percentage(avg_wer)}
- **CER:** {format_as_percentage(avg_cer)}
- **Combined Score:** {format_as_percentage(combined_score)}
- **Performance:** {badge}
"""

        return success_msg, display_leaderboard, chart

    except Exception as e:
        return f"❌ **Error processing submission:** {str(e)}", None, None

def get_current_leaderboard():
    """Get the current leaderboard data for display"""
    try:
        if os.path.exists(leaderboard_file):
            current_leaderboard = pd.read_csv(leaderboard_file)

            # Ensure all required columns exist
            required_columns = ["Combined_Score", "Type", "Origin", "Task"]
            for col in required_columns:
                if col not in current_leaderboard.columns:
                    if col == "Combined_Score":
                        current_leaderboard[col] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
                    else:
                        current_leaderboard[col] = "Unknown" if col != "Task" else "ASR"

            current_leaderboard.to_csv(leaderboard_file, index=False)
            return current_leaderboard
        else:
            return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"])
    except Exception as e:
        print(f"Error getting leaderboard: {str(e)}")
        return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"])

def create_leaderboard_table():
    """Create and format the leaderboard table for display"""
    leaderboard_data = get_current_leaderboard()
    return prepare_leaderboard_for_display(leaderboard_data)

def df_to_html(df):
    """Convert DataFrame to HTML with custom styling"""
    if df.empty:
        return "<p style='text-align: center; color: #666;'>No data available</p>"

    # Convert DataFrame to HTML
    html = df.to_html(index=False, escape=False, classes="leaderboard-table")

    # Add custom styling
    html = html.replace('<table class="leaderboard-table"',
                        '<table class="leaderboard-table" style="width: 100%; margin: 0 auto;"')

    return html

# Main Gradio Interface
with gr.Blocks(
    title="🇲🇱 Bambara ASR Leaderboard | MALIBA-AI",
    css=sahara_style_css,
    head=custom_head_html,
    theme=gr.themes.Soft()
) as demo:

    # Header Section
    gr.HTML(new_header_html)

    # Navigation Buttons
    with gr.Row():
        gr.Button("MALIBA-AI Website", link="https://maliba-ai.org/", elem_classes=['nav-button'])
        gr.Button("HF Dataset Repo", link="https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark", elem_classes=['nav-button'])
        gr.Button("MALIBA-AI Hub", link="https://huggingface.co/MALIBA-AI", elem_classes=['nav-button'])
        gr.Button("Documentation", link="https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard", elem_classes=['nav-button'])

    with gr.Group(elem_classes="content-card"):
        # Stats display
        stats_html = gr.HTML(get_leaderboard_stats())

    with gr.Tabs() as tabs:
        with gr.TabItem("Main Leaderboard", id="main"):
            gr.HTML("<h2>Main Leaderboard</h2>")

            initial_leaderboard = create_leaderboard_table()

            with gr.Row():
                ranking_method = gr.Radio(
                    ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
                    label="Ranking Method",
                    value="Combined Score (WER 70%, CER 30%)",
                    info="Choose how to rank the models"
                )

            leaderboard_view = gr.DataFrame(
                value=initial_leaderboard,
                interactive=False,
                label="Leaderboard Rankings - Lower scores indicate better performance",
                wrap=True,
                height=400
            )

            # Performance chart
            gr.Markdown("### Visual Performance Comparison")
            performance_chart = gr.Plot(
                value=create_performance_chart(),
                label="Model Performance Visualization"
            )

            ranking_method.change(
                fn=update_ranking,
                inputs=[ranking_method],
                outputs=[leaderboard_view]
            )

            with gr.Accordion("Understanding ASR Metrics", open=False):
                gr.Markdown("""
                ## Automatic Speech Recognition Evaluation Metrics

                ### Word Error Rate (WER)
                **WER** measures transcription accuracy at the word level:
                - **Formula:** `(Substitutions + Insertions + Deletions) / Total Reference Words`
                - **Range:** 0% (perfect) to 100%+ (very poor)
                - **Interpretation:**
                  - 0-5%: Excellent performance
                  - 5-15%: Good performance
                  - 15-30%: Fair performance
                  - 30%+: Poor performance

                ### Character Error Rate (CER)
                **CER** measures transcription accuracy at the character level:
                - **Advantage:** More granular than WER, captures partial matches
                - **Benefit for Bambara:** Particularly valuable for agglutinative languages
                - **Typical Range:** Usually lower than WER values

                ### Combined Score (Primary Ranking Metric)
                **Formula:** `Combined Score = 0.7 × WER + 0.3 × CER`
                - **Rationale:** Balanced evaluation emphasizing word-level accuracy
                - **Usage:** Primary metric for model ranking

                ### Performance Categories
                - **Excellent**: < 15% Combined Score
                - **Good**: 15-30% Combined Score
                - **Fair**: > 30% Combined Score
                """)

        with gr.TabItem("Submit New Model", id="submit"):
            gr.HTML("<h2>Submit Your Bambara ASR Model</h2>")

            gr.Markdown("""
            ### Ready to benchmark your model? Submit your results and join the leaderboard!

            Follow these steps to submit your Bambara ASR model for evaluation.
            """)

            with gr.Group(elem_classes="form-section"):
                with gr.Row():
                    with gr.Column(scale=2):
                        model_name_input = gr.Textbox(
                            label="Model Name",
                            placeholder="e.g., MALIBA-AI/bambara-whisper-large",
                            info="Use a descriptive name (organization/model format preferred)"
                        )

                        model_type = gr.Dropdown(
                            label="Model Type",
                            choices=["Whisper-based", "Wav2Vec2", "Foundation", "Custom", "Fine-tuned", "Multilingual", "Other"],
                            value="Custom",
                            info="Select the type/architecture of your model"
                        )

                        origin_country = gr.Dropdown(
                            label="Origin/Institution",
                            choices=["Mali", "Senegal", "Burkina Faso", "Niger", "Guinea", "Ivory Coast", "USA", "France", "Canada", "UK", "Other"],
                            value="Mali",
                            info="Country or region of the developing institution"
                        )

                    with gr.Column(scale=1):
                        gr.Markdown("""
                        #### Submission Requirements

                        **CSV Format:**
                        - Columns: `id`, `text`
                        - Match all reference dataset IDs
                        - No duplicate IDs
                        - Text transcriptions in Bambara

                        **Data Quality:**
                        - Clean, normalized text
                        - Consistent formatting
                        - Complete coverage of test set
                        """)

                csv_upload = gr.File(
                    label="Upload Predictions CSV",
                    file_types=[".csv"],
                    info="Upload your model's transcriptions in the required CSV format"
                )

            submit_btn = gr.Button("Submit Model", variant="primary", size="lg", elem_classes=['gradio-button', 'primary'])

            output_msg = gr.Markdown(label="Submission Status")

            with gr.Row():
                leaderboard_display = gr.DataFrame(
                    label="Updated Leaderboard",
                    value=initial_leaderboard,
                    interactive=False,
                    wrap=True,
                    height=400
                )

                updated_chart = gr.Plot(
                    label="Updated Performance Chart"
                )

            submit_btn.click(
                fn=process_submission,
                inputs=[model_name_input, csv_upload, model_type, origin_country],
                outputs=[output_msg, leaderboard_display, updated_chart]
            )

        with gr.TabItem("Compare Models", id="compare"):
            gr.HTML("<h2>Compare Two Models</h2>")

            gr.Markdown("### Select two models to compare their performance side-by-side")

            with gr.Row():
                current_data = get_current_leaderboard()
                model_names = current_data["Model_Name"].tolist() if not current_data.empty else []

                model_1_dropdown = gr.Dropdown(
                    choices=model_names,
                    label="Model 1",
                    info="Select the first model for comparison"
                )
                model_2_dropdown = gr.Dropdown(
                    choices=model_names,
                    label="Model 2",
                    info="Select the second model for comparison"
                )

            compare_btn = gr.Button("Compare Models", variant="primary", elem_classes=['gradio-button', 'primary'])

            comparison_note = gr.Markdown("""
            **Note on Comparison Results:**
            - Positive difference values (🟢) indicate Model 1 performed better
            - Negative difference values (🔴) indicate Model 2 performed better
            - Lower error rates indicate better performance
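            - Example: if Model 1 has 20% WER and Model 2 has 25% WER, the difference is positive (🟢), so Model 1 performed better on that metric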
            """, visible=False)

            comparison_output = gr.DataFrame(
                label="Model Comparison Results",
                value=pd.DataFrame([{"Info": "Select two models and click Compare to see the results."}]),
                interactive=False
            )

            def update_comparison_table(m1, m2):
                # Guard against empty or identical selections before running the comparison
                if not m1 or not m2:
                    return gr.update(visible=False), pd.DataFrame([{"Info": "Please select both models before clicking Compare."}])

                if m1 == m2:
                    return gr.update(visible=False), pd.DataFrame([{"Info": "Please select two different models to compare."}])

                df = compare_models(m1, m2)
                return gr.update(visible=True), df

            compare_btn.click(
                fn=update_comparison_table,
                inputs=[model_1_dropdown, model_2_dropdown],
                outputs=[comparison_note, comparison_output]
            )

        with gr.TabItem("Dataset & Methodology", id="dataset"):
            gr.HTML("<h2>Dataset & Methodology</h2>")

            gr.Markdown("""
            ## About the Bambara Speech Recognition Benchmark

            ### Dataset Overview

            Our benchmark is built on the **`sudoping01/bambara-speech-recognition-benchmark`** dataset, featuring:

            - **Diverse Audio Samples:** Various speakers, dialects, and recording conditions
            - **Speaker Variety:** Multiple native Bambara speakers from different regions
            - **Acoustic Diversity:** Different recording environments and quality levels
            - **Quality Assurance:** Manually validated transcriptions
            - **Content Variety:** Multiple domains and speaking styles

            ### Evaluation Methodology

            #### Text Normalization Process
            The following normalization steps are applied before scoring (see the sketch after this list):
            1. **Lowercase conversion** for consistency
            2. **Punctuation removal** to focus on linguistic content
            3. **Whitespace normalization** for standardized formatting
            4. **Unicode normalization** for proper character handling
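
            A minimal sketch of these steps (the benchmark's exact implementation may differ):
            ```python
            import re
            import unicodedata

            def normalize_text(text: str) -> str:
                text = text.lower()                        # 1. lowercase conversion
                text = re.sub(r"[^\w\s]", "", text)        # 2. punctuation removal
                text = re.sub(r"\s+", " ", text).strip()   # 3. whitespace normalization
                return unicodedata.normalize("NFC", text)  # 4. unicode normalization
            ```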

            #### Quality Controls
            - **Outlier Detection:** Extreme error rates are capped to prevent skewing (see the one-line sketch after this list)
            - **Data Validation:** Comprehensive format and completeness checks
            - **Duplicate Prevention:** Automatic detection of duplicate submissions
            - **Missing Data Handling:** Identification of incomplete submissions
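
            For example, per-utterance error rates can be clipped before averaging (the cap value here is illustrative, not the benchmark's actual threshold):
            ```python
            def cap_error_rate(rate: float, cap: float = 2.0) -> float:
                return min(rate, cap)  # extreme outliers no longer dominate the average
            ```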

            ### How to Participate

            #### Step 1: Access the Dataset
            ```python
            from datasets import load_dataset

            dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark")
            ```

            #### Step 2: Generate Predictions
            - Process the audio files with your ASR model (a sketch follows this list)
            - Generate transcriptions for each audio sample
            - Ensure your model outputs text in Bambara language
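
            For example, with a `transformers` ASR pipeline (the model name is a placeholder and the `audio` column layout is assumed; adapt to your own setup):
            ```python
            from datasets import load_dataset
            from transformers import pipeline

            dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark")["eval"]
            asr = pipeline("automatic-speech-recognition", model="your-org/your-bambara-asr")

            predictions = {}
            for row in dataset:
                audio = row["audio"]  # assumed to expose "array" and "sampling_rate"
                result = asr({"raw": audio["array"], "sampling_rate": audio["sampling_rate"]})
                predictions[row["id"]] = result["text"]
            ```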

            #### Step 3: Format Results
            Create a CSV file with exactly these columns (see the example below):
            - **`id`**: Sample identifier (must match dataset IDs)
            - **`text`**: Your model's transcription
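
            Continuing from the sketch above:
            ```python
            import pandas as pd

            submission = pd.DataFrame(
                {"id": list(predictions.keys()), "text": list(predictions.values())}
            )
            submission.to_csv("submission.csv", index=False)
            ```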

            #### Step 4: Submit & Evaluate
            - Upload your CSV using the submission form
            - Your model will be automatically evaluated
            - Results appear on the leaderboard immediately

            ### Recognition & Impact

            **Top-performing models will be:**
            - Featured prominently on our leaderboard
            - Highlighted in MALIBA-AI communications
            - Considered for inclusion in production systems
            - Invited to present at community events

            ### Community Guidelines

            - **Reproducibility:** Please provide model details and methodology
            - **Fair Play:** No data leakage or unfair advantages
            - **Collaboration:** Share insights and learnings with the community
            - **Attribution:** Properly cite the benchmark in publications

            ### Technical Specifications

            | Aspect | Details |
            |--------|---------|
            | **Audio Format** | WAV, various sample rates |
            | **Language** | Bambara (bam) |
            | **Evaluation Metrics** | WER, CER, Combined Score |
            | **Text Encoding** | UTF-8 |
            | **Submission Format** | CSV with id, text columns |
            """)

    # Citation and Footer
    with gr.Group(elem_classes="content-card"):
        gr.HTML("""
        <div class="citation-block">
            <h2>Citation</h2>
            <p>If you use the Bambara ASR Leaderboard in a scientific publication, or if you find these resources useful, please cite our work:</p>
            <pre>
@misc{bambara_asr_leaderboard_2025,
    title={Bambara Speech Recognition Leaderboard},
    author={MALIBA-AI Team},
    year={2025},
    url={https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard},
    note={A community initiative for advancing Bambara speech recognition technology}
}
            </pre>
        </div>
        """)

    gr.HTML("""
    <div style="text-align: center; margin-top: 30px; padding-top: 20px; border-top: 2px solid #e9ecef;">
        <h3 style="color: #7d3561; margin-bottom: 15px;">About MALIBA-AI</h3>
        <p style="font-size: 16px; line-height: 1.6; max-width: 800px; margin: 0 auto;">
            <strong>MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation</strong><br>
            <em>"No Malian Language Left Behind"</em>
        </p>
        <p style="margin-top: 15px;">
            This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
            For more information, visit <a href="https://maliba-ai.org/" style="color: #7d3561; font-weight: 600;">MALIBA-AI</a> or
            <a href="https://huggingface.co/MALIBA-AI" style="color: #7d3561; font-weight: 600;">our Hugging Face page</a>.
        </p>
        <div style="margin-top: 20px;">
            <span style="font-size: 2em;">🇲🇱</span>
            <span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
            <span style="font-size: 2em;">🤖</span>
            <span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
            <span style="font-size: 2em;">🚀</span>
        </div>
    </div>
    """)

if __name__ == "__main__":
    demo.launch()