sudoping01 committed
Commit dce5a82 · verified · 1 Parent(s): 6e99c6f

Update app.py

Files changed (1)
  1. app.py +1349 -179
app.py CHANGED
@@ -1,3 +1,412 @@
 import gradio as gr
 import pandas as pd
 from datasets import load_dataset
@@ -5,15 +414,337 @@ from jiwer import wer, cer
 import os
 from datetime import datetime
 import re
-
 from huggingface_hub import login
 
 # Login to Hugging Face Hub (if token is available)
 token = os.environ.get("HG_TOKEN")
 if token:
 login(token)
 
-
 try:
 dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
 references = {row["id"]: row["text"] for row in dataset}
@@ -22,25 +753,30 @@ except Exception as e:
 print(f"Error loading dataset: {str(e)}")
 references = {}
 
-
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
-
 sample_data = [
- ["test_1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
- ["test_2", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
- ]
 pd.DataFrame(sample_data,
- columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
 print(f"Created new leaderboard file with sample data")
 else:
 leaderboard_df = pd.read_csv(leaderboard_file)
 
-
- if "Combined_Score" not in leaderboard_df.columns:
- leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
- leaderboard_df.to_csv(leaderboard_file, index=False)
- print(f"Added Combined_Score column to existing leaderboard")
 print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
 
 def normalize_text(text):
@@ -103,7 +839,6 @@ def calculate_metrics(predictions_df):
 avg_wer = sum(item["wer"] for item in results) / len(results)
 avg_cer = sum(item["cer"] for item in results) / len(results)
 
-
 weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
 weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
 
@@ -113,26 +848,209 @@ def format_as_percentage(value):
 """Convert decimal to percentage with 2 decimal places"""
 return f"{value * 100:.2f}%"
 
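For context, the two aggregates in the hunk above differ only in weighting: the first is a plain mean over samples, the second weights each sample by its reference length. A toy check with hypothetical numbers, using the same formulas:

```python
# Two hypothetical samples: per-sample WER and reference word count
results = [{"wer": 0.10, "ref_word_count": 20}, {"wer": 0.40, "ref_word_count": 5}]

avg_wer = sum(r["wer"] for r in results) / len(results)              # (0.10 + 0.40) / 2 = 0.25
total_ref_words = sum(r["ref_word_count"] for r in results)          # 25
weighted_wer = sum(r["wer"] * r["ref_word_count"] for r in results) / total_ref_words  # 4.0 / 25 = 0.16
```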
 def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
 """Format leaderboard for display with ranking and percentages"""
 if df is None or len(df) == 0:
- return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
-
 display_df = df.copy()
 
-
- display_df = display_df.sort_values(sort_by)
 
 display_df.insert(0, "Rank", range(1, len(display_df) + 1))
 
 for col in ["WER", "CER", "Combined_Score"]:
 if col in display_df.columns:
- display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")
 
-
 
- return display_df
 
 def update_ranking(method):
 """Update leaderboard ranking based on selected method"""
@@ -152,52 +1070,91 @@ def update_ranking(method):
 
 except Exception as e:
 print(f"Error updating ranking: {str(e)}")
- return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
- def process_submission(model_name, csv_file):
- """Process a new model submission"""
159
  if not model_name or not model_name.strip():
160
- return "Error: Please provide a model name.", None
161
 
162
  if not csv_file:
163
- return "Error: Please upload a CSV file.", None
164
 
165
  try:
166
  df = pd.read_csv(csv_file)
167
 
168
  if len(df) == 0:
169
- return "Error: Uploaded CSV is empty.", None
170
 
171
  if set(df.columns) != {"id", "text"}:
172
- return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None
173
 
174
  if df["id"].duplicated().any():
175
  dup_ids = df[df["id"].duplicated()]["id"].unique()
176
- return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None
177
 
178
  missing_ids = set(references.keys()) - set(df["id"])
179
  extra_ids = set(df["id"]) - set(references.keys())
180
 
181
  if missing_ids:
182
- return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None
183
 
184
  if extra_ids:
185
- return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None
186
 
187
  try:
188
  avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
189
 
190
- # Check for suspiciously low values
191
  if avg_wer < 0.001:
192
- return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
193
 
194
  except Exception as e:
195
- return f"Error calculating metrics: {str(e)}", None
196
 
197
-
198
  leaderboard = pd.read_csv(leaderboard_file)
199
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
200
-
201
  combined_score = avg_wer * 0.7 + avg_cer * 0.3
202
 
203
  if model_name in leaderboard["Model_Name"].values:
@@ -206,11 +1163,13 @@ def process_submission(model_name, csv_file):
206
  leaderboard.loc[idx, "CER"] = avg_cer
207
  leaderboard.loc[idx, "Combined_Score"] = combined_score
208
  leaderboard.loc[idx, "timestamp"] = timestamp
 
 
209
  updated_leaderboard = leaderboard
210
  else:
211
  new_entry = pd.DataFrame(
212
- [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
213
- columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
214
  )
215
  updated_leaderboard = pd.concat([leaderboard, new_entry])
216
 
@@ -218,11 +1177,24 @@ def process_submission(model_name, csv_file):
218
  updated_leaderboard.to_csv(leaderboard_file, index=False)
219
 
220
  display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
 
 
 
221
 
222
- return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard
223
 
224
  except Exception as e:
225
- return f"Error processing submission: {str(e)}", None
226
 
227
  def get_current_leaderboard():
228
  """Get the current leaderboard data for display"""
@@ -230,178 +1202,376 @@ def get_current_leaderboard():
230
  if os.path.exists(leaderboard_file):
231
  current_leaderboard = pd.read_csv(leaderboard_file)
232
 
233
- if "Combined_Score" not in current_leaderboard.columns:
234
- current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
235
- current_leaderboard.to_csv(leaderboard_file, index=False)
236
-
 
 
 
 
 
 
237
  return current_leaderboard
238
  else:
239
- return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
240
  except Exception as e:
241
  print(f"Error getting leaderboard: {str(e)}")
242
- return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
243
 
244
  def create_leaderboard_table():
245
  """Create and format the leaderboard table for display"""
246
  leaderboard_data = get_current_leaderboard()
247
  return prepare_leaderboard_for_display(leaderboard_data)
248
 
249
- with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
250
- gr.Markdown(
251
- """
252
- # πŸ‡²πŸ‡± Bambara ASR Leaderboard
253
-
254
- This leaderboard tracks and evaluates speech recognition models for the Bambara language.
255
- Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score.
256
-
257
- ## Current Models Performance
258
- """
259
- )
260
 
261
- current_data = get_current_leaderboard()
 
262
 
263
-
264
- if len(current_data) > 0:
265
- best_model = current_data.sort_values("Combined_Score").iloc[0]
266
- gr.Markdown(f"""
267
- ### πŸ† Current Best Model: **{best_model['Model_Name']}**
268
- * WER: **{best_model['WER']*100:.2f}%**
269
- * CER: **{best_model['CER']*100:.2f}%**
270
- * Combined Score: **{best_model['Combined_Score']*100:.2f}%**
271
- """)
272
 
273
- with gr.Tabs() as tabs:
274
- with gr.TabItem("πŸ… Model Rankings"):
275
 
276
- initial_leaderboard = create_leaderboard_table()
277
-
278
- ranking_method = gr.Radio(
279
- ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
280
- label="Ranking Method",
281
- value="Combined Score (WER 70%, CER 30%)"
282
- )
283
-
284
- leaderboard_view = gr.DataFrame(
285
- value=initial_leaderboard,
286
- interactive=False,
287
- label="Models are ranked by selected metric - lower is better"
288
- )
289
-
290
- ranking_method.change(
291
- fn=update_ranking,
292
- inputs=[ranking_method],
293
- outputs=[leaderboard_view]
294
- )
295
-
296
- with gr.Accordion("Metrics Explanation", open=False):
297
- gr.Markdown(
298
- """
299
- ## Understanding ASR Metrics
300
 
301
  ### Word Error Rate (WER)
302
- WER measures how accurately the ASR system recognizes whole words:
303
- * Lower values indicate better performance
304
- * Calculated as: (Substitutions + Insertions + Deletions) / Total Words
305
- * A WER of 0% means perfect transcription
306
- * A WER of 20% means approximately 1 in 5 words contains an error
 
 
 
307
 
308
  ### Character Error Rate (CER)
309
- CER measures accuracy at the character level:
310
- * More fine-grained than WER
311
- * Better at capturing partial word matches
312
- * Particularly useful for agglutinative languages like Bambara
313
 
314
- ### Combined Score
315
- * Weighted average: 70% WER + 30% CER
316
- * Provides a balanced evaluation of model performance
317
- * Used as the primary ranking metric
- """
319
  )
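The per-sample numbers behind these metrics come from jiwer's `wer` and `cer` helpers, exactly as `calculate_metrics` uses them. A minimal standalone check with made-up strings:

```python
from jiwer import wer, cer

reference = "benchmark reference sentence here"   # hypothetical ground-truth transcript
hypothesis = "benchmark reference sentence"       # hypothetical model output (one word dropped)

print(wer(reference, hypothesis))  # 0.25 -> one deletion out of four reference words
print(cer(reference, hypothesis))  # character-level rate, computed over characters instead of words
```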
320
-
321
- with gr.TabItem("πŸ“Š Submit New Results"):
322
- gr.Markdown(
323
- """
324
- ### Submit a new model for evaluation
325
 
326
- Upload a CSV file with the following format:
327
- * Must contain exactly two columns: 'id' and 'text'
328
- * The 'id' column should match the reference dataset IDs
329
- * The 'text' column should contain your model's transcriptions
330
- """
331
- )
332
-
333
- with gr.Row():
334
- model_name_input = gr.Textbox(
335
- label="Model Name",
336
- placeholder="e.g., MALIBA-AI/bambara-asr"
337
  )
338
- gr.Markdown("*Use a descriptive name to identify your model*")
339
 
340
- with gr.Row():
341
- csv_upload = gr.File(
342
- label="Upload CSV File",
343
- file_types=[".csv"]
344
  )
345
- gr.Markdown("*CSV with columns: id, text*")
346
-
347
- submit_btn = gr.Button("Submit", variant="primary")
348
- output_msg = gr.Textbox(label="Status", interactive=False)
349
- leaderboard_display = gr.DataFrame(
350
- label="Updated Leaderboard",
351
- value=initial_leaderboard,
352
- interactive=False
353
- )
354
-
355
- submit_btn.click(
356
- fn=process_submission,
357
- inputs=[model_name_input, csv_upload],
358
- outputs=[output_msg, leaderboard_display]
359
- )
360
 
361
- with gr.TabItem("πŸ“ Benchmark Dataset"):
362
- gr.Markdown(
363
- """
364
- ## About the Benchmark Dataset
365
 
366
- This leaderboard uses the **[sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/MALIBA-AI/bambara-speech-recognition-leaderboard)** dataset:
 
367
 
368
- * Contains diverse Bambara speech samples
369
- * Includes various speakers, accents, and dialects
370
- * Covers different speech styles and recording conditions
371
- * Transcribed and validated
372
 
373
- ### How to Generate Predictions
374
 
375
- To submit results to this leaderboard:
 
 
 
 
376
 
377
- 1. Download the audio files from the benchmark dataset
378
- 2. Run your ASR model on the audio files
379
- 3. Generate a CSV file with 'id' and 'text' columns
380
- 4. Submit your results using the form in the "Submit New Results" tab
381
 
382
- ### Evaluation Guidelines
 
 
 
 
383
 
384
- * Text is normalized (lowercase, punctuation removed) before metrics calculation
385
- * Extreme outliers are capped to prevent skewing results
386
- * All submissions are validated for format and completeness
387
-
388
- NB: This work is a collaboration between MALIBA-AI, RobotsMali AI4D-LAB and Djelia
389
- """
390
- )
391
-
392
- gr.Markdown(
393
- """
394
- ---
395
- ### About MALIBA-AI
396
-
397
- **MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation**
398
-
399
- *"No Malian Language Left Behind"*
400
 
401
- This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
402
- For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI).
403
- """
404
- )
405
 
406
  if __name__ == "__main__":
407
  demo.launch()
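Both the removed version above and the new version below validate the same submission format: a CSV with exactly `id` and `text` columns whose ids match the benchmark's eval split. A minimal sketch of producing such a file; the `transcribe` call and the `audio` field are placeholders for your own setup:

```python
import pandas as pd
from datasets import load_dataset

# Same benchmark split the leaderboard app loads
eval_set = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]

rows = [{"id": sample["id"], "text": transcribe(sample["audio"])}  # transcribe(): your model's inference function (hypothetical)
        for sample in eval_set]
pd.DataFrame(rows, columns=["id", "text"]).to_csv("predictions.csv", index=False)
```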
 
1
+ # import gradio as gr
2
+ # import pandas as pd
3
+ # from datasets import load_dataset
4
+ # from jiwer import wer, cer
5
+ # import os
6
+ # from datetime import datetime
7
+ # import re
8
+
9
+ # from huggingface_hub import login
10
+
11
+ # # Login to Hugging Face Hub (if token is available)
12
+ # token = os.environ.get("HG_TOKEN")
13
+ # if token:
14
+ # login(token)
15
+
16
+
17
+ # try:
18
+ # dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
19
+ # references = {row["id"]: row["text"] for row in dataset}
20
+ # print(f"Loaded {len(references)} reference transcriptions")
21
+ # except Exception as e:
22
+ # print(f"Error loading dataset: {str(e)}")
23
+ # references = {}
24
+
25
+
26
+ # leaderboard_file = "leaderboard.csv"
27
+ # if not os.path.exists(leaderboard_file):
28
+
29
+ # sample_data = [
30
+ # ["test_1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
31
+ # ["test_2", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
32
+ # ]
33
+ # pd.DataFrame(sample_data,
34
+ # columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
35
+ # print(f"Created new leaderboard file with sample data")
36
+ # else:
37
+ # leaderboard_df = pd.read_csv(leaderboard_file)
38
+
39
+
40
+ # if "Combined_Score" not in leaderboard_df.columns:
41
+ # leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
42
+ # leaderboard_df.to_csv(leaderboard_file, index=False)
43
+ # print(f"Added Combined_Score column to existing leaderboard")
44
+ # print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
45
+
46
+ # def normalize_text(text):
47
+ # """Normalize text for WER/CER calculation"""
48
+ # if not isinstance(text, str):
49
+ # text = str(text)
50
+
51
+ # text = text.lower()
52
+ # text = re.sub(r'[^\w\s]', '', text)
53
+ # text = re.sub(r'\s+', ' ', text).strip()
54
+ # return text
55
+
56
+ # def calculate_metrics(predictions_df):
57
+ # """Calculate WER and CER for predictions."""
58
+ # results = []
59
+ # total_ref_words = 0
60
+ # total_ref_chars = 0
61
+
62
+ # for _, row in predictions_df.iterrows():
63
+ # id_val = row["id"]
64
+ # if id_val not in references:
65
+ # continue
66
+
67
+ # reference = normalize_text(references[id_val])
68
+ # hypothesis = normalize_text(row["text"])
69
+
70
+ # if not reference or not hypothesis:
71
+ # continue
72
+
73
+ # reference_words = reference.split()
74
+ # hypothesis_words = hypothesis.split()
75
+ # reference_chars = list(reference)
76
+
77
+ # try:
78
+ # sample_wer = wer(reference, hypothesis)
79
+ # sample_cer = cer(reference, hypothesis)
80
+
81
+ # sample_wer = min(sample_wer, 2.0)
82
+ # sample_cer = min(sample_cer, 2.0)
83
+
84
+ # total_ref_words += len(reference_words)
85
+ # total_ref_chars += len(reference_chars)
86
+
87
+ # results.append({
88
+ # "id": id_val,
89
+ # "reference": reference,
90
+ # "hypothesis": hypothesis,
91
+ # "ref_word_count": len(reference_words),
92
+ # "ref_char_count": len(reference_chars),
93
+ # "wer": sample_wer,
94
+ # "cer": sample_cer
95
+ # })
96
+ # except Exception as e:
97
+ # print(f"Error processing sample {id_val}: {str(e)}")
98
+ # pass
99
+
100
+ # if not results:
101
+ # raise ValueError("No valid samples for WER/CER calculation")
102
+
103
+ # avg_wer = sum(item["wer"] for item in results) / len(results)
104
+ # avg_cer = sum(item["cer"] for item in results) / len(results)
105
+
106
+
107
+ # weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
108
+ # weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
109
+
110
+ # return avg_wer, avg_cer, weighted_wer, weighted_cer, results
111
+
112
+ # def format_as_percentage(value):
113
+ # """Convert decimal to percentage with 2 decimal places"""
114
+ # return f"{value * 100:.2f}%"
115
+
116
+ # def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
117
+ # """Format leaderboard for display with ranking and percentages"""
118
+ # if df is None or len(df) == 0:
119
+ # return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
120
+
121
+
122
+ # display_df = df.copy()
123
+
124
+
125
+ # display_df = display_df.sort_values(sort_by)
126
+
127
+ # display_df.insert(0, "Rank", range(1, len(display_df) + 1))
128
+
129
+ # for col in ["WER", "CER", "Combined_Score"]:
130
+ # if col in display_df.columns:
131
+ # display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")
132
+
133
+
134
+
135
+ # return display_df
136
+
137
+ # def update_ranking(method):
138
+ # """Update leaderboard ranking based on selected method"""
139
+ # try:
140
+ # current_lb = pd.read_csv(leaderboard_file)
141
+
142
+ # if "Combined_Score" not in current_lb.columns:
143
+ # current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3
144
+
145
+ # sort_column = "Combined_Score"
146
+ # if method == "WER Only":
147
+ # sort_column = "WER"
148
+ # elif method == "CER Only":
149
+ # sort_column = "CER"
150
+
151
+ # return prepare_leaderboard_for_display(current_lb, sort_column)
152
+
153
+ # except Exception as e:
154
+ # print(f"Error updating ranking: {str(e)}")
155
+ # return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
156
+
157
+ # def process_submission(model_name, csv_file):
158
+ # """Process a new model submission"""
159
+ # if not model_name or not model_name.strip():
160
+ # return "Error: Please provide a model name.", None
161
+
162
+ # if not csv_file:
163
+ # return "Error: Please upload a CSV file.", None
164
+
165
+ # try:
166
+ # df = pd.read_csv(csv_file)
167
+
168
+ # if len(df) == 0:
169
+ # return "Error: Uploaded CSV is empty.", None
170
+
171
+ # if set(df.columns) != {"id", "text"}:
172
+ # return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None
173
+
174
+ # if df["id"].duplicated().any():
175
+ # dup_ids = df[df["id"].duplicated()]["id"].unique()
176
+ # return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None
177
+
178
+ # missing_ids = set(references.keys()) - set(df["id"])
179
+ # extra_ids = set(df["id"]) - set(references.keys())
180
+
181
+ # if missing_ids:
182
+ # return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None
183
+
184
+ # if extra_ids:
185
+ # return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None
186
+
187
+ # try:
188
+ # avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
189
+
190
+ # # Check for suspiciously low values
191
+ # if avg_wer < 0.001:
192
+ # return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
193
+
194
+ # except Exception as e:
195
+ # return f"Error calculating metrics: {str(e)}", None
196
+
197
+
198
+ # leaderboard = pd.read_csv(leaderboard_file)
199
+ # timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
200
+
201
+ # combined_score = avg_wer * 0.7 + avg_cer * 0.3
202
+
203
+ # if model_name in leaderboard["Model_Name"].values:
204
+ # idx = leaderboard[leaderboard["Model_Name"] == model_name].index
205
+ # leaderboard.loc[idx, "WER"] = avg_wer
206
+ # leaderboard.loc[idx, "CER"] = avg_cer
207
+ # leaderboard.loc[idx, "Combined_Score"] = combined_score
208
+ # leaderboard.loc[idx, "timestamp"] = timestamp
209
+ # updated_leaderboard = leaderboard
210
+ # else:
211
+ # new_entry = pd.DataFrame(
212
+ # [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
213
+ # columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
214
+ # )
215
+ # updated_leaderboard = pd.concat([leaderboard, new_entry])
216
+
217
+ # updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
218
+ # updated_leaderboard.to_csv(leaderboard_file, index=False)
219
+
220
+ # display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
221
+
222
+ # return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard
223
+
224
+ # except Exception as e:
225
+ # return f"Error processing submission: {str(e)}", None
226
+
227
+ # def get_current_leaderboard():
228
+ # """Get the current leaderboard data for display"""
229
+ # try:
230
+ # if os.path.exists(leaderboard_file):
231
+ # current_leaderboard = pd.read_csv(leaderboard_file)
232
+
233
+ # if "Combined_Score" not in current_leaderboard.columns:
234
+ # current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
235
+ # current_leaderboard.to_csv(leaderboard_file, index=False)
236
+
237
+ # return current_leaderboard
238
+ # else:
239
+ # return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
240
+ # except Exception as e:
241
+ # print(f"Error getting leaderboard: {str(e)}")
242
+ # return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
243
+
244
+ # def create_leaderboard_table():
245
+ # """Create and format the leaderboard table for display"""
246
+ # leaderboard_data = get_current_leaderboard()
247
+ # return prepare_leaderboard_for_display(leaderboard_data)
248
+
249
+ # with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
250
+ # gr.Markdown(
251
+ # """
252
+ # # πŸ‡²πŸ‡± Bambara ASR Leaderboard
253
+
254
+ # This leaderboard tracks and evaluates speech recognition models for the Bambara language.
255
+ # Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score.
256
+
257
+ # ## Current Models Performance
258
+ # """
259
+ # )
260
+
261
+ # current_data = get_current_leaderboard()
262
+
263
+
264
+ # if len(current_data) > 0:
265
+ # best_model = current_data.sort_values("Combined_Score").iloc[0]
266
+ # gr.Markdown(f"""
267
+ # ### πŸ† Current Best Model: **{best_model['Model_Name']}**
268
+ # * WER: **{best_model['WER']*100:.2f}%**
269
+ # * CER: **{best_model['CER']*100:.2f}%**
270
+ # * Combined Score: **{best_model['Combined_Score']*100:.2f}%**
271
+ # """)
272
+
273
+ # with gr.Tabs() as tabs:
274
+ # with gr.TabItem("πŸ… Model Rankings"):
275
+
276
+ # initial_leaderboard = create_leaderboard_table()
277
+
278
+ # ranking_method = gr.Radio(
279
+ # ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
280
+ # label="Ranking Method",
281
+ # value="Combined Score (WER 70%, CER 30%)"
282
+ # )
283
+
284
+ # leaderboard_view = gr.DataFrame(
285
+ # value=initial_leaderboard,
286
+ # interactive=False,
287
+ # label="Models are ranked by selected metric - lower is better"
288
+ # )
289
+
290
+ # ranking_method.change(
291
+ # fn=update_ranking,
292
+ # inputs=[ranking_method],
293
+ # outputs=[leaderboard_view]
294
+ # )
295
+
296
+ # with gr.Accordion("Metrics Explanation", open=False):
297
+ # gr.Markdown(
298
+ # """
299
+ # ## Understanding ASR Metrics
300
+
301
+ # ### Word Error Rate (WER)
302
+ # WER measures how accurately the ASR system recognizes whole words:
303
+ # * Lower values indicate better performance
304
+ # * Calculated as: (Substitutions + Insertions + Deletions) / Total Words
305
+ # * A WER of 0% means perfect transcription
306
+ # * A WER of 20% means approximately 1 in 5 words contains an error
307
+
308
+ # ### Character Error Rate (CER)
309
+ # CER measures accuracy at the character level:
310
+ # * More fine-grained than WER
311
+ # * Better at capturing partial word matches
312
+ # * Particularly useful for agglutinative languages like Bambara
313
+
314
+ # ### Combined Score
315
+ # * Weighted average: 70% WER + 30% CER
316
+ # * Provides a balanced evaluation of model performance
317
+ # * Used as the primary ranking metric
318
+ # """
319
+ # )
320
+
321
+ # with gr.TabItem("πŸ“Š Submit New Results"):
322
+ # gr.Markdown(
323
+ # """
324
+ # ### Submit a new model for evaluation
325
+
326
+ # Upload a CSV file with the following format:
327
+ # * Must contain exactly two columns: 'id' and 'text'
328
+ # * The 'id' column should match the reference dataset IDs
329
+ # * The 'text' column should contain your model's transcriptions
330
+ # """
331
+ # )
332
+
333
+ # with gr.Row():
334
+ # model_name_input = gr.Textbox(
335
+ # label="Model Name",
336
+ # placeholder="e.g., MALIBA-AI/bambara-asr"
337
+ # )
338
+ # gr.Markdown("*Use a descriptive name to identify your model*")
339
+
340
+ # with gr.Row():
341
+ # csv_upload = gr.File(
342
+ # label="Upload CSV File",
343
+ # file_types=[".csv"]
344
+ # )
345
+ # gr.Markdown("*CSV with columns: id, text*")
346
+
347
+ # submit_btn = gr.Button("Submit", variant="primary")
348
+ # output_msg = gr.Textbox(label="Status", interactive=False)
349
+ # leaderboard_display = gr.DataFrame(
350
+ # label="Updated Leaderboard",
351
+ # value=initial_leaderboard,
352
+ # interactive=False
353
+ # )
354
+
355
+ # submit_btn.click(
356
+ # fn=process_submission,
357
+ # inputs=[model_name_input, csv_upload],
358
+ # outputs=[output_msg, leaderboard_display]
359
+ # )
360
+
361
+ # with gr.TabItem("πŸ“ Benchmark Dataset"):
362
+ # gr.Markdown(
363
+ # """
364
+ # ## About the Benchmark Dataset
365
+
366
+ # This leaderboard uses the **[sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/MALIBA-AI/bambara-speech-recognition-leaderboard)** dataset:
367
+
368
+ # * Contains diverse Bambara speech samples
369
+ # * Includes various speakers, accents, and dialects
370
+ # * Covers different speech styles and recording conditions
371
+ # * Transcribed and validated
372
+
373
+ # ### How to Generate Predictions
374
+
375
+ # To submit results to this leaderboard:
376
+
377
+ # 1. Download the audio files from the benchmark dataset
378
+ # 2. Run your ASR model on the audio files
379
+ # 3. Generate a CSV file with 'id' and 'text' columns
380
+ # 4. Submit your results using the form in the "Submit New Results" tab
381
+
382
+ # ### Evaluation Guidelines
383
+
384
+ # * Text is normalized (lowercase, punctuation removed) before metrics calculation
385
+ # * Extreme outliers are capped to prevent skewing results
386
+ # * All submissions are validated for format and completeness
387
+
388
+ # NB: This work is a collaboration between MALIBA-AI, RobotsMali AI4D-LAB and Djelia
389
+ # """
390
+ # )
391
+
392
+ # gr.Markdown(
393
+ # """
394
+ # ---
395
+ # ### About MALIBA-AI
396
+
397
+ # **MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation**
398
+
399
+ # *"No Malian Language Left Behind"*
400
+
401
+ # This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
402
+ # For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI).
403
+ # """
404
+ # )
405
+
406
+ # if __name__ == "__main__":
407
+ # demo.launch()
408
+
409
+
410
  import gradio as gr
411
  import pandas as pd
412
  from datasets import load_dataset
 
414
  import os
415
  from datetime import datetime
416
  import re
417
+ import plotly.express as px
418
+ import plotly.graph_objects as go
419
  from huggingface_hub import login
420
+ import numpy as np
421
+
422
+ # Custom CSS inspired by Sahara leaderboard
423
+ custom_head_html = """
424
+ <link rel="preconnect" href="https://fonts.googleapis.com">
425
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
426
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Rubik:wght@400;600&display=swap" rel="stylesheet">
427
+ """
428
+
429
+ # Header with MALIBA-AI branding
430
+ new_header_html = """
431
+ <center>
432
+ <br><br>
433
+ <div style="display: flex; align-items: center; justify-content: center; gap: 20px; margin-bottom: 20px;">
434
+ <div style="font-size: 4em;">πŸ‡²πŸ‡±</div>
435
+ <div>
436
+ <h1 style="margin: 0; font-family: 'Rubik', sans-serif; color: #2f3b7d; font-size: 2.5em; font-weight: 700;">
437
+ Bambara ASR Leaderboard
438
+ </h1>
439
+ <p style="margin: 5px 0 0 0; font-size: 1.2em; color: #7d3561; font-weight: 600;">
440
+ Powered by MALIBA-AI β€’ "No Malian Language Left Behind"
441
+ </p>
442
+ </div>
443
+ <div style="font-size: 4em;">πŸŽ™οΈ</div>
444
+ </div>
445
+ </center>
446
+ """
447
+
448
+ # Advanced CSS styling inspired by Sahara
449
+ sahara_style_css = """
450
+ /* Global Styles */
451
+ div[class*="gradio-container"] {
452
+ background: #FFFBF5 !important;
453
+ color: #000 !important;
454
+ font-family: 'Inter', sans-serif !important;
455
+ }
456
+
457
+ div.svelte-1nguped {
458
+ background: white !important;
459
+ }
460
+
461
+ .fillable.svelte-15jxnnn.svelte-15jxnnn:not(.fill_width) {
462
+ max-width: 1580px !important;
463
+ }
464
+
465
+ /* Navigation Buttons */
466
+ .nav-button {
467
+ background-color: #117b75 !important;
468
+ color: #fff !important;
469
+ font-weight: bold !important;
470
+ border-radius: 8px !important;
471
+ border: none !important;
472
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
473
+ transition: all 0.3s ease !important;
474
+ }
475
+
476
+ .nav-button:hover {
477
+ background-color: #0f6b66 !important;
478
+ color: #e8850e !important;
479
+ transform: translateY(-1px) !important;
480
+ box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
481
+ }
482
+
483
+ /* Content Cards */
484
+ .content-section {
485
+ padding: 40px 0;
486
+ }
487
+
488
+ .content-card {
489
+ background-color: #fff !important;
490
+ border-radius: 16px !important;
491
+ box-shadow: 0 10px 25px -5px rgba(0,0,0,0.1), 0 8px 10px -6px rgba(0,0,0,0.1) !important;
492
+ padding: 40px !important;
493
+ margin-bottom: 30px !important;
494
+ border: 1px solid rgba(0,0,0,0.05) !important;
495
+ }
496
+
497
+ /* Typography */
498
+ .content-card h2 {
499
+ font-family: "Rubik", sans-serif !important;
500
+ font-size: 32px !important;
501
+ font-weight: 700 !important;
502
+ line-height: 1.25 !important;
503
+ letter-spacing: -1px !important;
504
+ color: #2f3b7d !important;
505
+ margin-bottom: 20px !important;
506
+ text-align: center !important;
507
+ }
508
+
509
+ .content-card h3 {
510
+ font-size: 22px !important;
511
+ color: #2f3b7d !important;
512
+ font-weight: 600 !important;
513
+ margin-bottom: 15px !important;
514
+ }
515
+
516
+ .content-card h4 {
517
+ font-family: "Rubik", sans-serif !important;
518
+ color: #7d3561 !important;
519
+ font-weight: 600 !important;
520
+ margin-bottom: 10px !important;
521
+ }
522
+
523
+ .title {
524
+ color: #7d3561 !important;
525
+ font-weight: 600 !important;
526
+ }
527
+
528
+ /* Tab Styling */
529
+ .tab-wrapper.svelte-1tcem6n.svelte-1tcem6n {
530
+ display: flex;
531
+ align-items: center;
532
+ justify-content: space-between;
533
+ position: relative;
534
+ height: auto !important;
535
+ padding-bottom: 0 !important;
536
+ }
537
+
538
+ .selected.svelte-1tcem6n.svelte-1tcem6n {
539
+ background-color: #7d3561 !important;
540
+ color: #fff !important;
541
+ border-radius: 8px 8px 0 0 !important;
542
+ }
543
+
544
+ button.svelte-1tcem6n.svelte-1tcem6n {
545
+ color: #7d3561 !important;
546
+ font-weight: 600 !important;
547
+ font-size: 16px !important;
548
+ padding: 12px 20px !important;
549
+ background-color: #fff !important;
550
+ border-radius: 8px 8px 0 0 !important;
551
+ border: 2px solid #e9ecef !important;
552
+ border-bottom: none !important;
553
+ transition: all 0.3s ease !important;
554
+ }
555
+
556
+ button.svelte-1tcem6n.svelte-1tcem6n:hover {
557
+ background-color: #f8f9fa !important;
558
+ border-color: #7d3561 !important;
559
+ }
560
+
561
+ .tab-container.svelte-1tcem6n.svelte-1tcem6n:after {
562
+ content: "";
563
+ position: absolute;
564
+ bottom: 0;
565
+ left: 0;
566
+ right: 0;
567
+ height: 3px;
568
+ background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
569
+ }
570
+
571
+ /* Table Styling */
572
+ div[class*="gradio-container"] .prose table {
573
+ color: #000 !important;
574
+ border: 2px solid #dca02a !important;
575
+ border-radius: 12px !important;
576
+ margin-bottom: 20px !important;
577
+ margin-left: auto !important;
578
+ margin-right: auto !important;
579
+ width: 100% !important;
580
+ border-collapse: separate !important;
581
+ border-spacing: 0 !important;
582
+ overflow: hidden !important;
583
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
584
+ }
585
+
586
+ div[class*="gradio-container"] .prose thead tr {
587
+ background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
588
+ }
589
+
590
+ div[class*="gradio-container"] .prose th {
591
+ color: #fff !important;
592
+ font-weight: 700 !important;
593
+ font-size: 14px !important;
594
+ padding: 15px 10px !important;
595
+ text-align: center !important;
596
+ border: none !important;
597
+ }
598
+
599
+ div[class*="gradio-container"] .prose td {
600
+ font-size: 14px !important;
601
+ padding: 12px 10px !important;
602
+ border: none !important;
603
+ text-align: center !important;
604
+ color: #000 !important;
605
+ border-bottom: 1px solid #f8f9fa !important;
606
+ }
607
+
608
+ div[class*="gradio-container"] .prose tbody tr:nth-child(even) {
609
+ background-color: #f8f9fa !important;
610
+ }
611
+
612
+ div[class*="gradio-container"] .prose tbody tr:hover {
613
+ background-color: #e3f2fd !important;
614
+ transition: background-color 0.2s ease !important;
615
+ }
616
+
617
+ /* First column (model names) styling */
618
+ div[class*="gradio-container"] .prose th:first-child,
619
+ div[class*="gradio-container"] .prose td:first-child {
620
+ text-align: left !important;
621
+ min-width: 250px !important;
622
+ font-weight: 600 !important;
623
+ }
624
+
625
+ /* Performance badges */
626
+ .performance-badge {
627
+ display: inline-block;
628
+ padding: 4px 8px;
629
+ border-radius: 12px;
630
+ font-size: 12px;
631
+ font-weight: 600;
632
+ margin-left: 8px;
633
+ }
634
+
635
+ .badge-excellent {
636
+ background: #d4edda;
637
+ color: #155724;
638
+ }
639
+
640
+ .badge-good {
641
+ background: #fff3cd;
642
+ color: #856404;
643
+ }
644
+
645
+ .badge-fair {
646
+ background: #f8d7da;
647
+ color: #721c24;
648
+ }
649
+
650
+ /* Stats cards */
651
+ .stats-grid {
652
+ display: grid;
653
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
654
+ gap: 20px;
655
+ margin: 20px 0;
656
+ }
657
+
658
+ .stat-card {
659
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
660
+ color: white;
661
+ padding: 20px;
662
+ border-radius: 12px;
663
+ text-align: center;
664
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
665
+ }
666
+
667
+ .stat-number {
668
+ font-size: 2em;
669
+ font-weight: 700;
670
+ margin-bottom: 5px;
671
+ }
672
+
673
+ .stat-label {
674
+ font-size: 0.9em;
675
+ opacity: 0.9;
676
+ }
677
+
678
+ /* Form styling */
679
+ .form-section {
680
+ background: #f8f9fa;
681
+ border-radius: 12px;
682
+ padding: 25px;
683
+ margin: 20px 0;
684
+ border-left: 4px solid #7d3561;
685
+ }
686
+
687
+ /* Citation block */
688
+ .citation-block {
689
+ background-color: #FDF6E3 !important;
690
+ border-radius: 12px !important;
691
+ padding: 25px !important;
692
+ border-left: 4px solid #D97706 !important;
693
+ margin: 20px 0 !important;
694
+ }
695
+
696
+ /* Dropdown styling */
697
+ .gradio-dropdown {
698
+ border-radius: 8px !important;
699
+ border: 2px solid #e9ecef !important;
700
+ }
701
+
702
+ .gradio-dropdown:focus {
703
+ border-color: #7d3561 !important;
704
+ box-shadow: 0 0 0 3px rgba(125, 53, 97, 0.1) !important;
705
+ }
706
+
707
+ /* Button styling */
708
+ .gradio-button {
709
+ border-radius: 8px !important;
710
+ font-weight: 600 !important;
711
+ transition: all 0.3s ease !important;
712
+ }
713
+
714
+ .gradio-button.primary {
715
+ background: linear-gradient(135deg, #7d3561 0%, #2f3b7d 100%) !important;
716
+ border: none !important;
717
+ color: white !important;
718
+ }
719
+
720
+ .gradio-button.primary:hover {
721
+ transform: translateY(-2px) !important;
722
+ box-shadow: 0 4px 12px rgba(125, 53, 97, 0.3) !important;
723
+ }
724
+
725
+ /* Responsive design */
726
+ @media (max-width: 768px) {
727
+ .content-card {
728
+ padding: 20px !important;
729
+ margin-bottom: 20px !important;
730
+ }
731
+
732
+ .content-card h2 {
733
+ font-size: 24px !important;
734
+ }
735
+
736
+ .stats-grid {
737
+ grid-template-columns: 1fr !important;
738
+ }
739
+ }
740
+ """
741
 
742
  # Login to Hugging Face Hub (if token is available)
743
  token = os.environ.get("HG_TOKEN")
744
  if token:
745
  login(token)
746
 
747
+ # Load dataset
748
  try:
749
  dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
750
  references = {row["id"]: row["text"] for row in dataset}
 
753
  print(f"Error loading dataset: {str(e)}")
754
  references = {}
755
 
756
+ # Initialize leaderboard
757
  leaderboard_file = "leaderboard.csv"
758
  if not os.path.exists(leaderboard_file):
 
759
  sample_data = [
760
+ ["MALIBA-AI/bambara-whisper-small", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45", "Whisper-based", "Mali", "ASR"],
761
+ ["OpenAI/whisper-base", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45", "Foundation", "USA", "ASR"],
762
+ ]
763
  pd.DataFrame(sample_data,
764
+ columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]).to_csv(leaderboard_file, index=False)
765
  print(f"Created new leaderboard file with sample data")
766
  else:
767
  leaderboard_df = pd.read_csv(leaderboard_file)
768
 
769
+ # Add new columns if they don't exist
770
+ required_columns = ["Combined_Score", "Type", "Origin", "Task"]
771
+ for col in required_columns:
772
+ if col not in leaderboard_df.columns:
773
+ if col == "Combined_Score":
774
+ leaderboard_df[col] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
775
+ else:
776
+ default_val = "Unknown" if col != "Task" else "ASR"
777
+ leaderboard_df[col] = default_val
778
+
779
+ leaderboard_df.to_csv(leaderboard_file, index=False)
780
  print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
781
 
782
  def normalize_text(text):
 
839
  avg_wer = sum(item["wer"] for item in results) / len(results)
840
  avg_cer = sum(item["cer"] for item in results) / len(results)
841
 
 
842
  weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
843
  weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
844
 
 
848
  """Convert decimal to percentage with 2 decimal places"""
849
  return f"{value * 100:.2f}%"
850
 
851
+ def get_performance_badge(score):
852
+ """Get performance badge based on score"""
853
+ if score < 0.15:
854
+ return "πŸ† Excellent"
855
+ elif score < 0.30:
856
+ return "πŸ₯‰ Good"
857
+ else:
858
+ return "πŸ“ˆ Fair"
859
+
860
+ def add_medals_to_models(df, score_col="Combined_Score"):
861
+ """Add medals to top-performing models"""
862
+ if df.empty or score_col not in df.columns:
863
+ return df
864
+
865
+ df_copy = df.copy()
866
+
867
+ # Convert score to float for sorting
868
+ df_copy[f"{score_col}_float"] = pd.to_numeric(df_copy[score_col], errors='coerce')
869
+
870
+ # Sort by score (ascending - lower is better for error rates)
871
+ df_copy = df_copy.sort_values(by=f"{score_col}_float", ascending=True, na_position='last').reset_index(drop=True)
872
+
873
+ # Get unique scores for ranking
874
+ valid_scores = df_copy[f"{score_col}_float"].dropna().unique()
875
+ valid_scores.sort()
876
+
877
+ # Assign medals
878
+ medals = ["πŸ†", "πŸ₯ˆ", "πŸ₯‰"]
879
+
880
+ def get_medal(score):
881
+ if pd.isna(score):
882
+ return ""
883
+ rank = np.where(valid_scores == score)[0]
884
+ if len(rank) > 0 and rank[0] < len(medals):
885
+ return medals[rank[0]] + " "
886
+ return ""
887
+
888
+ df_copy["Medal"] = df_copy[f"{score_col}_float"].apply(get_medal)
889
+ df_copy["Model_Name"] = df_copy["Medal"] + df_copy["Model_Name"].astype(str)
890
+
891
+ # Clean up temporary columns
892
+ df_copy = df_copy.drop(columns=[f"{score_col}_float", "Medal"])
893
+
894
+ return df_copy
895
+
896
  def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
897
  """Format leaderboard for display with ranking and percentages"""
898
  if df is None or len(df) == 0:
899
+ return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"])
900
 
 
901
  display_df = df.copy()
902
 
903
+ # Add medals first
904
+ display_df = add_medals_to_models(display_df, sort_by)
905
+
906
+ # Sort by the specified column
907
+ display_df[f"{sort_by}_float"] = pd.to_numeric(display_df[sort_by], errors='coerce')
908
+ display_df = display_df.sort_values(f"{sort_by}_float", ascending=True, na_position='last')
909
 
910
+ # Add rank
911
  display_df.insert(0, "Rank", range(1, len(display_df) + 1))
912
 
913
+ # Format percentages
914
  for col in ["WER", "CER", "Combined_Score"]:
915
  if col in display_df.columns:
916
+ display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}" if pd.notna(x) else "---")
917
 
918
+ # Add performance badges
919
+ display_df["Performance"] = display_df["Combined_Score"].apply(lambda x: get_performance_badge(x) if pd.notna(x) else "---")
920
+
921
+ # Shorten model names for display
922
+ display_df["Model"] = display_df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in str(x) else str(x))
923
+
924
+ # Format date
925
+ if "timestamp" in display_df.columns:
926
+ display_df["Date"] = pd.to_datetime(display_df["timestamp"], errors='coerce').dt.strftime("%Y-%m-%d")
927
+ else:
928
+ display_df["Date"] = "---"
929
+
930
+ # Select and reorder columns
931
+ display_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]
932
+ available_columns = [col for col in display_columns if col in display_df.columns]
933
 
934
+ # Clean up temporary columns
935
+ temp_cols = [col for col in display_df.columns if col.endswith("_float")]
936
+ display_df = display_df.drop(columns=temp_cols, errors='ignore')
937
+
938
+ return display_df[available_columns]
939
+
940
+ def create_performance_chart():
941
+ """Create performance visualization chart"""
942
+ try:
943
+ df = pd.read_csv(leaderboard_file)
944
+ if len(df) == 0:
945
+ return None
946
+
947
+ # Sort by Combined_Score
948
+ df = df.sort_values("Combined_Score")
949
+
950
+ fig = go.Figure()
951
+
952
+ # Add WER bars
953
+ fig.add_trace(go.Bar(
954
+ name="WER",
955
+ x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
956
+ y=df["WER"] * 100,
957
+ marker_color='#ff7f0e',
958
+ hovertemplate='<b>%{x}</b><br>WER: %{y:.2f}%<extra></extra>'
959
+ ))
960
+
961
+ # Add CER bars
962
+ fig.add_trace(go.Bar(
963
+ name="CER",
964
+ x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
965
+ y=df["CER"] * 100,
966
+ marker_color='#2ca02c',
967
+ hovertemplate='<b>%{x}</b><br>CER: %{y:.2f}%<extra></extra>'
968
+ ))
969
+
970
+ # Add Combined Score line
971
+ fig.add_trace(go.Scatter(
972
+ name="Combined Score",
973
+ x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
974
+ y=df["Combined_Score"] * 100,
975
+ mode='lines+markers',
976
+ line=dict(color='#d62728', width=3),
977
+ marker=dict(size=8),
978
+ hovertemplate='<b>%{x}</b><br>Combined Score: %{y:.2f}%<extra></extra>'
979
+ ))
980
+
981
+ fig.update_layout(
982
+ title={
983
+ 'text': "πŸ“Š Model Performance Comparison",
984
+ 'x': 0.5,
985
+ 'font': {'size': 18, 'family': 'Rubik'}
986
+ },
987
+ xaxis_title="Model",
988
+ yaxis_title="Error Rate (%)",
989
+ hovermode='x unified',
990
+ height=500,
991
+ showlegend=True,
992
+ plot_bgcolor='rgba(0,0,0,0)',
993
+ paper_bgcolor='rgba(0,0,0,0)',
994
+ font=dict(family="Inter", size=12),
995
+ legend=dict(
996
+ orientation="h",
997
+ yanchor="bottom",
998
+ y=1.02,
999
+ xanchor="right",
1000
+ x=1
1001
+ )
1002
+ )
1003
+
1004
+ return fig
1005
+ except Exception as e:
1006
+ print(f"Error creating chart: {str(e)}")
1007
+ return None
1008
+
1009
+ def get_leaderboard_stats():
1010
+ """Get summary statistics for the leaderboard"""
1011
+ try:
1012
+ df = pd.read_csv(leaderboard_file)
1013
+ if len(df) == 0:
1014
+ return """
1015
+ <div class="stats-grid">
1016
+ <div class="stat-card">
1017
+ <div class="stat-number">0</div>
1018
+ <div class="stat-label">Models Submitted</div>
1019
+ </div>
1020
+ </div>
1021
+ """
1022
+
1023
+ best_model = df.loc[df["Combined_Score"].idxmin()]
1024
+ total_models = len(df)
1025
+ avg_wer = df["WER"].mean()
1026
+ avg_cer = df["CER"].mean()
1027
+
1028
+ return f"""
1029
+ <div class="stats-grid">
1030
+ <div class="stat-card">
1031
+ <div class="stat-number">{total_models}</div>
1032
+ <div class="stat-label">Models Evaluated</div>
1033
+ </div>
1034
+ <div class="stat-card">
1035
+ <div class="stat-number">{format_as_percentage(best_model['Combined_Score'])}</div>
1036
+ <div class="stat-label">Best Combined Score</div>
1037
+ </div>
1038
+ <div class="stat-card">
1039
+ <div class="stat-number">{format_as_percentage(avg_wer)}</div>
1040
+ <div class="stat-label">Average WER</div>
1041
+ </div>
1042
+ <div class="stat-card">
1043
+ <div class="stat-number">{format_as_percentage(avg_cer)}</div>
1044
+ <div class="stat-label">Average CER</div>
1045
+ </div>
1046
+ </div>
1047
+
1048
+ <div style="text-align: center; margin-top: 20px;">
1049
+ <h4>πŸ† Current Champion: {best_model['Model_Name']}</h4>
1050
+ </div>
1051
+ """
1052
+ except Exception as e:
1053
+ return f"<p>Error loading stats: {str(e)}</p>"
1054
 
1055
  def update_ranking(method):
1056
  """Update leaderboard ranking based on selected method"""
 
1070
 
1071
  except Exception as e:
1072
  print(f"Error updating ranking: {str(e)}")
1073
+ return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"])
1074
 
1075
+ def compare_models(model_1_name, model_2_name):
1076
+ """Compare two models performance"""
1077
+ try:
1078
+ df = pd.read_csv(leaderboard_file)
1079
+
1080
+ if model_1_name == model_2_name:
1081
+ return pd.DataFrame([{"Info": "Please select two different models to compare."}])
1082
+
1083
+ model_1 = df[df["Model_Name"] == model_1_name]
1084
+ model_2 = df[df["Model_Name"] == model_2_name]
1085
+
1086
+ if model_1.empty or model_2.empty:
1087
+ return pd.DataFrame([{"Info": "One or both models not found in leaderboard."}])
1088
+
1089
+ m1 = model_1.iloc[0]
1090
+ m2 = model_2.iloc[0]
1091
+
1092
+ comparison_data = {
1093
+ "Metric": ["WER", "CER", "Combined Score"],
1094
+ model_1_name.split("/")[-1]: [
1095
+ f"{m1['WER']*100:.2f}%",
1096
+ f"{m1['CER']*100:.2f}%",
1097
+ f"{m1['Combined_Score']*100:.2f}%"
1098
+ ],
1099
+ model_2_name.split("/")[-1]: [
1100
+ f"{m2['WER']*100:.2f}%",
1101
+ f"{m2['CER']*100:.2f}%",
1102
+ f"{m2['Combined_Score']*100:.2f}%"
1103
+ ],
1104
+ "Difference": [
1105
+ f"{(m1['WER'] - m2['WER'])*100:+.2f}%",
1106
+ f"{(m1['CER'] - m2['CER'])*100:+.2f}%",
1107
+ f"{(m1['Combined_Score'] - m2['Combined_Score'])*100:+.2f}%"
1108
+ ]
1109
+ }
1110
+
1111
+ return pd.DataFrame(comparison_data)
1112
+
1113
+ except Exception as e:
1114
+ return pd.DataFrame([{"Error": f"Error comparing models: {str(e)}"}])
1115
+
1116
+ def process_submission(model_name, csv_file, model_type, origin_country):
1117
+ """Process a new model submission with enhanced metadata"""
1118
  if not model_name or not model_name.strip():
1119
+ return "❌ **Error:** Please provide a model name.", None, None
1120
 
1121
  if not csv_file:
1122
+ return "❌ **Error:** Please upload a CSV file.", None, None
1123
 
1124
  try:
1125
  df = pd.read_csv(csv_file)
1126
 
1127
  if len(df) == 0:
1128
+ return "❌ **Error:** Uploaded CSV is empty.", None, None
1129
 
1130
  if set(df.columns) != {"id", "text"}:
1131
+ return f"❌ **Error:** CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None, None
1132
 
1133
  if df["id"].duplicated().any():
1134
  dup_ids = df[df["id"].duplicated()]["id"].unique()
1135
+ return f"❌ **Error:** Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None, None
1136
 
1137
  missing_ids = set(references.keys()) - set(df["id"])
1138
  extra_ids = set(df["id"]) - set(references.keys())
1139
 
1140
  if missing_ids:
1141
+ return f"❌ **Error:** Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None, None
1142
 
1143
  if extra_ids:
1144
+ return f"❌ **Error:** Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None, None
1145
 
1146
  try:
1147
  avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
1148
 
 
1149
  if avg_wer < 0.001:
1150
+ return "❌ **Error:** WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None, None
1151
 
1152
  except Exception as e:
1153
+ return f"❌ **Error calculating metrics:** {str(e)}", None, None
1154
 
1155
+ # Update leaderboard
1156
  leaderboard = pd.read_csv(leaderboard_file)
1157
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
1158
  combined_score = avg_wer * 0.7 + avg_cer * 0.3
1159
 
1160
  if model_name in leaderboard["Model_Name"].values:
 
1163
  leaderboard.loc[idx, "CER"] = avg_cer
1164
  leaderboard.loc[idx, "Combined_Score"] = combined_score
1165
  leaderboard.loc[idx, "timestamp"] = timestamp
1166
+ leaderboard.loc[idx, "Type"] = model_type
1167
+ leaderboard.loc[idx, "Origin"] = origin_country
1168
  updated_leaderboard = leaderboard
1169
  else:
1170
  new_entry = pd.DataFrame(
1171
+ [[model_name, avg_wer, avg_cer, combined_score, timestamp, model_type, origin_country, "ASR"]],
1172
+ columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]
1173
  )
1174
  updated_leaderboard = pd.concat([leaderboard, new_entry])
1175
 
 
1177
  updated_leaderboard.to_csv(leaderboard_file, index=False)
1178
 
1179
  display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
1180
+ chart = create_performance_chart()
1181
+
1182
+ badge = get_performance_badge(combined_score)
1183
 
1184
+ success_msg = f"""
1185
+ βœ… **Submission processed successfully!**
1186
+
1187
+ **{model_name}** ({model_type} from {origin_country})
1188
+ - **WER:** {format_as_percentage(avg_wer)}
1189
+ - **CER:** {format_as_percentage(avg_cer)}
1190
+ - **Combined Score:** {format_as_percentage(combined_score)}
1191
+ - **Performance:** {badge}
1192
+ """
1193
+
1194
+ return success_msg, display_leaderboard, chart
1195
 
1196
  except Exception as e:
1197
+ return f"❌ **Error processing submission:** {str(e)}", None, None
1198
 
1199
  def get_current_leaderboard():
1200
  """Get the current leaderboard data for display"""
 
1202
  if os.path.exists(leaderboard_file):
1203
  current_leaderboard = pd.read_csv(leaderboard_file)
1204
 
1205
+ # Ensure all required columns exist
1206
+ required_columns = ["Combined_Score", "Type", "Origin", "Task"]
1207
+ for col in required_columns:
1208
+ if col not in current_leaderboard.columns:
1209
+ if col == "Combined_Score":
1210
+ current_leaderboard[col] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
1211
+ else:
1212
+ current_leaderboard[col] = "Unknown" if col != "Task" else "ASR"
1213
+
1214
+ current_leaderboard.to_csv(leaderboard_file, index=False)
1215
  return current_leaderboard
1216
  else:
1217
+ return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"])
1218
  except Exception as e:
1219
  print(f"Error getting leaderboard: {str(e)}")
1220
+ return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"])
1221
 
1222
  def create_leaderboard_table():
1223
  """Create and format the leaderboard table for display"""
1224
  leaderboard_data = get_current_leaderboard()
1225
  return prepare_leaderboard_for_display(leaderboard_data)
1226
 
1227
+ def df_to_html(df):
1228
+ """Convert DataFrame to HTML with custom styling"""
1229
+ if df.empty:
1230
+ return "<p style='text-align: center; color: #666;'>No data available</p>"
1231
 
1232
+ # Convert DataFrame to HTML
1233
+ html = df.to_html(index=False, escape=False, classes="leaderboard-table")
1234
 
1235
+ # Add custom styling
1236
+ html = html.replace('<table class="leaderboard-table"',
1237
+ '<table class="leaderboard-table" style="width: 100%; margin: 0 auto;"')
1238
 
1239
+ return html
 
1240
 
+ # Main Gradio Interface
+ with gr.Blocks(
+ title="🇲🇱 Bambara ASR Leaderboard | MALIBA-AI",
+ css=sahara_style_css,
+ head=custom_head_html,
+ theme=gr.themes.Soft()
+ ) as demo:
+
+ # Header Section
+ gr.HTML(new_header_html)
+
+ # Navigation Buttons
+ with gr.Row():
+ gr.Button("🌐 MALIBA-AI Website", link="https://maliba-ai.org/", elem_classes=['nav-button'])
+ gr.Button("📊 HF Dataset Repo", link="https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark", elem_classes=['nav-button'])
+ gr.Button("🤗 MALIBA-AI Hub", link="https://huggingface.co/MALIBA-AI", elem_classes=['nav-button'])
+ gr.Button("📚 Documentation", link="https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard", elem_classes=['nav-button'])
+
+ with gr.Group(elem_classes="content-card"):
+ # Stats display
+ stats_html = gr.HTML(get_leaderboard_stats())
+
+ with gr.Tabs() as tabs:
+ with gr.TabItem("🏅 Main Leaderboard", id="main"):
+ gr.HTML("<h2>Main Leaderboard</h2>")
+
+ initial_leaderboard = create_leaderboard_table()
+
+ with gr.Row():
+ ranking_method = gr.Radio(
+ ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
+ label="🔄 Ranking Method",
+ value="Combined Score (WER 70%, CER 30%)",
+ info="Choose how to rank the models"
+ )
+
+ leaderboard_view = gr.DataFrame(
+ value=initial_leaderboard,
+ interactive=False,
+ label="📋 Leaderboard Rankings - Lower scores indicate better performance",
+ wrap=True,
+ height=400
+ )
+
+ # Performance chart
+ gr.Markdown("### 📊 Visual Performance Comparison")
+ performance_chart = gr.Plot(
+ value=create_performance_chart(),
+ label="Model Performance Visualization"
+ )
+
+ ranking_method.change(
+ fn=update_ranking,
+ inputs=[ranking_method],
+ outputs=[leaderboard_view]
+ )
+
+ with gr.Accordion("📖 Understanding ASR Metrics", open=False):
+ gr.Markdown("""
+ ## 🎯 Automatic Speech Recognition Evaluation Metrics
 
  ### Word Error Rate (WER)
+ **WER** measures transcription accuracy at the word level:
+ - **Formula:** `(Substitutions + Insertions + Deletions) / Total Reference Words`
+ - **Range:** 0% (perfect) to 100%+ (very poor)
+ - **Interpretation:**
+ - 0-5%: 🏆 Excellent performance
+ - 5-15%: 🥉 Good performance
+ - 15-30%: 📈 Fair performance
+ - 30%+: Poor performance
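+
+ For example (illustrative numbers): a hypothesis with 2 substitutions, 1 insertion and 1 deletion against a 20-word reference gives WER = (2 + 1 + 1) / 20 = 20%.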
 
  ### Character Error Rate (CER)
+ **CER** measures transcription accuracy at the character level:
+ - **Advantage:** More granular than WER, captures partial matches
+ - **Benefit for Bambara:** Particularly valuable for agglutinative languages
+ - **Typical Range:** Usually lower than WER values
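+
+ For example (illustrative): a single wrong character against a 25-character reference gives CER = 1 / 25 = 4%.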
 
+ ### Combined Score (Primary Ranking Metric)
+ **Formula:** `Combined Score = 0.7 × WER + 0.3 × CER`
+ - **Rationale:** Balanced evaluation emphasizing word-level accuracy
+ - **Usage:** Primary metric for model ranking
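+
+ A quick illustrative calculation (the error rates below are made-up examples, not leaderboard values):
+
+ ```python
+ # Illustrative only: combine example error rates with the leaderboard weights
+ wer, cer = 0.20, 0.08            # 20% WER, 8% CER
+ combined = 0.7 * wer + 0.3 * cer
+ print(f"{combined:.1%}")         # 16.4%
+ ```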
+
+ ### 🎯 Performance Categories
+ - 🏆 **Excellent**: < 15% Combined Score
+ - 🥉 **Good**: 15-30% Combined Score
+ - 📈 **Fair**: > 30% Combined Score
+ """)
+
+ with gr.TabItem("📤 Submit New Model", id="submit"):
+ gr.HTML("<h2>Submit Your Bambara ASR Model</h2>")
+
+ gr.Markdown("""
+ ### 🚀 Ready to benchmark your model? Submit your results and join the leaderboard!
+
+ Follow these steps to submit your Bambara ASR model for evaluation.
+ """)
+
+ with gr.Group(elem_classes="form-section"):
+ with gr.Row():
+ with gr.Column(scale=2):
+ model_name_input = gr.Textbox(
+ label="🤖 Model Name",
+ placeholder="e.g., MALIBA-AI/bambara-whisper-large",
+ info="Use a descriptive name (organization/model format preferred)"
+ )
+
+ model_type = gr.Dropdown(
+ label="🏷️ Model Type",
+ choices=["Whisper-based", "Wav2Vec2", "Foundation", "Custom", "Fine-tuned", "Multilingual", "Other"],
+ value="Custom",
+ info="Select the type/architecture of your model"
+ )
+
+ origin_country = gr.Dropdown(
+ label="🌍 Origin/Institution",
+ choices=["Mali", "Senegal", "Burkina Faso", "Niger", "Guinea", "Ivory Coast", "USA", "France", "Canada", "UK", "Other"],
+ value="Mali",
+ info="Country or region of the developing institution"
+ )
+
+ with gr.Column(scale=1):
+ gr.Markdown("""
+ #### 📋 Submission Requirements
+
+ **CSV Format:**
+ - Columns: `id`, `text`
+ - Match all reference dataset IDs
+ - No duplicate IDs
+ - Text transcriptions in Bambara
+
+ **Data Quality:**
+ - Clean, normalized text
+ - Consistent formatting
+ - Complete coverage of test set
+ """)
+
+ csv_upload = gr.File(
+ label="📁 Upload Predictions CSV",
+ file_types=[".csv"],
+ info="Upload your model's transcriptions in the required CSV format"
  )
 
+ submit_btn = gr.Button("🚀 Submit Model", variant="primary", size="lg", elem_classes=['gradio-button', 'primary'])
+
+ output_msg = gr.Markdown(label="📢 Submission Status")
+
+ with gr.Row():
+ leaderboard_display = gr.DataFrame(
+ label="📊 Updated Leaderboard",
+ value=initial_leaderboard,
+ interactive=False,
+ wrap=True,
+ height=400
+ )
+
+ updated_chart = gr.Plot(
+ label="📈 Updated Performance Chart"
+ )
+
+ submit_btn.click(
+ fn=process_submission,
+ inputs=[model_name_input, csv_upload, model_type, origin_country],
+ outputs=[output_msg, leaderboard_display, updated_chart]
  )
 
+ with gr.TabItem("🔍 Compare Models", id="compare"):
+ gr.HTML("<h2>Compare Two Models</h2>")
+
+ gr.Markdown("### Select two models to compare their performance side-by-side")
+
+ with gr.Row():
+ current_data = get_current_leaderboard()
+ model_names = current_data["Model_Name"].tolist() if not current_data.empty else []
+
+ model_1_dropdown = gr.Dropdown(
+ choices=model_names,
+ label="🤖 Model 1",
+ info="Select the first model for comparison"
+ )
+ model_2_dropdown = gr.Dropdown(
+ choices=model_names,
+ label="🤖 Model 2",
+ info="Select the second model for comparison"
+ )
+
+ compare_btn = gr.Button("⚡ Compare Models", variant="primary", elem_classes=['gradio-button', 'primary'])
+
+ comparison_note = gr.Markdown("""
+ **Note on Comparison Results:**
+ - Positive difference values (🟢) indicate Model 1 performed better
+ - Negative difference values (🔴) indicate Model 2 performed better
+ - Lower error rates indicate better performance
+ """, visible=False)
+
+ comparison_output = gr.DataFrame(
+ label="📊 Model Comparison Results",
+ value=pd.DataFrame([{"Info": "Select two models and click Compare to see the results."}]),
+ interactive=False
+ )
+
+ def update_comparison_table(m1, m2):
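+ """Validate the dropdown selections, then return a visibility update for the note plus the comparison table."""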
+ if not m1 or not m2:
+ return gr.update(visible=False), pd.DataFrame([{"Info": "Please select both models before clicking Compare."}])
+
+ if m1 == m2:
+ return gr.update(visible=False), pd.DataFrame([{"Info": "Please select two different models to compare."}])
+
+ df = compare_models(m1, m2)
+ return gr.update(visible=True), df
+
+ compare_btn.click(
+ fn=update_comparison_table,
+ inputs=[model_1_dropdown, model_2_dropdown],
+ outputs=[comparison_note, comparison_output]
  )
 
+ with gr.TabItem("📊 Dataset & Methodology", id="dataset"):
+ gr.HTML("<h2>Dataset & Methodology</h2>")
 
+ gr.Markdown("""
+ ## 🎯 About the Bambara Speech Recognition Benchmark
 
+ ### 📈 Dataset Overview
 
+ Our benchmark is built on the **`sudoping01/bambara-speech-recognition-benchmark`** dataset, featuring:
 
+ - **🎙️ Diverse Audio Samples:** Various speakers, dialects, and recording conditions
+ - **🗣️ Speaker Variety:** Multiple native Bambara speakers from different regions
+ - **🎵 Acoustic Diversity:** Different recording environments and quality levels
+ - **✅ Quality Assurance:** Manually validated transcriptions
+ - **📚 Content Variety:** Multiple domains and speaking styles
 
+ ### 🔬 Evaluation Methodology
 
+ #### Text Normalization Process
+ 1. **Lowercase conversion** for consistency
+ 2. **Punctuation removal** to focus on linguistic content
+ 3. **Whitespace normalization** for standardized formatting
+ 4. **Unicode normalization** for proper character handling
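+
+ For example, under these steps a hypothetical reference such as `Aw ni ce!  I ka kene wa?` would be scored as `aw ni ce i ka kene wa`.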
 
+ #### Quality Controls
+ - **Outlier Detection:** Extreme error rates are capped to prevent skewing
+ - **Data Validation:** Comprehensive format and completeness checks
+ - **Duplicate Prevention:** Automatic detection of duplicate submissions
+ - **Missing Data Handling:** Identification of incomplete submissions
+
+ ### 🚀 How to Participate
+
+ #### Step 1: Access the Dataset
+ ```python
+ from datasets import load_dataset
+ dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark")
+ ```
+
+ #### Step 2: Generate Predictions
+ - Process the audio files with your ASR model
+ - Generate transcriptions for each audio sample
+ - Ensure your model outputs text in Bambara
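+
+ A minimal sketch of this step, continuing from the Step 1 snippet (illustrative only: `transcribe()` is a placeholder for your own model's inference call, and the `id`/`audio` field names assume the dataset's default schema):
+
+ ```python
+ import pandas as pd
+
+ rows = []
+ for sample in dataset["eval"]:  # adjust the split name if your copy differs
+     rows.append({"id": sample["id"], "text": transcribe(sample["audio"])})
+
+ pd.DataFrame(rows).to_csv("predictions.csv", index=False)
+ ```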
+
+ #### Step 3: Format Results
+ Create a CSV file with exactly these columns:
+ - **`id`**: Sample identifier (must match dataset IDs)
+ - **`text`**: Your model's transcription
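+
+ For illustration, the first few rows could look like this (the IDs and text are made up):
+
+ ```csv
+ id,text
+ sample_0001,aw ni ce
+ sample_0002,ne be taa sugu la
+ ```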
+
+ #### Step 4: Submit & Evaluate
+ - Upload your CSV using the submission form
+ - Your model will be automatically evaluated
+ - Results appear on the leaderboard immediately
+
+ ### 🏆 Recognition & Impact
+
+ **Top-performing models will be:**
+ - Featured prominently on our leaderboard
+ - Highlighted in MALIBA-AI communications
+ - Considered for inclusion in production systems
+ - Invited to present at community events
+
+ ### 🤝 Community Guidelines
+
+ - **Reproducibility:** Please provide model details and methodology
+ - **Fair Play:** No data leakage or unfair advantages
+ - **Collaboration:** Share insights and learnings with the community
+ - **Attribution:** Properly cite the benchmark in publications
+
+ ### 📚 Technical Specifications
+
+ | Aspect | Details |
+ |--------|---------|
+ | **Audio Format** | WAV, various sample rates |
+ | **Language** | Bambara (bam) |
+ | **Evaluation Metrics** | WER, CER, Combined Score |
+ | **Text Encoding** | UTF-8 |
+ | **Submission Format** | CSV with id, text columns |
+ """)
+
+ # Citation and Footer
+ with gr.Group(elem_classes="content-card"):
+ gr.HTML("""
+ <div class="citation-block">
+ <h2>📚 Citation</h2>
+ <p>If you use the Bambara ASR Leaderboard in a scientific publication, or if you find these resources useful, please cite our work:</p>
+ <pre>
+ @misc{bambara_asr_leaderboard_2025,
+ title={Bambara Speech Recognition Leaderboard},
+ author={MALIBA-AI Team},
+ year={2025},
+ url={https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard},
+ note={A community initiative for advancing Bambara speech recognition technology}
+ }
+ </pre>
+ </div>
+ """)
 
+ gr.HTML("""
+ <div style="text-align: center; margin-top: 30px; padding-top: 20px; border-top: 2px solid #e9ecef;">
+ <h3 style="color: #7d3561; margin-bottom: 15px;">About MALIBA-AI</h3>
+ <p style="font-size: 16px; line-height: 1.6; max-width: 800px; margin: 0 auto;">
+ <strong>MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation</strong><br>
+ <em>"No Malian Language Left Behind"</em>
+ </p>
+ <p style="margin-top: 15px;">
+ This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
+ For more information, visit <a href="https://maliba-ai.org/" style="color: #7d3561; font-weight: 600;">MALIBA-AI</a> or
+ <a href="https://huggingface.co/MALIBA-AI" style="color: #7d3561; font-weight: 600;">our Hugging Face page</a>.
+ </p>
+ <div style="margin-top: 20px;">
+ <span style="font-size: 2em;">🇲🇱</span>
+ <span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
+ <span style="font-size: 2em;">🤝</span>
+ <span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
+ <span style="font-size: 2em;">🚀</span>
+ </div>
+ </div>
+ """)
 
  if __name__ == "__main__":
  demo.launch()