"""Gradio app that renders the SpeechIQ leaderboard from ``SpeechIQ_table.csv``."""

import gradio as gr
import pandas as pd
import numpy as np

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css

# Display order of the leaderboard columns. The empty fallback frame uses the
# same list, so the table schema is identical whether or not loading succeeds.
_LEADERBOARD_COLUMNS = [
    'Rank', 'Speech IQ', 'Remember', 'Understand', 'Apply',
    'Model Type', 'Setup', 'Audio Encoder',
]

# Score columns that are rounded for display.
_SCORE_COLUMNS = ['Remember', 'Understand', 'Apply', 'Speech IQ']

# Rank labels for the first three rows; later rows get their 1-based position.
_MEDALS = ('🥇', '🥈', '🥉')

# Static legend shown under the leaderboard table. Built from joined lines so
# the rendered markdown does not depend on source indentation.
_COLUMN_LEGEND_MD = "\n".join([
    "",
    "### 📋 Column Explanations",
    "",
    "- **Rank**: Position ranking with 🥇🥈🥉 medals for top 3 performers",
    "- **Speech IQ**: Overall intelligence quotient combining all dimensions (primary metric)",
    "- **Remember**: Verbatim accuracy score (WER-based)",
    "- **Understand**: Semantic interpretation similarity score",
    "- **Apply**: Downstream task performance score",
    "- **Model Type**: Architecture approach (Agentic vs End2End)",
    "- **Setup**: Specific model configuration and components",
    "- **Audio Encoder**: The audio processing component used",
    "",
    "*Higher scores indicate better performance across all metrics.*",
    "",
])


def load_speechiq_data(csv_path="SpeechIQ_table.csv"):
    """Load the SpeechIQ results table and prepare it for display.

    Score columns are rounded to 3 decimals, rows are sorted by ``Speech IQ``
    in descending order, and a ``Rank`` column is added (medal emojis for the
    first three rows, the 1-based position as a string afterwards).

    Args:
        csv_path: Path of the CSV file to read. Defaults to the file the app
            has always used, so existing callers are unaffected.

    Returns:
        A DataFrame with the leaderboard columns in display order. If
        anything goes wrong (missing file, missing column, parse error), the
        error is printed and an empty DataFrame with the same columns is
        returned so the UI can still be built.
    """
    try:
        df = pd.read_csv(csv_path)

        # Round numerical columns to 3 decimal places for better display.
        for col in _SCORE_COLUMNS:
            if col in df.columns:
                df[col] = df[col].round(3)

        # Best score first.
        df = df.sort_values('Speech IQ', ascending=False)

        # Rank follows the sorted row position, not the original index.
        df['Rank'] = [
            _MEDALS[pos] if pos < len(_MEDALS) else f'{pos + 1}'
            for pos in range(len(df))
        ]

        return df[_LEADERBOARD_COLUMNS]
    except Exception as e:
        # Deliberate best-effort boundary: the app should start even when the
        # data cannot be loaded, so report the problem and fall back.
        print(f"Error loading SpeechIQ data: {e}")
        return pd.DataFrame(columns=_LEADERBOARD_COLUMNS)


def _best_score_for(df, model_type):
    """Return the highest ``Speech IQ`` among rows whose ``Model Type`` matches, or 0 if none match."""
    subset = df[df['Model Type'].str.contains(model_type, na=False)]
    return subset['Speech IQ'].max() if not subset.empty else 0


def get_top_performers(df):
    """Build the markdown statistics table for the leaderboard.

    Args:
        df: Leaderboard DataFrame as returned by ``load_speechiq_data``.

    Returns:
        A markdown string with the top setup, the highest score, the best
        Agentic and End2End scores and the model count, or
        ``"No data available."`` when there is nothing to summarise.
    """
    if df.empty:
        return "No data available."

    scores = df['Speech IQ']
    # idxmax cannot pick a row when every score is missing.
    if scores.isna().all():
        return "No data available."

    top_score = scores.max()
    top_model = df.loc[scores.idxmax()]
    agentic_best = _best_score_for(df, 'Agentic')
    end2end_best = _best_score_for(df, 'End2End')

    return "\n".join([
        "",
        "## 📊 Leaderboard Statistics",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| 🏆 **Top Performer** | {top_model['Setup']} |",
        f"| 🎯 **Highest Score** | **{top_score}** |",
        f"| 🤖 **Best Agentic Model** | {agentic_best} |",
        f"| 🔄 **Best End2End Model** | {end2end_best} |",
        f"| 📈 **Total Models** | {len(df)} |",
        "",
    ])


def _format_score(value, spec):
    """Format ``value`` with ``spec``, showing ``N/A`` instead of ``nan`` for missing values."""
    return "N/A" if pd.isna(value) else format(value, spec)


def _best_in_dimension(df, column):
    """Return ``"<Setup> (<score>)"`` for the row with the highest value in ``column``."""
    scores = df[column]
    # Guard against columns with no usable values, where idxmax has no answer.
    if scores.isna().all():
        return "N/A (N/A)"
    return f"{df.loc[scores.idxmax(), 'Setup']} ({scores.max():.3f})"


def _build_analysis_markdown(df):
    """Build the markdown shown in the Analysis tab for the given leaderboard frame."""
    if df.empty:
        return "No data available for analysis."

    # Group by model type for comparison.
    model_types = df['Model Type']
    agentic_models = df[model_types.str.contains('Agentic', na=False)]
    end2end_models = df[model_types.str.contains('End2End', na=False)]

    return "\n".join([
        "",
        "### 🔍 Model Type Comparison",
        "",
        "**Agentic Models (ASR + LLM):**",
        f"- Count: {len(agentic_models)}",
        f"- Average Speech IQ: {_format_score(agentic_models['Speech IQ'].mean(), '.2f')}",
        f"- Best Score: {_format_score(agentic_models['Speech IQ'].max(), '.2f')}",
        "",
        "**End-to-End Models:**",
        f"- Count: {len(end2end_models)}",
        f"- Average Speech IQ: {_format_score(end2end_models['Speech IQ'].mean(), '.2f')}",
        f"- Best Score: {_format_score(end2end_models['Speech IQ'].max(), '.2f')}",
        "",
        "### 🎯 Cognitive Dimension Analysis",
        "",
        "**Remember (Verbatim Accuracy):**",
        f"- Best performer: {_best_in_dimension(df, 'Remember')}",
        "",
        "**Understand (Semantic Similarity):**",
        f"- Best performer: {_best_in_dimension(df, 'Understand')}",
        "",
        "**Apply (Task Performance):**",
        f"- Best performer: {_best_in_dimension(df, 'Apply')}",
        "",
    ])


# Load the data
speechiq_df = load_speechiq_data()

# Create the Gradio interface
demo = gr.Blocks(css=custom_css, title="SpeechIQ Leaderboard")
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 SpeechIQ Leaderboard", elem_id="speechiq-leaderboard-tab", id=0):
            # Main leaderboard table
            with gr.Row():
                leaderboard_table = gr.Dataframe(
                    value=speechiq_df,
                    # The frame always carries the leaderboard columns, even
                    # when loading failed, so no separate fallback is needed.
                    headers=speechiq_df.columns.tolist(),
                    interactive=False,
                    elem_classes="leaderboard-table",
                )

            # Legend and explanation
            with gr.Row():
                gr.Markdown(_COLUMN_LEGEND_MD, elem_classes="markdown-text")

        with gr.TabItem("📊 Analysis", elem_id="analysis-tab", id=1):
            # Performance comparison between model types and per dimension
            with gr.Row():
                analysis_markdown = gr.Markdown(
                    _build_analysis_markdown(speechiq_df),
                    elem_classes="markdown-text",
                )

            # Statistics section
            # NOTE(review): placement inside the Analysis tab is assumed;
            # confirm against the intended layout.
            with gr.Row():
                stats_markdown = gr.Markdown(
                    get_top_performers(speechiq_df),
                    elem_classes="markdown-text stats-section",
                )

        with gr.TabItem("📝 About", elem_id="about-tab", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    # Citation section
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=6,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Add refresh functionality
    with gr.Row():
        refresh_button = gr.Button("🔄 Refresh Data", variant="secondary")

    def refresh_data():
        """Reload the leaderboard from disk and return the fresh DataFrame."""
        updated_df = load_speechiq_data()
        return updated_df

    def _refresh_all():
        """Reload the data and rebuild every view derived from it."""
        updated_df = refresh_data()
        return (
            updated_df,
            _build_analysis_markdown(updated_df),
            get_top_performers(updated_df),
        )

    # Refresh the table together with the analysis and statistics text so the
    # derived views never show numbers from an older load.
    refresh_button.click(
        _refresh_all,
        outputs=[leaderboard_table, analysis_markdown, stats_markdown],
    )

if __name__ == "__main__":
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)