# (removed: Hugging Face Spaces page-viewer residue — status lines, git blame
# hashes, and the line-number gutter; not part of the source file)
import gradio as gr
import pandas as pd
import numpy as np
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
def load_speechiq_data(csv_path="SpeechIQ_table.csv"):
    """Load and rank the SpeechIQ results from a CSV file.

    Args:
        csv_path: Path to the results CSV. Defaults to the repo-root table
            so existing no-argument callers are unaffected.

    Returns:
        pd.DataFrame: Rows sorted by 'Speech IQ' (descending) with a 'Rank'
        column (medal emojis for the top 3, plain numbers below), columns
        reordered for display. On any load/parse error an empty DataFrame
        with the expected columns is returned so the UI still renders.
    """
    display_columns = ['Rank', 'Speech IQ', 'Remember', 'Understand',
                       'Apply', 'Model Type', 'Setup', 'Audio Encoder']
    try:
        df = pd.read_csv(csv_path)

        # Round numerical columns to 3 decimal places for better display.
        for col in ('Remember', 'Understand', 'Apply', 'Speech IQ'):
            if col in df.columns:
                df[col] = df[col].round(3)

        # Primary metric first: best Speech IQ at the top.
        df = df.sort_values('Speech IQ', ascending=False).reset_index(drop=True)

        # Medal emojis for the podium, 1-based position numbers below.
        # NOTE(review): the source's medal strings were mojibake (all three
        # identical) — restored to the conventional gold/silver/bronze set.
        medals = {0: '🥇', 1: '🥈', 2: '🥉'}
        df['Rank'] = [medals.get(i, str(i + 1)) for i in range(len(df))]

        # Reorder columns to put Rank and Speech IQ first.
        return df[display_columns]
    except Exception as e:
        # Best-effort: keep the UI alive with an empty table when the CSV
        # is missing or malformed.
        print(f"Error loading SpeechIQ data: {e}")
        return pd.DataFrame(columns=display_columns)
def get_top_performers(df):
    """Build a markdown summary of leaderboard statistics.

    Args:
        df: Leaderboard DataFrame with at least 'Speech IQ', 'Setup' and
            'Model Type' columns (as produced by load_speechiq_data).

    Returns:
        str: Markdown table listing the top performer, best score per model
        type and the total model count, or "No data available." for an
        empty DataFrame.
    """
    if df.empty:
        return "No data available."

    top_score = df['Speech IQ'].max()
    top_model = df.loc[df['Speech IQ'].idxmax()]

    def _best_score(kind):
        # Best Speech IQ among rows whose 'Model Type' mentions `kind`;
        # 0 when no such rows exist (same fallback as the original).
        subset = df[df['Model Type'].str.contains(kind, na=False)]
        return subset['Speech IQ'].max() if not subset.empty else 0

    # NOTE(review): emoji in this template were mojibake in the source;
    # replaced with plausible icons — confirm against the original file.
    stats_text = f"""
## 📊 Leaderboard Statistics
| Metric | Value |
|--------|-------|
| 🏆 **Top Performer** | {top_model['Setup']} |
| 🎯 **Highest Score** | **{top_score}** |
| 🤖 **Best Agentic Model** | {_best_score('Agentic')} |
| 🔊 **Best End2End Model** | {_best_score('End2End')} |
| 📋 **Total Models** | {len(df)} |
"""
    return stats_text
# Load the data once at startup; the refresh button reloads it on demand.
speechiq_df = load_speechiq_data()

# Create the Gradio interface.
# NOTE(review): emoji in labels below were mojibake in the source; replaced
# with plausible icons — confirm against the original file.
demo = gr.Blocks(css=custom_css, title="SpeechIQ Leaderboard")

with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 SpeechIQ Leaderboard", elem_id="speechiq-leaderboard-tab", id=0):
            # Main leaderboard table.
            with gr.Row():
                leaderboard_table = gr.Dataframe(
                    value=speechiq_df,
                    # Fallback headers match the empty-DataFrame columns
                    # produced by load_speechiq_data() on error.
                    headers=speechiq_df.columns.tolist() if not speechiq_df.empty else [
                        'Rank', 'Speech IQ', 'Remember', 'Understand',
                        'Apply', 'Model Type', 'Setup', 'Audio Encoder',
                    ],
                    interactive=False,
                    elem_classes="leaderboard-table"
                )
            # Legend and explanation.
            with gr.Row():
                gr.Markdown("""
### 📖 Column Explanations
- **Rank**: Position ranking with 🥇🥈🥉 medals for top 3 performers
- **Speech IQ**: Overall intelligence quotient combining all dimensions (primary metric)
- **Remember**: Verbatim accuracy score (WER-based)
- **Understand**: Semantic interpretation similarity score
- **Apply**: Downstream task performance score
- **Model Type**: Architecture approach (Agentic vs End2End)
- **Setup**: Specific model configuration and components
- **Audio Encoder**: The audio processing component used

*Higher scores indicate better performance across all metrics.*
""", elem_classes="markdown-text")

        with gr.TabItem("📊 Analysis", elem_id="analysis-tab", id=1):
            with gr.Row():
                # Static analysis rendered from the data loaded at startup.
                if not speechiq_df.empty:
                    # Group by model type for comparison.
                    agentic_models = speechiq_df[speechiq_df['Model Type'].str.contains('Agentic', na=False)]
                    end2end_models = speechiq_df[speechiq_df['Model Type'].str.contains('End2End', na=False)]
                    comparison_text = f"""
### 📈 Model Type Comparison

**Agentic Models (ASR + LLM):**
- Count: {len(agentic_models)}
- Average Speech IQ: {agentic_models['Speech IQ'].mean():.2f}
- Best Score: {agentic_models['Speech IQ'].max():.2f}

**End-to-End Models:**
- Count: {len(end2end_models)}
- Average Speech IQ: {end2end_models['Speech IQ'].mean():.2f}
- Best Score: {end2end_models['Speech IQ'].max():.2f}

### 🎯 Cognitive Dimension Analysis

**Remember (Verbatim Accuracy):**
- Best performer: {speechiq_df.loc[speechiq_df['Remember'].idxmax(), 'Setup']} ({speechiq_df['Remember'].max():.3f})

**Understand (Semantic Similarity):**
- Best performer: {speechiq_df.loc[speechiq_df['Understand'].idxmax(), 'Setup']} ({speechiq_df['Understand'].max():.3f})

**Apply (Task Performance):**
- Best performer: {speechiq_df.loc[speechiq_df['Apply'].idxmax(), 'Setup']} ({speechiq_df['Apply'].max():.3f})
"""
                    gr.Markdown(comparison_text, elem_classes="markdown-text")
                else:
                    gr.Markdown("No data available for analysis.", elem_classes="markdown-text")

            # Statistics section - placed after the comparison block.
            with gr.Row():
                gr.Markdown(get_top_performers(speechiq_df), elem_classes="markdown-text stats-section")

        with gr.TabItem("📝 About", elem_id="about-tab", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    # Citation section.
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=6,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Refresh functionality: re-read the CSV and update the table in place.
    with gr.Row():
        refresh_button = gr.Button("🔄 Refresh Data", variant="secondary")

    def refresh_data():
        """Reload the leaderboard CSV and return the fresh DataFrame."""
        return load_speechiq_data()

    refresh_button.click(
        refresh_data,
        outputs=leaderboard_table
    )

if __name__ == "__main__":
    # Bind to all interfaces on the standard HF Spaces port.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)