# Hugging Face Space (status: Running)
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
from src.about import ( | |
CITATION_BUTTON_LABEL, | |
CITATION_BUTTON_TEXT, | |
EVALUATION_QUEUE_TEXT, | |
INTRODUCTION_TEXT, | |
LLM_BENCHMARKS_TEXT, | |
TITLE, | |
) | |
from src.display.css_html_js import custom_css | |
def load_speechiq_data(csv_path: str = "SpeechIQ_table.csv") -> pd.DataFrame:
    """Load the SpeechIQ results table and prepare it for display.

    The CSV is read, the score columns that are present are rounded to three
    decimals, rows are sorted by ``Speech IQ`` (highest first), and a ``Rank``
    column is added: medal emojis for the first three rows, the 1-based
    position as a string for the rest.

    Args:
        csv_path: Location of the results CSV. Defaults to the file name the
            app has always used, so existing no-argument callers are unaffected.

    Returns:
        A DataFrame restricted to the display columns, in display order. If
        anything goes wrong (missing file, missing column, unparsable data)
        the error is printed and an empty DataFrame with the same columns is
        returned, so the UI can still render.
    """
    score_columns = ['Remember', 'Understand', 'Apply', 'Speech IQ']
    display_columns = [
        'Rank', 'Speech IQ', 'Remember', 'Understand', 'Apply',
        'Model Type', 'Setup', 'Audio Encoder',
    ]
    medals = ['🥇', '🥈', '🥉']  # gold, silver, bronze

    try:
        df = pd.read_csv(csv_path)

        # Round numerical columns to 3 decimal places for better display
        for col in score_columns:
            if col in df.columns:
                df[col] = df[col].round(3)

        # Sort by Speech IQ score in descending order
        df = df.sort_values('Speech IQ', ascending=False)

        # Ranks follow the sorted row order: medals for the podium, then
        # plain numbers. A list is assigned positionally, so the (unsorted)
        # index labels left over from sort_values do not matter.
        row_count = len(df)
        df['Rank'] = medals[:row_count] + [
            str(position) for position in range(len(medals) + 1, row_count + 1)
        ]

        # Put Rank first, then Speech IQ, then the remaining display columns
        return df[display_columns]
    except Exception as e:
        # Deliberately broad: the leaderboard page should still come up, with
        # an empty table, whatever went wrong while loading.
        print(f"Error loading SpeechIQ data: {e}")
        return pd.DataFrame(columns=display_columns)
def get_top_performers(df):
    """Build a markdown table summarising the leaderboard.

    Args:
        df: Leaderboard frame with at least the ``Speech IQ``, ``Model Type``
            and ``Setup`` columns.

    Returns:
        A markdown string with the top setup, the highest score, the best
        score among rows whose ``Model Type`` contains "Agentic" and among
        those containing "End2End" (``0`` when a group has no rows), and the
        total row count. Returns "No data available." when the frame is empty
        or holds no usable Speech IQ value.
    """
    # An all-NaN score column has no maximum to look up, so treat it like an
    # empty table instead of failing in idxmax()/loc below.
    if df.empty or df['Speech IQ'].isna().all():
        return "No data available."

    scores = df['Speech IQ']
    top_score = scores.max()
    top_model = df.loc[scores.idxmax()]

    def best_score_for(model_type):
        # Evaluate the mask once per model type; 0 is the placeholder used
        # when no row matches.
        matching = scores[df['Model Type'].str.contains(model_type, na=False)]
        return matching.max() if not matching.empty else 0

    agentic_best = best_score_for('Agentic')
    end2end_best = best_score_for('End2End')

    # NOTE(review): the emoji in this table were mis-encoded in the source.
    # The target and robot glyphs are recoverable from the surviving bytes;
    # the others are best-effort restorations - confirm against the original.
    stats_text = f"""
## 📊 Leaderboard Statistics

| Metric | Value |
|--------|-------|
| 🏆 **Top Performer** | {top_model['Setup']} |
| 🎯 **Highest Score** | **{top_score}** |
| 🤖 **Best Agentic Model** | {agentic_best} |
| 🚀 **Best End2End Model** | {end2end_best} |
| 📈 **Total Models** | {len(df)} |
"""
    return stats_text
# Load the data once at import time; the refresh button below reloads it on demand.
speechiq_df = load_speechiq_data()


def _describe_model_group(models):
    """Return markdown bullets (count, mean and best Speech IQ) for one model-type group."""
    if models.empty:
        # mean() and max() of an empty selection are NaN, which would be
        # rendered as "nan" in the page.
        return "- Count: 0\n- Average Speech IQ: N/A\n- Best Score: N/A"
    group_scores = models['Speech IQ']
    return (
        f"- Count: {len(models)}\n"
        f"- Average Speech IQ: {group_scores.mean():.2f}\n"
        f"- Best Score: {group_scores.max():.2f}"
    )


# Create the Gradio interface
# NOTE(review): the emoji in the labels and markdown below were mis-encoded in
# the source. The medal and target glyphs are recoverable from the surviving
# bytes; the rest are best-effort restorations - confirm against the original.
# The nesting of the rows inside the tabs was also reconstructed from a copy
# that had lost its indentation - confirm the statistics row placement.
demo = gr.Blocks(css=custom_css, title="SpeechIQ Leaderboard")
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 SpeechIQ Leaderboard", elem_id="speechiq-leaderboard-tab", id=0):
            # Main leaderboard table
            with gr.Row():
                leaderboard_table = gr.Dataframe(
                    value=speechiq_df,
                    # Even the empty fallback frame carries the display
                    # columns, so the headers always come from the data.
                    headers=speechiq_df.columns.tolist(),
                    interactive=False,
                    elem_classes="leaderboard-table"
                )

            # Legend and explanation
            with gr.Row():
                gr.Markdown("""
### 📋 Column Explanations

- **Rank**: Position ranking with 🥇🥈🥉 medals for top 3 performers
- **Speech IQ**: Overall intelligence quotient combining all dimensions (primary metric)
- **Remember**: Verbatim accuracy score (WER-based)
- **Understand**: Semantic interpretation similarity score
- **Apply**: Downstream task performance score
- **Model Type**: Architecture approach (Agentic vs End2End)
- **Setup**: Specific model configuration and components
- **Audio Encoder**: The audio processing component used

*Higher scores indicate better performance across all metrics.*
""", elem_classes="markdown-text")

        with gr.TabItem("📊 Analysis", elem_id="analysis-tab", id=1):
            with gr.Row():
                # Text comparison of the two model families and the best
                # setup per cognitive dimension
                if not speechiq_df.empty:
                    # Group by model type for comparison
                    agentic_models = speechiq_df[speechiq_df['Model Type'].str.contains('Agentic', na=False)]
                    end2end_models = speechiq_df[speechiq_df['Model Type'].str.contains('End2End', na=False)]

                    comparison_text = f"""
### 📊 Model Type Comparison

**Agentic Models (ASR + LLM):**
{_describe_model_group(agentic_models)}

**End-to-End Models:**
{_describe_model_group(end2end_models)}

### 🎯 Cognitive Dimension Analysis

**Remember (Verbatim Accuracy):**
- Best performer: {speechiq_df.loc[speechiq_df['Remember'].idxmax(), 'Setup']} ({speechiq_df['Remember'].max():.3f})

**Understand (Semantic Similarity):**
- Best performer: {speechiq_df.loc[speechiq_df['Understand'].idxmax(), 'Setup']} ({speechiq_df['Understand'].max():.3f})

**Apply (Task Performance):**
- Best performer: {speechiq_df.loc[speechiq_df['Apply'].idxmax(), 'Setup']} ({speechiq_df['Apply'].max():.3f})
"""
                    gr.Markdown(comparison_text, elem_classes="markdown-text")
                else:
                    gr.Markdown("No data available for analysis.", elem_classes="markdown-text")

            # Statistics section
            with gr.Row():
                gr.Markdown(get_top_performers(speechiq_df), elem_classes="markdown-text stats-section")

        with gr.TabItem("📝 About", elem_id="about-tab", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    # Citation section
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=6,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Add refresh functionality
    with gr.Row():
        refresh_button = gr.Button("🔄 Refresh Data", variant="secondary")

    def refresh_data():
        """Reload the CSV and return the fresh table for the leaderboard component."""
        updated_df = load_speechiq_data()
        return updated_df

    # Only the table is wired as an output; the statistics and analysis
    # markdown keep the values computed when the page was built.
    refresh_button.click(
        refresh_data,
        outputs=leaderboard_table
    )

if __name__ == "__main__":
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)