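"""Gradio app: timeline of model performance across multiple metrics.

Plots each model's score for a selected metric against its release date and
overlays the cumulative state-of-the-art (SOTA) achieved up to each point.
"""
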
import gradio as gr
import plotly.graph_objects as go
import pandas as pd

# Metrics where a LOWER value is better (model cost rather than quality);
# "best" and the cumulative-SOTA line must use min/cummin for these.
LOWER_IS_BETTER = {'parameters_millions', 'flops_billions', 'inference_time_ms'}

def create_sota_plot(df, metric='accuracy'):
    """
    Create a plot showing model performance evolution over time for a selected metric.

    Parameters:
        df: DataFrame with columns ['model_name', 'release_date'] plus numeric metric columns
        metric: The metric column to visualize
    """
    # Human-readable label for the title and hover text
    metric_label = metric.replace('_', ' ').title()

    # Sort by release date to ensure chronological order
    df_sorted = df.sort_values('release_date').copy()

    # Cumulative best (SOTA) for the selected metric: running minimum for
    # cost metrics, running maximum for quality metrics
    if metric in LOWER_IS_BETTER:
        df_sorted['cumulative_best'] = df_sorted[metric].cummin()
    else:
        df_sorted['cumulative_best'] = df_sorted[metric].cummax()

    # Identify which models are SOTA (where the metric equals the cumulative best)
    df_sorted['is_sota'] = df_sorted[metric] == df_sorted['cumulative_best']

    # SOTA models get text annotations
    sota_df = df_sorted[df_sorted['is_sota']].copy()

    # Create the plot
    fig = go.Figure()
    # Add all models as scatter points (gray for non-SOTA, cyan for SOTA)
    fig.add_trace(go.Scatter(
        x=df_sorted['release_date'],
        y=df_sorted[metric],
        mode='markers',
        name='All models',
        marker=dict(
            color=['#00CED1' if is_sota else 'lightgray'
                   for is_sota in df_sorted['is_sota']],
            size=8,
            opacity=0.7
        ),
        text=df_sorted['model_name'],
        hovertemplate=f'<b>%{{text}}</b><br>Date: %{{x}}<br>{metric_label}: %{{y:.2f}}<extra></extra>'
    ))

    # Add the SOTA line (cumulative best)
    fig.add_trace(go.Scatter(
        x=df_sorted['release_date'],
        y=df_sorted['cumulative_best'],
        mode='lines',
        name='State-of-the-art (cumulative best)',
        line=dict(color='#00CED1', width=2, dash='solid'),
        hovertemplate=f'SOTA {metric_label}: %{{y:.2f}}<br>Date: %{{x}}<extra></extra>'
    ))
    # Add labels for SOTA models (models that improved the best score)
    for _, row in sota_df.iterrows():
        fig.add_annotation(
            x=row['release_date'],
            y=row[metric],
            text=row['model_name'],
            showarrow=True,
            arrowhead=2,
            arrowsize=1,
            arrowwidth=1,
            arrowcolor='gray',
            ax=0,
            ay=-30,
            font=dict(size=10)
        )
    # Update layout
    fig.update_layout(
        title=f'Evolution of Model Performance Over Time - {metric_label}',
        xaxis_title='Release Date',
        yaxis_title=metric_label,
        xaxis=dict(
            showgrid=True,
            gridcolor='lightgray'
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='lightgray'
        ),
        plot_bgcolor='white',
        paper_bgcolor='white',
        height=600,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        ),
        hovermode='closest'
    )

    return fig
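
# Standalone usage of create_sota_plot (a minimal sketch, not part of the app):
# any DataFrame with 'model_name', 'release_date', and a numeric metric column
# works. The three-row frame below is invented purely for illustration.
#
#   _demo_df = pd.DataFrame({
#       'model_name': ['A', 'B', 'C'],
#       'release_date': pd.to_datetime(['2020-01-01', '2021-06-01', '2022-03-01']),
#       'accuracy': [70.0, 75.0, 73.0],
#   })
#   create_sota_plot(_demo_df, 'accuracy').show()
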
def create_sample_dataframe():
    """
    Create a sample DataFrame with multiple metrics for model performance.
    """
    # Sample data with multiple metrics (illustrative values)
    data = {
        'model_name': [
            'SIFT + FVs', 'AlexNet', 'VGG-16', 'GoogLeNet', 'ResNet-50',
            'SPPNet', 'Inception V2', 'Inception V3', 'ResNet-152', 'DenseNet',
            'MobileNet', 'NASNet-A(6)', 'EfficientNet', 'Vision Transformer',
            'CoAtNet-7', 'CLIP', 'DALL-E', 'GPT-Vision', 'Model-X', 'Model-Y',
            # Some models that don't improve SOTA
            'SmallNet-1', 'SmallNet-2', 'BasicCNN', 'SimpleDNN', 'QuickNet',
            'FastNet', 'LiteModel', 'CompactNet', 'MiniVGG', 'TinyResNet'
        ],
        'release_date': pd.to_datetime([
            '2012-01-15', '2012-09-30', '2014-04-10', '2014-09-17', '2015-12-10',
            '2014-06-18', '2015-02-11', '2015-12-02', '2016-05-11', '2016-08-25',
            '2017-04-17', '2017-11-04', '2019-05-28', '2020-10-22',
            '2021-06-09', '2021-01-05', '2021-01-05', '2022-03-14', '2022-07-20', '2022-11-15',
            # Dates for non-SOTA models
            '2013-03-10', '2013-07-22', '2014-01-15', '2015-03-20', '2016-02-14',
            '2017-06-30', '2018-09-12', '2019-02-28', '2020-04-15', '2021-08-30'
        ]),
        'accuracy': [
            53.0, 65.0, 71.5, 74.8, 76.0,
            74.0, 78.0, 81.0, 77.8, 79.2,
            70.6, 82.7, 84.3, 85.2,
            90.88, 86.5, 87.0, 87.79, 87.73, 88.1,
            # Scores for non-SOTA models
            58.0, 62.0, 68.0, 72.0, 73.5,
            75.0, 78.5, 80.0, 82.0, 84.0
        ],
        'top5_accuracy': [
            71.0, 82.0, 89.5, 91.2, 92.5,
            91.0, 93.5, 95.0, 94.0, 94.5,
            89.5, 96.2, 97.1, 97.5,
            98.5, 97.8, 98.0, 98.2, 98.1, 98.3,
            # Top-5 scores for non-SOTA models
            75.0, 80.0, 85.0, 88.0, 90.0,
            91.5, 93.0, 95.5, 96.0, 96.5
        ],
        'parameters_millions': [
            0.5, 62, 138, 6.8, 25.6,
            21.0, 11.2, 23.8, 60.3, 7.9,
            4.2, 88.9, 66.0, 86.0,
            2185.0, 428.0, 1200.0, 1750.0, 890.0, 920.0,
            # Parameters for non-SOTA models
            2.5, 3.8, 15.0, 8.5, 5.2,
            12.0, 3.5, 6.7, 9.0, 11.5
        ],
        'flops_billions': [
            0.1, 1.5, 15.5, 1.5, 3.8,
            2.5, 2.0, 5.7, 11.3, 2.8,
            0.57, 23.8, 9.9, 16.9,
            420.0, 85.0, 250.0, 380.0, 180.0, 195.0,
            # FLOPs for non-SOTA models
            0.3, 0.5, 2.0, 1.2, 0.8,
            1.8, 0.4, 1.0, 1.5, 2.2
        ],
        'inference_time_ms': [
            85, 23, 45, 28, 35,
            32, 26, 30, 48, 38,
            18, 65, 42, 55,
            120, 75, 95, 110, 88, 92,
            # Inference time for non-SOTA models
            15, 20, 30, 25, 22,
            28, 12, 18, 24, 35
        ]
    }
    return pd.DataFrame(data)
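
# To visualize your own results instead of the sample data, any frame with the
# same schema works, e.g. (hypothetical file name, sketch only):
#   df = pd.read_csv('models.csv', parse_dates=['release_date'])
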
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# State-of-the-Art Models Timeline with Multiple Metrics")
    gr.Markdown("""
    This visualization shows the evolution of model performance over time across different metrics.
    Use the dropdown to switch between metrics. The line represents the cumulative best (SOTA)
    value achieved up to each point in time: highest so far for accuracy metrics, lowest so far
    for cost metrics (parameters, FLOPs, inference time).
    """)

    # Create the main DataFrame inline
    df_main = create_sample_dataframe()

    # Get available metrics (exclude non-metric columns)
    metric_columns = [col for col in df_main.columns if col not in ['model_name', 'release_date']]

    # Layout with the metric dropdown in the upper right
    with gr.Row():
        with gr.Column(scale=3):
            # Display dataset info
            gr.Markdown(f"**Total models in dataset:** {len(df_main)}")
            gr.Markdown(
                f"**Date range:** {df_main['release_date'].min().date()} to {df_main['release_date'].max().date()}")
        with gr.Column(scale=1):
            metric_dropdown = gr.Dropdown(
                choices=metric_columns,
                value='accuracy',
                label="Select Metric",
                interactive=True
            )

    plot = gr.Plot(label="Model Performance Evolution")
    # Update the plot and the "best" statistic for the selected metric
    def update_plot_and_stats(selected_metric):
        fig = create_sota_plot(df_main, selected_metric)

        # "Best" is the smallest value for cost metrics, the largest otherwise
        if selected_metric in LOWER_IS_BETTER:
            best_idx = df_main[selected_metric].idxmin()
        else:
            best_idx = df_main[selected_metric].idxmax()
        best_value = df_main.loc[best_idx, selected_metric]
        best_model = df_main.loc[best_idx, 'model_name']

        # Format the statistic with the metric's unit
        label = selected_metric.replace('_', ' ').title()
        if selected_metric == 'parameters_millions':
            stats_text = f"**Best {label}:** {best_value:.1f}M ({best_model})"
        elif selected_metric == 'flops_billions':
            stats_text = f"**Best {label}:** {best_value:.1f}B ({best_model})"
        elif selected_metric == 'inference_time_ms':
            stats_text = f"**Best {label}:** {best_value:.1f}ms ({best_model})"
        else:
            stats_text = f"**Best {label}:** {best_value:.2f}% ({best_model})"
        return fig, stats_text
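
    # Example: with the sample data above, update_plot_and_stats('accuracy')
    # returns the figure plus "**Best Accuracy:** 90.88% (CoAtNet-7)".
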
    # Display the best score for the selected metric
    metric_stats = gr.Markdown()

    # Draw the initial plot on load
    demo.load(
        fn=lambda: update_plot_and_stats('accuracy'),
        outputs=[plot, metric_stats]
    )

    # Redraw when the metric changes
    metric_dropdown.change(
        fn=update_plot_and_stats,
        inputs=metric_dropdown,
        outputs=[plot, metric_stats]
    )
    # Interactive controls
    with gr.Row():
        show_data_btn = gr.Button("Show/Hide DataFrame")
        export_stats_btn = gr.Button("Export Statistics")

    # DataFrame display (initially hidden)
    df_display = gr.Dataframe(
        value=df_main,
        label="Model Performance Data",
        visible=False
    )
    # Track visibility so the button actually toggles rather than only showing
    df_visible = gr.State(False)

    def toggle_dataframe(visible):
        return gr.Dataframe(value=df_main, visible=not visible), not visible

    def export_statistics():
        stats = []
        for metric in metric_columns:
            # Same convention as the plot: min is best for cost metrics
            if metric in LOWER_IS_BETTER:
                best_idx = df_main[metric].idxmin()
            else:
                best_idx = df_main[metric].idxmax()
            stats.append({
                'Metric': metric.replace('_', ' ').title(),
                'Best Value': f"{df_main.loc[best_idx, metric]:.2f}",
                'Best Model': df_main.loc[best_idx, 'model_name'],
                'Average': f"{df_main[metric].mean():.2f}"
            })
        stats_df = pd.DataFrame(stats)
        return gr.Dataframe(value=stats_df, visible=True)

    stats_display = gr.Dataframe(
        label="Statistics Summary",
        visible=False
    )

    show_data_btn.click(
        fn=toggle_dataframe,
        inputs=df_visible,
        outputs=[df_display, df_visible]
    )
    export_stats_btn.click(
        fn=export_statistics,
        outputs=stats_display
    )
    gr.Markdown("""
    ### About this visualization:
    - **Metric Selector**: Use the dropdown in the upper right to switch between performance metrics
    - **Cyan line**: Cumulative best (SOTA) value over time for the selected metric
    - **Cyan dots**: Models that set (or matched) the SOTA when released
    - **Gray dots**: Models that didn't beat the existing SOTA
    - **Hover over points**: See model names, release dates, and metric values

    ### Available Metrics:
    - **Accuracy**: Top-1 accuracy on ImageNet (%)
    - **Top5 Accuracy**: Top-5 accuracy on ImageNet (%)
    - **Parameters (Millions)**: Model size in millions of parameters (lower is better)
    - **FLOPs (Billions)**: Computational cost in billions of operations (lower is better)
    - **Inference Time (ms)**: Time to process a single image (lower is better)
    """)

demo.launch()