import json
import os
import re
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

from src.about import (
    ABOUT_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.formatting import has_no_nan_values, make_clickable_model, model_hyperlink
# Score columns of the leaderboard, in display order. The "Model" column is
# added in front of these when `data` is assembled below.
_SCORE_COLUMNS = (
    "Perception",
    "Reasoning",
    "IF",
    "Safety",
    "AMU Score",
    "Modality Selection",
    "Instruction Following",
    "Modality Synergy",
    "AMG Score",
    "Overall",
)

# One entry per model: (display name, reference URL, scores ordered like
# _SCORE_COLUMNS). Keeping a model's name, link and scores on one row means
# they cannot drift out of alignment with each other.
_LEADERBOARD_ROWS = (
    (
        "LLaVA-v1.5-7B†",
        "https://huggingface.co/liuhaotian/llava-v1.5-7b",
        (2.66, 2.67, 2.50, 2.90, 2.68, 0.182, 6.61, 0.43, 1.56, 2.12),
    ),
    (
        "Qwen2-VL-7B-Instruct†",
        "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
        (2.76, 3.07, 2.40, 4.05, 3.07, 0.177, 7.01, 0.58, 2.16, 2.62),
    ),
    (
        "Qwen2-Audio-7B-Instruct†",
        "https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct",
        (3.58, 4.53, 3.40, 2.65, 3.54, 0.190, 6.69, 0.51, 1.97, 2.73),
    ),
    (
        "Chameleon-7B†",
        "https://huggingface.co/facebook/chameleon-7b",
        (1.44, 2.97, 2.80, 2.45, 2.41, 0.156, 6.09, 0.54, 1.57, 1.99),
    ),
    (
        "Llama3.1-8B-Instruct†",
        "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
        (1.05, 1.20, 1.20, 1.35, 1.20, 0.231, 7.47, 0.60, 3.08, 2.14),
    ),
    (
        "Gemini-1.5-Pro†",
        "https://deepmind.google/technologies/gemini/pro/",
        (5.36, 5.67, 6.70, 6.70, 6.11, 0.227, 8.62, 0.52, 3.05, 4.58),
    ),
    (
        "GPT-4o†",
        "https://openai.com/index/hello-gpt-4o/",
        (2.66, 3.48, 4.20, 5.15, 3.87, 0.266, 8.62, 0.58, 3.96, 3.92),
    ),
)

# Display name -> reference URL, in leaderboard row order.
model_links = {name: url for name, url, _ in _LEADERBOARD_ROWS}

# Column-oriented view of the same rows: {"Model": [...], "<score column>": [...]}.
# zip(*...) transposes the per-model score tuples into per-column sequences.
_scores_by_column = zip(*(scores for _, _, scores in _LEADERBOARD_ROWS))
data = {
    "Model": list(model_links.keys()),
    **{
        column: list(values)
        for column, values in zip(_SCORE_COLUMNS, _scores_by_column)
    },
}

# Leaderboard frame, best "Overall" score first. The original row labels are
# kept (the index is not reset here).
df = pd.DataFrame(data)
df = df.sort_values("Overall", ascending=False)

# Number of models on the leaderboard.
total_models = len(df.index)
# Metric columns of each score family; the family's aggregate score comes last.
_AMU_METRICS = ["Perception", "Reasoning", "IF", "Safety", "AMU Score"]
_AMG_METRICS = [
    "Modality Selection",
    "Instruction Following",
    "Modality Synergy",
    "AMG Score",
]

# Selectable column sets, keyed by the label offered in the column picker.
# Every set starts with "Model"; only "ALL" carries the "Overall" column.
COLUMN_GROUPS = {
    "ALL": ["Model", *_AMU_METRICS, *_AMG_METRICS, "Overall"],
    "AMU": ["Model", *_AMU_METRICS],
    "AMG": ["Model", *_AMG_METRICS],
}
def format_table(df, links=None):
    """Return a display-ready copy of *df* with Markdown-formatted cells.

    The input frame is left untouched; all formatting is applied to a copy so
    the caller's numeric scores stay numeric and can still be sorted.

    Args:
        df: Leaderboard frame. Any subset of the leaderboard columns may be
            present.
        links: Optional mapping of model name to URL used to build the
            "Model" hyperlinks. Defaults to the module-level ``model_links``.

    Returns:
        A new DataFrame in which
        * every ``float64`` column is rendered as a two-decimal string,
        * the "AMU Score", "AMG Score" and "Overall" columns (when present)
          are wrapped in ``**...**`` (Markdown bold),
        * the "Model" column (when present) holds ``[name](url)`` Markdown
          links; a name with no known URL is left as plain text.
    """
    if links is None:
        links = model_links

    # Format a copy: rewriting the caller's frame in place would turn its
    # scores into strings and break any later numeric sort on that frame.
    table = df.copy()

    # NOTE(review): this also rounds columns whose source values carry three
    # decimals down to two - confirm that is the intended display precision.
    for col in table.select_dtypes(include=["float64"]).columns:
        table[col] = table[col].apply(lambda value: f"{value:.2f}")

    for col in ("AMU Score", "AMG Score", "Overall"):
        if col in table.columns:
            table[col] = table[col].apply(lambda text: f"**{text}**")

    def _as_link(name):
        """Render *name* as a Markdown link, or unchanged when no URL is known."""
        url = links.get(name)
        return f"[{name}]({url})" if url else name

    if "Model" in table.columns:
        table["Model"] = table["Model"].apply(_as_link)

    return table
def _build_search_pattern(terms):
    """Combine search *terms* into one alternation pattern that always compiles."""
    safe_terms = []
    for term in terms:
        try:
            re.compile(term)
        except re.error:
            # Not a valid regex (e.g. a lone "("): match it literally instead
            # of letting the error escape into the UI callback.
            term = re.escape(term)
        safe_terms.append(term)

    pattern = "|".join(safe_terms)
    try:
        re.compile(pattern)
    except re.error:
        # Terms that are valid alone can still clash once joined (for example
        # an inline flag that is no longer at the start of the pattern).
        pattern = "|".join(re.escape(term) for term in terms)
    return pattern


def regex_table(dataframe, regex, filter_button, column_group="ALL"):
    """Build the leaderboard view for a search query and a column group.

    Args:
        dataframe: Source leaderboard frame; it is not modified.
        regex: Comma-separated search terms. Each term is treated as a
            case-insensitive regular expression matched against the "Model"
            column; a row is kept when any term matches. Empty terms (such as
            the one left by a trailing comma) are ignored, and a term that is
            not a valid regular expression is matched literally. An empty or
            ``None`` query keeps every row.
        filter_button: Accepted for interface compatibility; not used.
        column_group: Key of ``COLUMN_GROUPS`` selecting the columns to show.
            Unknown keys fall back to "ALL".

    Returns:
        A new DataFrame restricted to the selected columns and matching rows,
        sorted in descending order by "Overall" (or by the group's last column
        when "Overall" is not shown), passed through ``format_table`` and
        prefixed with an unnamed 1-based rank column.
    """
    columns_to_show = COLUMN_GROUPS.get(column_group, COLUMN_GROUPS["ALL"])
    table = dataframe[columns_to_show].copy()

    # Drop blank terms: joining them in would add an empty alternative to the
    # pattern, and an empty alternative matches every row.
    terms = [term.strip() for term in (regex or "").split(",")]
    terms = [term for term in terms if term]
    if terms:
        pattern = _build_search_pattern(terms)
        matches = table["Model"].str.contains(pattern, case=False, na=False)
        table = table[matches]

    # Sort while the scores are still numeric; format_table turns them into
    # strings.
    sort_column = "Overall" if "Overall" in columns_to_show else columns_to_show[-1]
    table = table.sort_values(by=sort_column, ascending=False).reset_index(drop=True)

    table = format_table(table)

    # Unnamed leading column holding the 1-based rank of each row.
    table.insert(0, "", range(1, 1 + len(table)))

    return table
# NOTE(review): indentation was lost in the reviewed copy of this file, so the
# nesting below is reconstructed from the order of the calls. In particular,
# confirm that the Citation accordion belongs at the top level of the page
# rather than inside the "About" tab.
with gr.Blocks(css=custom_css) as app:
    # Page header: title banner, then the introduction text with the model
    # count substituted in.
    gr.HTML(TITLE)
    with gr.Row(), gr.Column(scale=6):
        gr.Markdown(INTRODUCTION_TEXT.format(str(total_models)))

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 Model Performance Leaderboard"):
            # Controls: free-text model search and the column-group picker.
            with gr.Row():
                search_overall = gr.Textbox(
                    show_label=False,
                    label="Model Search (delimit with , )",
                    placeholder="🔍 Search model (separate multiple queries with ,) and press ENTER...",
                )
                column_group = gr.Radio(
                    label="Select columns to show",
                    choices=list(COLUMN_GROUPS.keys()),
                    value="ALL",
                )

            with gr.Row():
                # Unformatted copy of the data, rendered with visible=False.
                # Nothing in this section reads it back.
                performance_table_hidden = gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    datatype='markdown',
                    elem_id="performance_table_hidden",
                    visible=False,
                    wrap=True,
                )
                # The table users see; starts as the unfiltered "ALL" view.
                performance_table = gr.Dataframe(
                    regex_table(df.copy(), "", []),
                    headers=df.columns.tolist(),
                    datatype='markdown',
                    elem_id="performance_table",
                    show_label=False,
                    wrap=True,
                )

        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)

    with gr.Accordion("📚 Citation", open=False):
        citation_button = gr.Textbox(
            label="Copy the following to cite these results.",
            value=CITATION_BUTTON_TEXT,
            elem_id="citation-button",
            lines=7,
            show_copy_button=True,
        )

    def update_table(search_text, selected_group):
        """Rebuild the visible table for the current search text and column group."""
        return regex_table(df, search_text, [], selected_group)

    # Both controls drive the same refresh: a change to either one re-renders
    # the table from the current value of both.
    for _control in (search_overall, column_group):
        _control.change(
            update_table,
            inputs=[search_overall, column_group],
            outputs=performance_table,
        )
# Interval between runs of the background job, in seconds (five hours).
_JOB_INTERVAL_SECONDS = 18000

# Start a background scheduler whose only job is a no-op callable fired on a
# fixed interval.
scheduler = BackgroundScheduler()
scheduler.add_job(
    lambda: None,
    trigger="interval",
    seconds=_JOB_INTERVAL_SECONDS,
)
scheduler.start()

# Launch the Gradio app, requesting a public share link.
app.launch(share=True)