# Hugging Face Space: PKU-Alignment / EvalAnything-LeaderBoard (status: Running)
# File size: 6,679 Bytes

import os
import json
import gradio as gr
import pandas as pd
import numpy as np

from pathlib import Path
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    ABOUT_TEXT
)
from src.display.css_html_js import custom_css
from src.display.formatting import has_no_nan_values, make_clickable_model, model_hyperlink

# Model display names mapped to their public home pages.
model_links = {
    "LLaVA-v1.5-7B†": "https://huggingface.co/liuhaotian/llava-v1.5-7b",
    "Qwen2-VL-7B-Instruct†": "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
    "Qwen2-Audio-7B-Instruct†": "https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct",
    "Chameleon-7B†": "https://huggingface.co/facebook/chameleon-7b",
    "Llama3.1-8B-Instruct†": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
    "Gemini-1.5-Pro†": "https://deepmind.google/technologies/gemini/pro/",
    "GPT-4o†": "https://openai.com/index/hello-gpt-4o/",
}

# Metric columns of the AMU and AMG groups (without "Model" / "Overall").
_AMU_METRICS = ["Perception", "Reasoning", "IF", "Safety", "AMU Score"]
_AMG_METRICS = [
    "Modality Selection",
    "Instruction Following",
    "Modality Synergy",
    "AMG Score",
]
_SCORE_COLUMNS = [*_AMU_METRICS, *_AMG_METRICS, "Overall"]

# One row of scores per model, in the same order as the keys of
# ``model_links``; values within a row follow ``_SCORE_COLUMNS``.
_SCORE_ROWS = (
    (2.66, 2.67, 2.50, 2.90, 2.68, 0.182, 6.61, 0.43, 1.56, 2.12),
    (2.76, 3.07, 2.40, 4.05, 3.07, 0.177, 7.01, 0.58, 2.16, 2.62),
    (3.58, 4.53, 3.40, 2.65, 3.54, 0.190, 6.69, 0.51, 1.97, 2.73),
    (1.44, 2.97, 2.80, 2.45, 2.41, 0.156, 6.09, 0.54, 1.57, 1.99),
    (1.05, 1.20, 1.20, 1.35, 1.20, 0.231, 7.47, 0.60, 3.08, 2.14),
    (5.36, 5.67, 6.70, 6.70, 6.11, 0.227, 8.62, 0.52, 3.05, 4.58),
    (2.66, 3.48, 4.20, 5.15, 3.87, 0.266, 8.62, 0.58, 3.96, 3.92),
)

# Column-oriented view of the rows above: column title -> list of values.
data = {
    "Model": list(model_links),
    **{
        title: list(values)
        for title, values in zip(_SCORE_COLUMNS, zip(*_SCORE_ROWS))
    },
}

# Master table, best overall score first.
df = pd.DataFrame(data)
df = df.sort_values(by="Overall", ascending=False)
total_models = df.shape[0]

# Column groups selectable in the UI.
COLUMN_GROUPS = {
    "ALL": ["Model", *_AMU_METRICS, *_AMG_METRICS, "Overall"],
    "AMU": ["Model", *_AMU_METRICS],
    "AMG": ["Model", *_AMG_METRICS],
}

def format_table(df, links=None):
    """Return a display-ready copy of a leaderboard table.

    Float columns are rendered as strings with two decimals, the summary
    columns ('AMU Score', 'AMG Score', 'Overall') are wrapped in Markdown
    bold markers, and model names become Markdown links.

    Args:
        df: Table to format. It is not modified.
        links: Optional mapping from model name to URL. Defaults to the
            module-level ``model_links``.

    Returns:
        A new DataFrame with the formatted values. Model names without an
        entry in ``links`` are kept as plain text instead of raising.
    """
    if links is None:
        links = model_links

    # Format a copy: callers usually pass a slice of the master table, and
    # writing into it would both alter their data and make a second call
    # double-format the already formatted strings.
    table = df.copy()

    # Fixed two-decimal rendering for every float column.
    for col in table.select_dtypes(include=['float64']).columns:
        table[col] = table[col].apply(lambda value: f"{value:.2f}")

    # Emphasise the aggregate scores that are present in this view.
    for col in ('AMU Score', 'AMG Score', 'Overall'):
        if col in table.columns:
            table[col] = table[col].apply(lambda cell: f'**{cell}**')

    # Turn model names into Markdown links where a URL is known.
    if 'Model' in table.columns:
        table['Model'] = table['Model'].apply(
            lambda name: f'[{name}]({links[name]})' if name in links else name
        )
    return table

def regex_table(dataframe, regex, filter_button, column_group="ALL"):
    """Build the table shown in the UI for a search query and column group.

    Args:
        dataframe: Master score table; it is not modified.
        regex: Comma-separated search terms matched case-insensitively
            against the "Model" column. Each term is treated as a regular
            expression; terms are OR-ed together. Empty or ``None`` means
            no filtering.
        filter_button: Unused; kept so existing callers keep working.
        column_group: Key of ``COLUMN_GROUPS`` selecting the columns to
            show. Unknown keys fall back to "ALL".

    Returns:
        A new DataFrame restricted to the selected columns and matching
        rows, sorted in descending order by 'Overall' (or by the last
        selected column when 'Overall' is not shown), formatted by
        ``format_table`` and prefixed with a 1-based rank column whose
        header is the empty string.
    """
    # Local import: `re` is not among the file-level imports.
    import re

    columns_to_show = COLUMN_GROUPS.get(column_group, COLUMN_GROUPS["ALL"])
    # Copy after selecting so later writes never touch the caller's frame.
    table = dataframe[columns_to_show].copy()

    # Drop empty fragments: a trailing or doubled comma would otherwise add
    # an empty alternative to the pattern, which matches every row.
    patterns = [part.strip() for part in (regex or "").split(",")]
    patterns = [part for part in patterns if part]
    if patterns:
        combined_regex = '|'.join(patterns)
        try:
            re.compile(combined_regex)
        except re.error:
            # Half-typed or invalid expressions (e.g. "(") are searched
            # for literally instead of raising inside the event handler.
            combined_regex = '|'.join(re.escape(part) for part in patterns)
        matches = table["Model"].str.contains(combined_regex, case=False, na=False)
        table = table[matches]

    sort_key = 'Overall' if 'Overall' in columns_to_show else columns_to_show[-1]
    table = table.sort_values(by=sort_key, ascending=False).reset_index(drop=True)

    # Sorting must happen before formatting turns the scores into strings.
    table = format_table(table)

    # Rank column, numbered from 1 in display order.
    table.insert(0, '', range(1, 1 + len(table)))

    return table

# Build the Gradio UI: title and introduction, a leaderboard tab with a
# search box and a column-group selector, an About tab, and a citation box.
with gr.Blocks(css=custom_css) as app:
    gr.HTML(TITLE)
    with gr.Row():
        with gr.Column(scale=6):
            # The number of models is passed as the only positional
            # format argument of INTRODUCTION_TEXT.
            gr.Markdown(INTRODUCTION_TEXT.format(str(total_models)))
    
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 Model Performance Leaderboard"):
            with gr.Row():
                search_overall = gr.Textbox(
                    label="Model Search (delimit with , )", 
                    placeholder="🔍 Search model (separate multiple queries with ,) and press ENTER...",
                    show_label=False
                )
                # Choices are the keys of COLUMN_GROUPS; "ALL" is preselected.
                column_group = gr.Radio(
                    choices=list(COLUMN_GROUPS.keys()),
                    value="ALL",
                    label="Select columns to show"
                )
            
            with gr.Row():
                # Invisible table seeded with the unformatted, sorted scores.
                # No event handler in this block reads from it.
                performance_table_hidden = gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    elem_id="performance_table_hidden",
                    wrap=True,
                    visible=False,
                    datatype='markdown',
                )
                # Visible table; the initial render uses an empty search and
                # the default "ALL" column group.
                # NOTE(review): `headers` is taken from df and so lacks the
                # rank column that regex_table prepends — confirm Gradio
                # derives the headers from the DataFrame value instead.
                performance_table = gr.Dataframe(
                    regex_table(df.copy(), "", []),
                    headers=df.columns.tolist(),
                    elem_id="performance_table",
                    wrap=True,
                    show_label=False,
                    datatype='markdown',
                )
        
        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)
    
    # Collapsed-by-default accordion holding the citation text.
    with gr.Accordion("📚 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            lines=7,
            label="Copy the following to cite these results.",
            elem_id="citation-button",
            show_copy_button=True,
        )
    
    # Set up event handlers
    def update_table(search_text, selected_group):
        """Rebuild the visible table from the module-level ``df``.

        Passes the current search text and column group to ``regex_table``
        (with an empty list for its unused ``filter_button`` argument) and
        returns the resulting DataFrame.
        """
        return regex_table(df, search_text, [], selected_group)
    
    # Both controls feed the same handler with the same pair of inputs, so
    # the table reflects the current search text and column group together.
    # NOTE(review): the placeholder says "press ENTER" but the textbox is
    # wired to `.change` rather than `.submit` — confirm which is intended.
    search_overall.change(
        update_table,
        inputs=[search_overall, column_group],
        outputs=performance_table
    )
    
    column_group.change(
        update_table,
        inputs=[search_overall, column_group],
        outputs=performance_table
    )

def _noop():
    """Do nothing; the scheduled job has no work attached to it."""
    return None


# Background scheduler running the no-op job on a 5-hour interval.
scheduler = BackgroundScheduler()
scheduler.add_job(_noop, trigger="interval", hours=5)
scheduler.start()

# Start the Gradio server, requesting a public share link.
app.launch(share=True)