Spaces:

Ayanami0730
/

DeepResearch-Leaderboard

Running

File size: 6,046 Bytes

from __future__ import annotations
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Union

# Paths are resolved relative to the directory two levels above this file.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

# Category whose models get visually marked in the table, and the marker used.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"

# Maps the CSV's full column names to the abbreviated headers shown in the table.
COLUMN_RENAME_MAP = {
    "overall_score": "overall",
    "comprehensiveness": "comp.",
    "insight": "insight",
    "instruction_following": "inst.",
    "readability": "read.",
    "citation_accuracy": "c.acc.",
    "effective_citations": "eff.c.",
}

# Maps each category label to the model names that belong to it.
# Key order is significant: it is the order of the category checkboxes.
MODEL_CATEGORIES = {
    # Dedicated deep-research products.
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
    ],
    # General-purpose LLMs with a search tool attached.
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "perplexity-sonar-reasoning-pro",
        "perplexity-sonar-reasoning",
        "perplexity-sonar-pro",
        "gemini-2.5-pro-with-grounding",
        "gpt-4o-search-preview",
        "perplexity-sonar",
        "gpt-4.1-with-search",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "gpt-4.1-mini-with-search",
        "claude-3-5-sonnet-with-search",
    ],
}

def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag each row with its model category.

    Column headers are stripped of surrounding whitespace. A ``category``
    column is added: for each row it holds the first key of
    ``MODEL_CATEGORIES`` whose list contains the row's ``model`` value, or
    ``"Others"`` when no list contains it.

    Returns:
        The loaded table with the additional ``category`` column.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    table = pd.read_csv(DATA_PATH)
    table.columns = [header.strip() for header in table.columns]

    def category_of(name):
        # First category (in dict order) that lists this model wins.
        matching_labels = (
            label
            for label, members in MODEL_CATEGORIES.items()
            if name in members
        )
        return next(matching_labels, "Others")

    table['category'] = table['model'].apply(category_of)
    return table

def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Return a display-ready copy of *df* ranked by overall score.

    The input is sorted by ``overall_score`` in descending order, a 1-based
    ``Rank`` column is inserted first, score columns are renamed via
    ``COLUMN_RENAME_MAP`` and rounded to two decimals, and the ``model``
    cell of every row in ``CATEGORY_TO_HIGHLIGHT`` is wrapped in a colored
    ``<span>`` prefixed with ``HIGHLIGHT_EMOJI``.

    Args:
        df: Table containing at least the ``overall_score``, ``model`` and
            ``category`` columns. It is not modified.

    Returns:
        A new DataFrame; an empty input yields an empty, correctly shaped
        result.
    """
    # A stable sort keeps tied scores in their input order, so ties are
    # ranked the same way in filtered and unfiltered views.
    ranked = df.sort_values(
        by='overall_score', ascending=False, kind='mergesort'
    ).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    # Round the score columns that are present to two decimals.
    numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.']
    for col in numeric_columns:
        if col in ranked.columns:
            ranked[col] = ranked[col].round(2)

    # Color-highlight the chosen category. This is done with a boolean mask
    # rather than a row-wise ``apply``: on an empty frame ``apply(axis=1)``
    # returns a version-dependent result that either fails on assignment or
    # silently turns the column into float64.
    is_highlighted = ranked['category'] == CATEGORY_TO_HIGHLIGHT
    if is_highlighted.any():
        names = ranked.loc[is_highlighted, 'model'].astype(str)
        ranked.loc[is_highlighted, 'model'] = (
            f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} ' + names + '</span>'
        )

    return ranked

def filter_data(search_text: str, selected_categories: list) -> pd.DataFrame:
    """Reload the leaderboard, apply the UI filters and return the ranked table.

    Args:
        search_text: Text typed by the user. After stripping whitespace it is
            matched case-insensitively as a literal substring of the
            ``model`` column; blank or ``None`` disables the name filter.
        selected_categories: Category labels to keep. An empty selection
            disables the category filter.

    Returns:
        The filtered rows passed through ``make_ranked``.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: the query is user input, not a pattern. With the
        # default regex matching, text such as "(" raises re.error and "."
        # matches any character.
        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)

def create_leaderboard_tab():
    """Build the "🏆Leaderboard" tab and return its search textbox.

    The tab holds a model-name search box, a category checkbox group with
    every category pre-selected, the ranked leaderboard table, and a legend
    describing the columns. A change to either input re-runs
    ``filter_data`` and replaces the table contents.

    Returns:
        The ``gr.Textbox`` used for model search.
    """
    category_names = list(MODEL_CATEGORIES)

    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    label="Model Search",
                    placeholder="Entering model name to search...",
                    value="",
                )
            with gr.Column(scale=2):
                category_checkboxes = gr.CheckboxGroup(
                    label="Model Categories",
                    choices=category_names,
                    value=list(category_names),
                )

        # Initial table contents: the full, unfiltered ranking.
        initial_df = make_ranked(load_leaderboard())

        # Every column is rendered as plain text except 'model', which
        # carries the HTML highlight markup.
        datatypes = [
            "html" if column == 'model' else "str"
            for column in initial_df.columns
        ]

        table = gr.Dataframe(
            value=initial_df,
            datatype=datatypes,
            wrap=False,  # keep cell text on one line
            line_breaks=False,  # do not render line breaks inside cells
            max_height=600,
            show_label=False,
            elem_id="leaderboard_table",
        )

        def update_display(search_text, selected_categories):
            return filter_data(search_text, selected_categories)

        # Both inputs refresh the table through the same callback.
        filter_inputs = [search_box, category_checkboxes]
        for control in (search_box, category_checkboxes):
            control.change(
                fn=update_display,
                inputs=filter_inputs,
                outputs=table,
            )

        # Legend explaining the table columns.
        with gr.Row():
            gr.Markdown(f"""
            ### 📊 Column Descriptions
            - **Rank**: Model ranking based on overall score
            - **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
            - **overall**: Overall Score (weighted average of all metrics)
            - **comp.**: Comprehensiveness - How thorough and complete the research is
            - **insight**: Insight Quality - Depth and value of analysis
            - **inst.**: Instruction Following - Adherence to user instructions
            - **read.**: Readability - Clarity and organization of content
            - **c.acc.**: Citation Accuracy - Correctness of references
            - **eff.c.**: Effective Citations - Relevance and quality of sources
            - **category**: Model category
            """)

    return search_box