File size: 6,046 Bytes
71d9111
927e909
 
 
d115fb4
927e909
 
 
 
 
71d9111
927e909
71d9111
927e909
b91c8cc
927e909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71d9111
927e909
 
 
 
 
 
 
 
 
 
 
b91c8cc
 
 
 
 
 
71d9111
 
 
 
 
 
 
141f575
71d9111
141f575
927e909
 
 
 
 
 
 
 
 
141f575
71d9111
927e909
 
 
 
71d9111
 
 
 
 
 
 
 
 
 
 
 
927e909
71d9111
b91c8cc
927e909
71d9111
 
 
 
 
 
 
927e909
71d9111
 
 
 
 
 
 
927e909
 
 
71d9111
 
927e909
b91c8cc
927e909
 
 
 
 
 
 
 
 
 
 
71d9111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
from __future__ import annotations
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Union

# Paths are resolved relative to the directory two levels above this file.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

# Category whose models get visually marked in the table, and the marker used.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"

# Raw CSV column name -> abbreviated header shown in the table.
COLUMN_RENAME_MAP = {
    "overall_score": "overall",
    "comprehensiveness": "comp.",
    "insight": "insight",
    "instruction_following": "inst.",
    "readability": "read.",
    "citation_accuracy": "c.acc.",
    "effective_citations": "eff.c.",
}

# Category name -> model names belonging to it. Models listed nowhere here
# are labelled "Others" when the leaderboard is loaded.
MODEL_CATEGORIES = {
    CATEGORY_TO_HIGHLIGHT: [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "perplexity-sonar-reasoning-pro",
        "perplexity-sonar-reasoning",
        "perplexity-sonar-pro",
        "gemini-2.5-pro-with-grounding",
        "gpt-4o-search-preview",
        "perplexity-sonar",
        "gpt-4.1-with-search",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "gpt-4.1-mini-with-search",
        "claude-3-5-sonnet-with-search",
    ],
}

def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and label every row with a model category.

    Column headers are stripped of surrounding whitespace. A ``category``
    column is added: the first ``MODEL_CATEGORIES`` entry whose model list
    contains the row's ``model`` value, or ``"Others"`` when none does.

    Returns:
        The CSV contents plus the ``category`` column.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    table = pd.read_csv(DATA_PATH)
    table.columns = [header.strip() for header in table.columns]

    def category_of(name):
        # First category (in declaration order) listing this model wins.
        matches = (
            label
            for label, members in MODEL_CATEGORIES.items()
            if name in members
        )
        return next(matches, "Others")

    table['category'] = table['model'].apply(category_of)
    return table

def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Return a display-ready, ranked copy of the leaderboard.

    Rows are sorted by ``overall_score`` (highest first) and numbered from 1
    in a new leading ``Rank`` column. Metric columns are renamed to their
    abbreviations from ``COLUMN_RENAME_MAP`` and rounded to two decimals.
    Models whose ``category`` equals ``CATEGORY_TO_HIGHLIGHT`` have their
    name wrapped in a coloured ``<span>`` prefixed with ``HIGHLIGHT_EMOJI``.

    Args:
        df: Leaderboard rows containing at least the ``overall_score``,
            ``model`` and ``category`` columns. May be empty. It is not
            modified; all changes happen on the sorted copy.

    Returns:
        A new DataFrame ready to be shown in the table component.
    """
    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    # Round every renamed metric column that is present to two decimals.
    # The abbreviations come straight from the rename map so the two cannot
    # drift apart.
    for col in COLUMN_RENAME_MAP.values():
        if col in ranked.columns:
            ranked[col] = ranked[col].round(2)

    # Colour-highlight the models of the highlighted category.
    # Built element-wise instead of DataFrame.apply(axis=1): on an empty
    # frame (e.g. a search with no matches) apply may hand back a DataFrame
    # instead of a Series, which cannot be assigned to a single column.
    ranked['model'] = [
        f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {model}</span>'
        if category == CATEGORY_TO_HIGHLIGHT
        else model
        for model, category in zip(ranked['model'], ranked['category'])
    ]

    return ranked

def filter_data(search_text: str, selected_categories: list) -> pd.DataFrame:
    """Load the leaderboard, apply the UI filters and return the ranked table.

    Args:
        search_text: Text typed into the search box. Matched as a literal,
            case-insensitive substring of the model name. Blank or ``None``
            disables the name filter.
        selected_categories: Category names to keep. An empty selection
            disables the category filter.

    Returns:
        The filtered rows, ranked and formatted by ``make_ranked``.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: the query is free text typed by the user. Interpreting
        # it as a regular expression would raise on input such as "(" or "["
        # and would treat "." in names like "gpt-4.1" as a wildcard.
        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)

def create_leaderboard_tab():
    """Build the "🏆Leaderboard" tab: search box, category filter and table.

    The table initially shows the full ranked leaderboard. Changing either
    the search box or the category checkboxes re-filters the data and
    refreshes the table. A legend describing the columns is rendered below.

    Returns:
        The search ``gr.Textbox`` component.
    """
    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    value="",
                    label="Model Search",
                    placeholder="Entering model name to search...",
                )
            with gr.Column(scale=2):
                # All categories are ticked by default.
                category_checkboxes = gr.CheckboxGroup(
                    choices=list(MODEL_CATEGORIES.keys()),
                    value=list(MODEL_CATEGORIES.keys()),
                    label="Model Categories",
                )

        # Unfiltered ranking used as the table's initial content.
        initial_df = make_ranked(load_leaderboard())

        # Every column is declared as plain text except `model`, whose cells
        # may carry HTML markup.
        column_types = ["str"] * len(initial_df.columns)
        column_types[initial_df.columns.get_loc('model')] = "html"

        table = gr.Dataframe(
            value=initial_df,
            datatype=column_types,
            wrap=False,  # keep cell text from wrapping
            line_breaks=False,  # no line breaks inside cells
            max_height=600,  # cap the table height
            show_label=False,  # hide the component label
            elem_id="leaderboard_table",  # element id for the table
        )

        def update_display(search_text, selected_categories):
            return filter_data(search_text, selected_categories)

        # Both controls trigger the same refresh with the same inputs.
        for control in (search_box, category_checkboxes):
            control.change(
                fn=update_display,
                inputs=[search_box, category_checkboxes],
                outputs=table,
            )

        # Column legend at the bottom of the tab.
        with gr.Row():
            gr.Markdown(f"""
            ### 📊 Column Descriptions
            - **Rank**: Model ranking based on overall score
            - **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
            - **overall**: Overall Score (weighted average of all metrics)
            - **comp.**: Comprehensiveness - How thorough and complete the research is
            - **insight**: Insight Quality - Depth and value of analysis
            - **inst.**: Instruction Following - Adherence to user instructions
            - **read.**: Readability - Clarity and organization of content
            - **c.acc.**: Citation Accuracy - Correctness of references
            - **eff.c.**: Effective Citations - Relevance and quality of sources
            - **category**: Model category
            """)

    return search_box