# Listing metadata from the file viewer: file size 6,046 bytes (commit-hash and line-number gutters are not part of the module).
from __future__ import annotations
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Union
# Paths are resolved relative to this file: BASE_DIR is the parent of the
# directory that contains this module, and the leaderboard CSV is expected
# at <BASE_DIR>/data/leaderboard.csv.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR / "data" / "leaderboard.csv"
# Highlighting constants: rows whose category equals CATEGORY_TO_HIGHLIGHT get
# their model name prefixed with HIGHLIGHT_EMOJI when the table is rendered.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"
# Column rename map: full metric column name (as stored in the CSV) ->
# abbreviated header shown in the leaderboard table.
COLUMN_RENAME_MAP = {
    'overall_score': 'overall',
    'comprehensiveness': 'comp.',
    'insight': 'insight',
    'instruction_following': 'inst.',
    'readability': 'read.',
    'citation_accuracy': 'c.acc.',
    'effective_citations': 'eff.c.'
}
# Model category map: category label -> list of model names belonging to it.
# Models that appear in none of these lists are labelled "Others" when the
# leaderboard is loaded. The keys also serve as the category checkbox choices.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research"
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "perplexity-sonar-reasoning-pro",
        "perplexity-sonar-reasoning",
        "perplexity-sonar-pro",
        "gemini-2.5-pro-with-grounding",
        "gpt-4o-search-preview",
        "perplexity-sonar",
        "gpt-4.1-with-search",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "gpt-4.1-mini-with-search",
        "claude-3-5-sonnet-with-search"
    ]
}
def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag every row with its model category.

    Column names are stripped of surrounding whitespace. A ``category``
    column is added: the first category in ``MODEL_CATEGORIES`` whose list
    contains the row's ``model`` value, or ``"Others"`` when none does.

    Returns:
        The loaded DataFrame with the extra ``category`` column.

    Raises:
        FileNotFoundError: If the CSV at ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        message = (
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )
        raise FileNotFoundError(message)

    frame = pd.read_csv(DATA_PATH)
    frame.columns = [name.strip() for name in frame.columns]

    # Reverse lookup (model name -> category). setdefault keeps the first
    # category that lists a model, matching a top-to-bottom scan of the map.
    category_of = {}
    for label, members in MODEL_CATEGORIES.items():
        for member in members:
            category_of.setdefault(member, label)

    frame['category'] = frame['model'].map(
        lambda model_name: category_of.get(model_name, "Others")
    )
    return frame
def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Return a display-ready copy of *df*, ranked by overall score.

    Steps, in order:
      1. Sort by ``overall_score`` (descending) and reset the index.
      2. Insert a 1-based ``Rank`` column as the first column.
      3. Rename metric columns to their short headers via ``COLUMN_RENAME_MAP``.
      4. Round every renamed metric column that is present to two decimals.
      5. Wrap the ``model`` value of rows in ``CATEGORY_TO_HIGHLIGHT`` in a
         colored ``<span>`` prefixed with ``HIGHLIGHT_EMOJI``.

    An empty input (e.g. a search that matches nothing) yields an empty
    ranked frame instead of raising.

    Args:
        df: Leaderboard rows; must contain ``overall_score``, ``model`` and
            ``category`` columns.

    Returns:
        A new DataFrame; *df* itself is not modified.

    Raises:
        KeyError: If a required column is missing.
    """
    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    # Round the score columns to two decimals. The short names come from the
    # rename map so the two cannot drift apart.
    for col in COLUMN_RENAME_MAP.values():
        if col in ranked.columns:
            ranked[col] = ranked[col].round(2)

    # Highlight models of the featured category (color + emoji).
    # Built with zip instead of DataFrame.apply(axis=1): on an empty frame
    # apply() returns an empty DataFrame rather than a Series, and assigning
    # that to a single column raises ValueError.
    ranked['model'] = [
        f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {name}</span>'
        if category == CATEGORY_TO_HIGHLIGHT
        else name
        for name, category in zip(ranked['model'], ranked['category'])
    ]
    return ranked
def filter_data(search_text: str, selected_categories: list) -> pd.DataFrame:
    """Reload the leaderboard, apply the UI filters and return the ranked table.

    Args:
        search_text: Text typed into the search box. It is matched as a
            literal, case-insensitive substring of the model name; blank or
            ``None`` disables the name filter.
        selected_categories: Category labels to keep. An empty selection
            disables the category filter (all rows are kept).

    Returns:
        The filtered rows, ranked and formatted by ``make_ranked``.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: the query is user-typed text, not a pattern. With the
        # default regex=True, input such as "gpt-4(" or "[" raises re.error
        # and "." in "gpt-4.1" would match any character.
        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)
def create_leaderboard_tab():
    """Build the "Leaderboard" tab and wire its filters to the table.

    The tab holds a model search box, a category checkbox group (all
    categories checked initially), the ranked leaderboard table and a
    Markdown legend describing each column. Changing either filter
    recomputes the table through ``filter_data``.

    Returns:
        The search ``gr.Textbox`` component.
    """
    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    label="Model Search",
                    placeholder="Entering model name to search...",
                    value="",
                )
            with gr.Column(scale=2):
                category_checkboxes = gr.CheckboxGroup(
                    label="Model Categories",
                    choices=list(MODEL_CATEGORIES),
                    value=list(MODEL_CATEGORIES),
                )

        # Unfiltered ranked table shown when the tab first loads.
        startup_frame = make_ranked(load_leaderboard())

        # Every column is plain text except "model", which carries HTML
        # markup for highlighted rows and must be rendered as such.
        column_types = [
            "html" if column == 'model' else "str"
            for column in startup_frame.columns
        ]

        table = gr.Dataframe(
            value=startup_frame,
            datatype=column_types,        # "model" column rendered as HTML
            wrap=False,                   # do not wrap cell text
            line_breaks=False,            # no line breaks inside cells
            max_height=600,               # cap the table height
            show_label=False,             # hide the component label
            elem_id="leaderboard_table",  # element id for the table
        )

        def update_display(search_text, selected_categories):
            # Recompute the ranked table for the current filter state.
            return filter_data(search_text, selected_categories)

        # Both filter widgets trigger the same refresh.
        for trigger in (search_box, category_checkboxes):
            trigger.change(
                fn=update_display,
                inputs=[search_box, category_checkboxes],
                outputs=table,
            )

        # Column legend at the bottom of the tab.
        with gr.Row():
            gr.Markdown(f"""
### 📊 Column Descriptions
- **Rank**: Model ranking based on overall score
- **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
- **overall**: Overall Score (weighted average of all metrics)
- **comp.**: Comprehensiveness - How thorough and complete the research is
- **insight**: Insight Quality - Depth and value of analysis
- **inst.**: Instruction Following - Adherence to user instructions
- **read.**: Readability - Clarity and organization of content
- **c.acc.**: Citation Accuracy - Correctness of references
- **eff.c.**: Effective Citations - Relevance and quality of sources
- **category**: Model category
""")

    return search_box