"""Leaderboard tab for the Gradio app.

Loads ``data/leaderboard.csv``, tags every model with a category, ranks the
models by overall score and renders the result as a searchable, filterable
table.
"""
from __future__ import annotations

from pathlib import Path
from typing import Union

import gradio as gr
import pandas as pd

# Paths are resolved relative to the directory above the one holding this file.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR / "data" / "leaderboard.csv"

# Models of this category get HIGHLIGHT_EMOJI prepended to their name.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"

# Category assigned to models that are not listed in MODEL_CATEGORIES.
DEFAULT_CATEGORY = "Others"

# CSV column name -> abbreviated column header shown in the table.
COLUMN_RENAME_MAP = {
    'overall_score': 'overall',
    'comprehensiveness': 'comp.',
    'insight': 'insight',
    'instruction_following': 'inst.',
    'readability': 'read.',
    'citation_accuracy': 'c.acc.',
    'effective_citations': 'eff.c.',
}

# Category name -> models belonging to that category.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "perplexity-sonar-reasoning-pro",
        "perplexity-sonar-reasoning",
        "perplexity-sonar-pro",
        "gemini-2.5-pro-with-grounding",
        "gpt-4o-search-preview",
        "perplexity-sonar",
        "gpt-4.1-with-search",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "gpt-4.1-mini-with-search",
        "claude-3-5-sonnet-with-search",
    ],
}

# Legend rendered below the table. Built line by line so that no leading
# indentation ends up in the Markdown source.
COLUMN_DESCRIPTIONS_MD = "\n".join([
    "### 📊 Column Descriptions",
    "- **Rank**: Model ranking based on overall score",
    f"- **model**: Model name ({HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT})",
    "- **overall**: Overall Score (weighted average of all metrics)",
    "- **comp.**: Comprehensiveness - How thorough and complete the research is",
    "- **insight**: Insight Quality - Depth and value of analysis",
    "- **inst.**: Instruction Following - Adherence to user instructions",
    "- **read.**: Readability - Clarity and organization of content",
    "- **c.acc.**: Citation Accuracy - Correctness of references",
    "- **eff.c.**: Effective Citations - Relevance and quality of sources",
    "- **category**: Model category",
])


def _build_model_lookup() -> dict[str, str]:
    """Return a model-name -> category mapping derived from MODEL_CATEGORIES.

    If a model is listed under several categories, the first category (in
    dictionary order) wins.
    """
    lookup: dict[str, str] = {}
    for category, models in MODEL_CATEGORIES.items():
        for model in models:
            lookup.setdefault(model, category)
    return lookup


def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and add a ``category`` column.

    Column names are stripped of surrounding whitespace. Models that are not
    listed in MODEL_CATEGORIES are assigned DEFAULT_CATEGORY.

    Returns:
        The leaderboard with one extra ``category`` column.

    Raises:
        FileNotFoundError: If DATA_PATH does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    df = pd.read_csv(DATA_PATH)
    df.columns = [c.strip() for c in df.columns]

    model_to_category = _build_model_lookup()
    df['category'] = [
        model_to_category.get(name, DEFAULT_CATEGORY) for name in df['model']
    ]
    return df


def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Return a display-ready copy of *df* ranked by ``overall_score``.

    The result is sorted by descending overall score, gets a 1-based ``Rank``
    column in first position, uses the abbreviated headers from
    COLUMN_RENAME_MAP, has its score columns rounded to two decimals and has
    HIGHLIGHT_EMOJI prepended to the names of CATEGORY_TO_HIGHLIGHT models.

    Args:
        df: Leaderboard data containing at least the ``overall_score``,
            ``model`` and ``category`` columns.

    Returns:
        A new DataFrame; *df* itself is not modified.
    """
    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    # Round every score column that is present to two decimals.
    for col in COLUMN_RENAME_MAP.values():
        if col in ranked.columns:
            ranked[col] = ranked[col].round(2)

    # Mark highlighted models with an emoji prefix.
    ranked['model'] = [
        f'{HIGHLIGHT_EMOJI} {name}' if category == CATEGORY_TO_HIGHLIGHT else name
        for name, category in zip(ranked['model'], ranked['category'])
    ]
    return ranked


def filter_data(search_text: str, selected_categories: list) -> pd.DataFrame:
    """Reload the leaderboard, filter it and return the ranked result.

    Args:
        search_text: Case-insensitive substring to look for in model names.
            Blank or ``None`` disables the name filter.
        selected_categories: Categories to keep. An empty or ``None``
            selection disables the category filter.

    Returns:
        The ranked table (see ``make_ranked``) for the matching models.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: the query is user input and must be matched literally.
        # Otherwise "(" raises re.error and "." matches any character.
        name_matches = df['model'].str.contains(
            query, case=False, na=False, regex=False
        )
        df = df[name_matches]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)


def create_leaderboard_tab():
    """Build the "Leaderboard" tab and return its search box component.

    Must be called inside an active ``gr.Blocks`` context. The tab holds a
    model search box, a category checkbox group, the ranked table and a
    legend describing the columns. Changing the search text or the category
    selection re-filters the table.
    """
    category_names = list(MODEL_CATEGORIES.keys())

    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    label="Model Search",
                    placeholder="Entering model name to search...",
                    value="",
                )
            with gr.Column(scale=2):
                category_checkboxes = gr.CheckboxGroup(
                    label="Model Categories",
                    choices=category_names,
                    value=category_names,
                )

        # NOTE(review): the initial table is unfiltered, so it includes models
        # of the "Others" category, whereas any later filter event with the
        # default checkbox selection hides them. Confirm which one is intended.
        initial_df = make_ranked(load_leaderboard())

        # Every column is rendered as text except "model", which is HTML.
        datatypes = [
            "html" if column == 'model' else "str"
            for column in initial_df.columns
        ]

        table = gr.Dataframe(
            value=initial_df,
            datatype=datatypes,
            wrap=False,                    # keep cell text on one line
            line_breaks=False,             # no line breaks inside cells
            max_height=600,                # cap the table height
            show_label=False,
            elem_id="leaderboard_table",
        )

        # Both controls refresh the table through the same filter function.
        for control in (search_box, category_checkboxes):
            control.change(
                fn=filter_data,
                inputs=[search_box, category_checkboxes],
                outputs=table,
            )

        # Column legend at the bottom of the tab.
        with gr.Row():
            gr.Markdown(COLUMN_DESCRIPTIONS_MD)

    return search_box