|
from __future__ import annotations |
|
import gradio as gr |
|
import pandas as pd |
|
from pathlib import Path |
|
from typing import Union |
|
|
|
|
|
# Project layout: this module lives one level below the repo root; the
# leaderboard CSV is generated into <repo>/data/ by rank_leaderboard.py.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR / "data" / "leaderboard.csv"

# Models belonging to this category get a colored, emoji-prefixed name
# in the rendered table (see make_ranked).
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"
|
|
|
|
|
# Maps the raw CSV column names to the short headers shown in the UI table.
COLUMN_RENAME_MAP = {
    'overall_score': 'overall',
    'comprehensiveness': 'comp.',
    'insight': 'insight',
    'instruction_following': 'inst.',
    'readability': 'read.',
    'citation_accuracy': 'c.acc.',
    'effective_citations': 'eff.c.'
}
|
|
|
|
|
# Known model names grouped by product category. Any model appearing in the
# CSV but not listed here is assigned the fallback category "Others"
# (see load_leaderboard).
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research"
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "perplexity-sonar-reasoning-pro",
        "perplexity-sonar-reasoning",
        "perplexity-sonar-pro",
        "gemini-2.5-pro-with-grounding",
        "gpt-4o-search-preview",
        "perplexity-sonar",
        "gpt-4.1-with-search",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "gpt-4.1-mini-with-search",
        "claude-3-5-sonnet-with-search"
    ]
}
|
|
|
def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and annotate each row with its model category.

    Returns:
        DataFrame with the CSV's columns (header whitespace stripped) plus a
        'category' column derived from MODEL_CATEGORIES; models not listed
        there get the fallback category "Others".

    Raises:
        FileNotFoundError: if data/leaderboard.csv has not been generated yet.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )
    df = pd.read_csv(DATA_PATH)
    # Header cells may carry stray whitespace (e.g. from hand-edited CSVs).
    df.columns = [c.strip() for c in df.columns]

    # Build the model -> category lookup once, instead of scanning every
    # category list for every row as a nested-loop helper would.
    model_to_category = {
        model: category
        for category, models in MODEL_CATEGORIES.items()
        for model in models
    }
    df['category'] = df['model'].map(model_to_category).fillna("Others")
    return df
|
|
|
def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Sort the leaderboard by overall score and format it for display.

    Args:
        df: leaderboard frame; must contain 'overall_score', 'model' and
            'category' columns (as produced by load_leaderboard).

    Returns:
        A new DataFrame sorted best-first with a leading 'Rank' column,
        short UI column names (see COLUMN_RENAME_MAP), scores rounded to
        2 decimals, and highlighted model names for the featured category.
    """
    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Shorten headers for the on-screen table.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    # Round every score column that survived filtering upstream.
    numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.']
    for col in numeric_columns:
        if col in ranked.columns:
            ranked[col] = ranked[col].round(2)

    # Wrap featured models in highlight markup. A vectorized boolean-mask
    # assignment replaces the original DataFrame.apply(axis=1): it is faster
    # and, unlike apply, is well-defined when the (filtered) frame is empty —
    # apply on an empty frame has a version-dependent return type that can
    # break the column assignment.
    mask = ranked['category'] == CATEGORY_TO_HIGHLIGHT
    ranked.loc[mask, 'model'] = (
        f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} '
        + ranked.loc[mask, 'model']
        + '</span>'
    )

    return ranked
|
|
|
def filter_data(search_text: str, selected_categories: list):
    """Reload the leaderboard and apply the UI filters.

    Args:
        search_text: case-insensitive substring matched against model names;
            surrounding whitespace is ignored and an empty/blank string
            disables the filter.
        selected_categories: categories to keep; an empty selection keeps all.

    Returns:
        The filtered, display-ready DataFrame produced by make_ranked.
    """
    frame = load_leaderboard()

    query = search_text.strip()
    if query:
        frame = frame[frame['model'].str.contains(query, case=False, na=False)]

    if selected_categories:
        frame = frame[frame['category'].isin(selected_categories)]

    return make_ranked(frame)
|
|
|
def create_leaderboard_tab():
    """Build the leaderboard tab: filter controls, results table, and legend.

    Returns:
        The search Textbox component, so callers can attach extra events.
    """
    with gr.Tab("🏆Leaderboard"):
        # --- filter controls ---------------------------------------------
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    label="Model Search",
                    placeholder="Entering model name to search...",
                    value=""
                )
            with gr.Column(scale=2):
                category_checkboxes = gr.CheckboxGroup(
                    label="Model Categories",
                    choices=list(MODEL_CATEGORIES.keys()),
                    value=list(MODEL_CATEGORIES.keys())
                )

        # --- results table -----------------------------------------------
        initial_df = make_ranked(load_leaderboard())

        # Every column renders as plain text except 'model', which carries
        # the highlight <span> markup and must be interpreted as HTML.
        datatypes = [
            "html" if column == 'model' else "str"
            for column in initial_df.columns
        ]

        table = gr.Dataframe(
            value=initial_df,
            datatype=datatypes,
            wrap=False,
            line_breaks=False,
            max_height=600,
            show_label=False,
            elem_id="leaderboard_table"
        )

        def update_display(search_text, selected_categories):
            # Re-query the CSV on every control change.
            return filter_data(search_text, selected_categories)

        # Both controls trigger the same refresh.
        for control in (search_box, category_checkboxes):
            control.change(
                fn=update_display,
                inputs=[search_box, category_checkboxes],
                outputs=table
            )

        # --- column legend -----------------------------------------------
        with gr.Row():
            gr.Markdown(f"""
            ### 📊 Column Descriptions
            - **Rank**: Model ranking based on overall score
            - **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
            - **overall**: Overall Score (weighted average of all metrics)
            - **comp.**: Comprehensiveness - How thorough and complete the research is
            - **insight**: Insight Quality - Depth and value of analysis
            - **inst.**: Instruction Following - Adherence to user instructions
            - **read.**: Readability - Clarity and organization of content
            - **c.acc.**: Citation Accuracy - Correctness of references
            - **eff.c.**: Effective Citations - Relevance and quality of sources
            - **category**: Model category
            """)

    return search_box