core_leaderboard

Running

File size: 2,809 Bytes

import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from envs import RESULTS_REPO_ID
from pathlib import Path
import pandas as pd
import os
from utils import parse_json_files, create_scatter_plot
from huggingface_hub import snapshot_download

abs_path = Path(__file__).parent
# Directory the results dataset is downloaded into and later parsed from.
EVALS_DIR = abs_path / "evals"


def _build_benchmark_tab(tab_name, df, on_load_columns, search_columns):
    """Add one benchmark tab (scatter plot + leaderboard table) to the UI.

    Must be called inside an active ``gr.Tabs()`` context so the new tab is
    attached to it.

    Args:
        tab_name: Label shown on the tab.
        df: Results table; the code below reads the ``agent_name``,
            ``results_accuracy`` and ``results_total_cost`` columns from it.
        on_load_columns: Columns selected by default in the leaderboard.
        search_columns: Columns the leaderboard search box matches against.
    """
    with gr.Tab(tab_name):
        with gr.Row():
            with gr.Column(scale=1):
                # "Cost" / "Accuracy" are presumably the axis labels and
                # ["agent_name"] the hover fields -- confirm against
                # utils.create_scatter_plot.
                gr.Plot(
                    create_scatter_plot(
                        df,
                        "results_total_cost",
                        "results_accuracy",
                        "Cost",
                        "Accuracy",
                        ["agent_name"],
                    )
                )
            with gr.Column(scale=1):
                Leaderboard(
                    value=df,
                    select_columns=SelectColumns(
                        default_selection=on_load_columns,
                        cant_deselect=["agent_name"],
                        label="Select Columns to Display:",
                    ),
                    search_columns=search_columns,
                    column_widths={
                        "agent_name": 40,
                        "results_accuracy": 20,
                        "results_total_cost": 20,
                    },
                )


if __name__ == "__main__":
    # Download the results from the Hugging Face Hub *before* the UI below is
    # built: the leaderboard dataframe is parsed from EVALS_DIR while the
    # Blocks context executes, so downloading afterwards would leave the app
    # showing stale (or, on a fresh checkout, missing) data.
    snapshot_download(
        RESULTS_REPO_ID,
        local_dir=EVALS_DIR,
        repo_type='dataset',
        tqdm_class=None,
        etag_timeout=30,
        max_workers=4,
    )

with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 Agent Leaderboard
    """)
    df = parse_json_files(str(EVALS_DIR))

    with gr.Tabs():
        _build_benchmark_tab(
            "SWE-Bench",
            df,
            config.SWEBENCH_ON_LOAD_COLUMNS,
            config.SWEBENCH_SEARCH_COLUMNS,
        )
        # NOTE(review): the USACO tab shows the same dataframe and uses the
        # SWE-Bench column configuration, exactly as before. This looks like a
        # placeholder -- switch to USACO-specific data/config once it exists.
        _build_benchmark_tab(
            "USACO",
            df,
            config.SWEBENCH_ON_LOAD_COLUMNS,
            config.SWEBENCH_SEARCH_COLUMNS,
        )
        with gr.Tab("About"):
            # Explicit encoding so the page renders the same regardless of the
            # host's locale default.
            gr.Markdown((abs_path / "about.md").read_text(encoding="utf-8"))

if __name__ == "__main__":
    demo.launch()