File size: 2,809 Bytes
8664fba
 
 
1783518
8664fba
 
 
 
1783518
8664fba
 
 
 
 
 
 
5b0a5d3
8664fba
 
 
 
 
1783518
8664fba
 
 
 
 
 
 
 
 
 
 
1783518
8664fba
 
 
 
1783518
8664fba
 
 
 
 
 
 
 
 
 
 
1783518
8664fba
 
356b0eb
8664fba
 
108bc02
 
 
 
 
 
 
 
8664fba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from envs import RESULTS_REPO_ID
from pathlib import Path
import pandas as pd
import os
from utils import parse_json_files, create_scatter_plot
from huggingface_hub import snapshot_download

# Directory containing this script; data and docs are resolved relative to it.
abs_path = Path(__file__).parent
# Local folder the results dataset is downloaded into and parsed from.
evals_dir = abs_path / "evals"


def _build_benchmark_tab(df):
    """Render one benchmark view inside the currently open Gradio container.

    Creates a row with two equally scaled columns: the first holds a scatter
    plot of ``results_total_cost`` against ``results_accuracy``, the second a
    ``Leaderboard`` table over the same data.

    Args:
        df: The value returned by ``parse_json_files``; it is handed unchanged
            to ``create_scatter_plot`` and to ``Leaderboard(value=...)``
            (presumably a pandas DataFrame -- confirm against ``utils``).
    """
    with gr.Row():
        with gr.Column(scale=1):
            gr.Plot(
                create_scatter_plot(
                    df,
                    "results_total_cost",
                    "results_accuracy",
                    "Cost",
                    "Accuracy",
                    ["agent_name"],
                )
            )
        with gr.Column(scale=1):
            Leaderboard(
                value=df,
                select_columns=SelectColumns(
                    default_selection=config.SWEBENCH_ON_LOAD_COLUMNS,
                    cant_deselect=["agent_name"],
                    label="Select Columns to Display:",
                ),
                search_columns=config.SWEBENCH_SEARCH_COLUMNS,
                column_widths={
                    "agent_name": 40,
                    "results_accuracy": 20,
                    "results_total_cost": 20,
                },
            )


if __name__ == "__main__":
    # Download the results from the Hugging Face Hub *before* the UI is built:
    # the Blocks body below parses ``evals_dir`` while this module executes, so
    # fetching afterwards would leave the leaderboard showing stale or missing
    # data for the whole run.
    snapshot_download(
        RESULTS_REPO_ID,
        local_dir=evals_dir,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        max_workers=4,
    )

with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 Agent Leaderboard
    """)
    # parse_json_files receives a plain string path, as in the original call.
    df = parse_json_files(os.fspath(evals_dir))

    with gr.Tabs():
        with gr.Tab("SWE-Bench"):
            _build_benchmark_tab(df)
        with gr.Tab("USACO"):
            # NOTE(review): this tab shows the same data and the SWE-Bench
            # column config as the tab above -- looks copy-pasted; confirm
            # whether USACO should get its own data and config.
            _build_benchmark_tab(df)
        with gr.Tab("About"):
            # Explicit UTF-8 so the page does not depend on the host locale.
            gr.Markdown((abs_path / "about.md").read_text(encoding="utf-8"))

if __name__ == "__main__":
    demo.launch()