Spaces:
Running
Running
File size: 2,809 Bytes
8664fba 1783518 8664fba 1783518 8664fba 5b0a5d3 8664fba 1783518 8664fba 1783518 8664fba 1783518 8664fba 1783518 8664fba 356b0eb 8664fba 108bc02 8664fba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from envs import RESULTS_REPO_ID
from pathlib import Path
import pandas as pd
import os
from utils import parse_json_files, create_scatter_plot
from huggingface_hub import snapshot_download
# Directory containing this script; the "evals" folder and about.md are
# resolved relative to it.
abs_path = Path(__file__).parent
# Local folder the results dataset is downloaded into and parsed from.
evals_dir = abs_path / "evals"


def _download_results():
    """Download the results dataset from the Hugging Face Hub into ``evals_dir``."""
    snapshot_download(
        RESULTS_REPO_ID,
        local_dir=evals_dir,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        max_workers=4,
    )


def _benchmark_tab(title, results):
    """Add a tab with a cost/accuracy scatter plot beside a leaderboard table.

    Must be called inside an active ``gr.Tabs()`` context.

    Args:
        title: Label shown on the tab.
        results: Parsed results passed both to ``create_scatter_plot`` and to
            the ``Leaderboard`` component (presumably a pandas DataFrame with
            ``agent_name``, ``results_accuracy`` and ``results_total_cost``
            columns -- confirm against ``utils.parse_json_files``).
    """
    with gr.Tab(title):
        with gr.Row():
            with gr.Column(scale=1):
                gr.Plot(
                    create_scatter_plot(
                        results,
                        "results_total_cost",
                        "results_accuracy",
                        "Cost",
                        "Accuracy",
                        ["agent_name"],
                    )
                )
            with gr.Column(scale=1):
                Leaderboard(
                    value=results,
                    select_columns=SelectColumns(
                        default_selection=config.SWEBENCH_ON_LOAD_COLUMNS,
                        cant_deselect=["agent_name"],
                        label="Select Columns to Display:",
                    ),
                    search_columns=config.SWEBENCH_SEARCH_COLUMNS,
                    column_widths={
                        "agent_name": 40,
                        "results_accuracy": 20,
                        "results_total_cost": 20,
                    },
                )


if __name__ == "__main__":
    # Fetch the results BEFORE the UI is built: the dataframe below is parsed
    # while this module executes, so downloading only right before launch()
    # would leave the leaderboard showing whatever was on disk beforehand.
    _download_results()

with gr.Blocks() as demo:
    gr.Markdown("\n# 🥇 Agent Leaderboard\n")
    df = parse_json_files(str(evals_dir))
    with gr.Tabs():
        _benchmark_tab("SWE-Bench", df)
        # NOTE(review): the USACO tab is fed the same dataframe and the same
        # SWEBENCH_* column configuration as the SWE-Bench tab -- confirm
        # whether it should use its own data/config.
        _benchmark_tab("USACO", df)
        with gr.Tab("About"):
            # Explicit encoding so the page does not depend on the locale.
            gr.Markdown((abs_path / "about.md").read_text(encoding="utf-8"))

if __name__ == "__main__":
    demo.launch()