|
import abc
import sys
import types

import gradio as gr

from gen_table import *
from meta_data import *
|
|
|
|
|
|
|
|
|
|
# Extra markup passed to `gr.Blocks(head=...)`. On viewports at least 1536px
# wide it sets the `.gradio-container` min-width to the `--size-full` CSS
# variable. The literal must contain only valid HTML/CSS, since it is handed
# to Gradio verbatim.
head_style = """
<style>
@media (min-width: 1536px)
{
    .gradio-container {
        min-width: var(--size-full) !important;
    }
}
</style>
"""
|
|
|
|
# The one benchmark whose rows are labelled "Agent" instead of "Model".
_AGENT_BENCHMARK = 'SWE-bench-verified'


def _name_column(dataset_name):
    """Return the name of the table column holding entry names for a benchmark."""
    return 'Agent' if dataset_name == _AGENT_BENCHMARK else 'Model'


def _name_prompt(dataset_name):
    """Return the prompt text pre-filled in a benchmark's search box.

    The same text doubles as the "no filter entered" sentinel in
    `filter_df_l2`, so both places must produce the identical string.
    """
    return f'Input the {_name_column(dataset_name)} Name (fuzzy, case insensitive)'


def filter_df_l2(dataset_name, fields, model_name):
    """Rebuild a benchmark's table for the selected columns and name filter.

    Defined once (not per tab): the benchmark is resolved from
    `dataset_name` at call time, so one handler serves every tab.

    Args:
        dataset_name: Benchmark name; must be an entry of `benchmark_list`
            (otherwise `list.index` raises `ValueError`).
        fields: Optional columns ticked in the checkbox group; they are
            appended to the benchmark's 'essential' columns.
        model_name: Content of the search box. The untouched prompt text
            means "no filter"; anything else is matched as a
            case-insensitive substring of each entry's displayed name.

    Returns:
        A new `gr.components.DataFrame` holding the filtered rows and the
        requested columns.
    """
    s = structs[benchmark_list.index(dataset_name)]
    headers = s.check_box['essential'] + fields
    # No defensive copy is needed: the stored table is never mutated below,
    # and both `.loc[...]` and `df[headers]` produce new frames.
    df = s.table

    if model_name != _name_prompt(dataset_name):
        print(model_name)
        query = model_name.lower()
        # Name cells appear to hold HTML anchors (`<a ...>name</a>`); keep
        # only the text before `</a>` and after the last `>`. `str()` guards
        # against non-string cells such as NaN.
        display_names = [
            str(cell).split('</a>')[0].split('>')[-1].lower()
            for cell in df[_name_column(dataset_name)]
        ]
        # Boolean-mask row selection; `.loc` also handles an empty table,
        # where a bare `df[[]]` would be read as a column selection.
        df = df.loc[[query in name for name in display_names]]

    return gr.components.DataFrame(
        value=df[headers],
        type='pandas',
        datatype=[s.type_map[x] for x in headers],
        interactive=False,
        wrap=True,
        visible=True)


# Build the leaderboard UI. This runs at import time, and every name bound
# inside the `with` blocks (`demo`, `structs`, `EVAL_TIME`, ...) is a module
# global because `with` does not open a new scope.
with gr.Blocks(title="Cybersecurity Leaderboard", head=head_style) as demo:
    struct = load_results()
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    benchmark_list = list(results.keys())

    N_DATA = len(benchmark_list)
    DATASETS = benchmark_list

    gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA, EVAL_TIME))

    # One mutable attribute bag per benchmark, in `benchmark_list` order. It
    # stores the table, checkbox metadata and the widgets of that tab so the
    # event handler can look them up later.
    structs = [types.SimpleNamespace() for _ in range(N_DATA)]

    with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
        with gr.TabItem('π About', elem_id='about', id=1):
            with open("about.md", 'r', encoding="utf-8") as file:
                gr.Markdown(file.read())

        # Tab id 1 is taken by "About", so benchmark tabs start at 2.
        for i, benchmark in enumerate(benchmark_list):
            with gr.TabItem(f'π {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
                if benchmark in LEADERBOARD_MD:
                    gr.Markdown(LEADERBOARD_MD[benchmark])

                s = structs[i]
                s.table, s.check_box = BUILD_L2_DF(results, benchmark)
                s.type_map = s.check_box['type_map']

                s.checkbox_group = gr.CheckboxGroup(
                    choices=s.check_box['all'],
                    value=s.check_box['required'],
                    label=f'{benchmark} CheckBoxes',
                    interactive=True,
                )
                # Initially shown columns: the essential ones plus whatever
                # the checkbox group starts out with.
                s.headers = s.check_box['essential'] + s.checkbox_group.value

                with gr.Row():
                    s.model_name = gr.Textbox(
                        value=_name_prompt(benchmark),
                        label=f'{_name_column(benchmark)} Name',
                        interactive=True,
                        visible=True)
                s.data_component = gr.components.DataFrame(
                    value=s.table[s.headers],
                    type='pandas',
                    datatype=[s.type_map[x] for x in s.headers],
                    interactive=False,
                    wrap=True,
                    visible=True)
                # Hidden textbox that feeds the benchmark name to the handler.
                s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)

                # Re-filter when the column selection changes or when a name
                # query is submitted in the search box.
                filter_inputs = [s.dataset, s.checkbox_group, s.model_name]
                s.checkbox_group.change(
                    fn=filter_df_l2,
                    inputs=filter_inputs,
                    outputs=s.data_component)
                s.model_name.submit(
                    fn=filter_df_l2,
                    inputs=filter_inputs,
                    outputs=s.data_component)

    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button')
|
|
|
|
# Script entry point: start the Gradio server only when this file is run
# directly, not when it is imported.
if __name__ == '__main__':
    launch_options = {
        # Host/interface the server is asked to bind to.
        'server_name': '0.0.0.0',
        # Ask Gradio to create a share link as well (see launch() docs).
        'share': True,
    }
    demo.launch(**launch_options)
|
|
|