Spaces:

FrontierAICybersecurity
/

Cybersecurity_leaderboard

Running

File size: 7,787 Bytes

import abc
import sys
import types

import gradio as gr

from gen_table import *
from meta_data import *

# import pandas as pd
# pd.set_option('display.max_colwidth', 0)

# Extra markup passed to gr.Blocks via its `head` argument: on viewports at
# least 1536px wide, force `.gradio-container` to a min-width of the
# `--size-full` CSS variable.
head_style = """
<style>
@media (min-width: 1536px)
{
    .gradio-container {
        min-width: var(--size-full) !important;
    }
}
</style>
"""

with gr.Blocks(
    title="Frontier AI Cybersecurity Observatory",
    head=head_style,
) as demo:
    # Load the evaluation payload once; it carries a timestamp under 'time'
    # and a mapping of per-benchmark results under 'results'.
    struct = load_results()
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']

    # One leaderboard tab is built per key of the results mapping, in key order.
    benchmark_list = list(results.keys())
    N_DATA = len(benchmark_list)
    DATASETS = benchmark_list

    # The introduction template takes the benchmark count and evaluation time.
    gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA, EVAL_TIME))

    # One mutable attribute bag per benchmark; the tab-building code stores the
    # table, checkbox metadata and widgets on it, and the filter callback reads
    # them back. SimpleNamespace is the purpose-built container for this; the
    # previous abc.abstractproperty() instances only worked because that
    # deprecated descriptor class happens to allow arbitrary attributes.
    structs = [types.SimpleNamespace() for _ in range(N_DATA)]

    with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
        # NOTE: the aggregated main leaderboard tab (id=0) is currently
        # disabled; its code is kept below, commented out, for reference.
        # with gr.TabItem('🏅 Cybersecurity Main Leaderboard', elem_id='main', id=0):
        #     gr.Markdown(LEADERBOARD_MD['MAIN'].format(N_DATA,N_DATA))
        #     _, check_box = BUILD_L1_DF(results, DEFAULT_TASK)
        #     table = generate_table(results, DEFAULT_TASK)

        #     type_map = check_box['type_map']

        #     checkbox_group = gr.CheckboxGroup(
        #         choices=check_box['all'],
        #         value=check_box['required'],
        #         label='Aspects of Cybersecurity Work',
        #         interactive=True,
        #     )

        #     headers = check_box['essential'] + checkbox_group.value
        #     with gr.Row():
        #         model_name = gr.Textbox(
        #             value='Input the Model Name (fuzzy, case insensitive)',
        #             label='Model Name',
        #             interactive=True,
        #             visible=True)
        #     data_component = gr.components.DataFrame(
        #         value=table[headers],
        #         type='pandas',
        #         datatype=[type_map[x] for x in headers],
        #         interactive=False,
        #         wrap=True,
        #         visible=True)

        #     def filter_df(fields, model_name):
        #         headers = check_box['essential'] + fields
        #         df = generate_table(results, fields)

        #         default_val = 'Input the Model Name (fuzzy, case insensitive)'
        #         if model_name != default_val:
        #             print(model_name)
        #             model_name = model_name.lower()
        #             method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
        #             flag = [model_name in name for name in method_names]
        #             df['TEMP_FLAG'] = flag
        #             df = df[df['TEMP_FLAG'] == True]
        #             df.pop('TEMP_FLAG')

        #         comp = gr.components.DataFrame(
        #             value=df[headers],
        #             type='pandas',
        #             datatype=[type_map[x] for x in headers],
        #             interactive=False,
        #             wrap=True,
        #             visible=True)
        #         return comp

        #     for cbox in [checkbox_group]:
        #         cbox.change(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)
        #     model_name.submit(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)

        # Static "About" tab: render the contents of about.md as Markdown.
        with gr.TabItem('🔍 About', id=1, elem_id='about'):
            with open("about.md", encoding="utf-8") as about_file:
                about_text = about_file.read()
            gr.Markdown(about_text)

        # Benchmarks whose tables are searched through the 'Agent' column (and
        # labelled "Agent Name") instead of the default 'Model' column. Kept in
        # one place so the widgets and the filter callback cannot drift apart.
        _AGENT_BENCHMARKS = ("SWE-bench-verified", "CyberGym", "BountyBench")

        def _search_spec(dataset_name):
            """Return ``(column, placeholder)`` for a benchmark's search box.

            ``column`` is the table column that gets searched and the prefix of
            the textbox label. ``placeholder`` is the textbox's initial value,
            which also acts as the sentinel for "no search entered".
            """
            column = 'Agent' if dataset_name in _AGENT_BENCHMARKS else 'Model'
            return column, f'Input the {column} Name (fuzzy, case insensitive)'

        def filter_df_l2(dataset_name, fields, model_name):
            """Rebuild one benchmark's table for the chosen columns and search text.

            Args:
                dataset_name: Benchmark name, used to find that benchmark's
                    entry in ``structs`` via its position in ``benchmark_list``.
                fields: Column names currently selected in the checkbox group.
                model_name: Current content of the search textbox.

            Returns:
                A non-interactive ``gr.components.DataFrame`` holding the
                essential columns plus ``fields``, limited to rows whose name
                contains the search text (case-insensitive substring match).
                All rows are kept while the textbox still holds its
                placeholder text.
            """
            s = structs[benchmark_list.index(dataset_name)]
            headers = s.check_box['essential'] + list(fields or [])
            column, default_val = _search_spec(dataset_name)

            # No defensive copy is needed: both .loc[...] and df[headers] build
            # new frames, so the cached s.table is never modified.
            df = s.table
            if model_name != default_val:
                # Tolerate a missing value and stray surrounding whitespace.
                query = (model_name or '').strip().lower()
                # Cells may be HTML links (`<a ...>name</a>`); compare against
                # the text that sits just before the closing `</a>`.
                names = [
                    cell.split('</a>')[0].split('>')[-1].lower()
                    for cell in df[column]
                ]
                df = df.loc[[query in name for name in names]]

            return gr.components.DataFrame(
                value=df[headers],
                type='pandas',
                datatype=[s.type_map[x] for x in headers],
                interactive=False,
                wrap=True,
                visible=True)

        # Build one leaderboard tab per benchmark (tab ids start at 2; id 1 is
        # taken by the About tab).
        for i, benchmark in enumerate(benchmark_list):
            with gr.TabItem(f'📊 {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
                # Optional per-benchmark description.
                if benchmark in LEADERBOARD_MD:
                    gr.Markdown(LEADERBOARD_MD[benchmark])

                # Cache the table and checkbox metadata on this benchmark's
                # entry so filter_df_l2 can look them up later.
                s = structs[i]
                s.table, s.check_box = BUILD_L2_DF(results, benchmark)
                s.type_map = s.check_box['type_map']

                s.checkbox_group = gr.CheckboxGroup(
                    choices=s.check_box['all'],
                    value=s.check_box['required'],
                    label=f'{benchmark} CheckBoxes',
                    interactive=True,
                )
                s.headers = s.check_box['essential'] + s.checkbox_group.value

                column, placeholder = _search_spec(benchmark)
                with gr.Row():
                    s.model_name = gr.Textbox(
                        value=placeholder,
                        label=f'{column} Name',
                        interactive=True,
                        visible=True)
                s.data_component = gr.components.DataFrame(
                    value=s.table[s.headers],
                    type='pandas',
                    datatype=[s.type_map[x] for x in s.headers],
                    interactive=False,
                    wrap=True,
                    visible=True)
                # Hidden textbox that passes the benchmark name to the callback,
                # so one shared callback can serve every tab.
                s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)

                # Re-filter when the column selection changes or a search is
                # submitted.
                filter_io = dict(
                    fn=filter_df_l2,
                    inputs=[s.dataset, s.checkbox_group, s.model_name],
                    outputs=s.data_component)
                s.checkbox_group.change(**filter_io)
                s.model_name.submit(**filter_io)

    # Collapsed "Citation" accordion in its own row, holding the citation text.
    with gr.Row(), gr.Accordion('Citation', open=False):
        citation_button = gr.Textbox(
            elem_id='citation-button',
            label=CITATION_BUTTON_LABEL,
            value=CITATION_BUTTON_TEXT,
        )

# Script entry point: launch the Gradio app only when this file is run directly.
if __name__ == '__main__':
    # Bind to all network interfaces and request a share link.
    # NOTE(review): share=True is presumably unnecessary when the app is hosted
    # on a managed platform that already exposes it publicly — confirm.
    demo.launch(server_name='0.0.0.0', share=True)