"""Gradio app for the Frontier AI Cybersecurity Observatory leaderboards.

Builds one "About" tab plus one leaderboard tab per benchmark returned by
``load_results()``.  Each leaderboard tab has a column selector, a fuzzy name
search box and a results table; ``filter_df_l2`` re-renders the table whenever
the column selection changes or a search is submitted.
"""
import abc
import copy as cp  # imported explicitly; previously only reachable via a star import
import sys
from types import SimpleNamespace

import gradio as gr

from gen_table import *
from meta_data import *

head_style = """ """

# Benchmarks whose rows are keyed by an 'Agent' column instead of 'Model'.
_AGENT_BENCHMARKS = ("SWE-bench-verified", "CyberGym", "BountyBench")
# The search boxes are pre-filled with these hints; a submitted value equal to
# the hint means "no search term entered".
_MODEL_SEARCH_HINT = 'Input the Model Name (fuzzy, case insensitive)'
_AGENT_SEARCH_HINT = 'Input the Agent Name (fuzzy, case insensitive)'


def _search_settings(dataset_name):
    """Return ``(name_column, textbox_label, textbox_hint)`` for a benchmark."""
    if dataset_name in _AGENT_BENCHMARKS:
        return 'Agent', 'Agent Name', _AGENT_SEARCH_HINT
    return 'Model', 'Model Name', _MODEL_SEARCH_HINT


def _display_name(cell):
    """Return the lower-cased visible text of a name cell.

    Cells may be plain names or HTML anchors (``<a href=...>name</a>``); for
    an anchor only the link text is returned, so searches never match the URL.
    """
    return str(cell).split('</a>')[0].split('>')[-1].lower()


def filter_df_l2(dataset_name, fields, model_name):
    """Rebuild the results table of one benchmark tab.

    Args:
        dataset_name: Benchmark name; must be an entry of ``benchmark_list``.
        fields: Optional columns currently ticked in the tab's checkbox group.
        model_name: Content of the tab's search box.  The pre-filled hint text
            is treated as "no filter"; anything else is matched as a
            case-insensitive substring of the model/agent display name.

    Returns:
        A ``gr.components.DataFrame`` showing the essential columns followed by
        ``fields``, restricted to the rows that match the search term.

    Raises:
        ValueError: If ``dataset_name`` is not in ``benchmark_list``.
    """
    # `structs` and `benchmark_list` are module globals populated while the
    # Blocks layout below is built; they are looked up at call time.
    s = structs[benchmark_list.index(dataset_name)]
    headers = s.check_box['essential'] + fields
    # Work on a copy so the cached per-benchmark table is never modified.
    df = cp.deepcopy(s.table)

    name_column, _label, default_val = _search_settings(dataset_name)
    if model_name != default_val:
        print(model_name)
        needle = model_name.lower()
        # astype(bool) keeps the mask boolean even when the table is empty.
        matches = df[name_column].map(
            lambda cell: needle in _display_name(cell)).astype(bool)
        df = df[matches]

    return gr.components.DataFrame(
        value=df[headers],
        type='pandas',
        datatype=[s.type_map[x] for x in headers],
        interactive=False,
        wrap=True,
        visible=True)


with gr.Blocks(title="Frontier AI Cybersecurity Observatory", head=head_style) as demo:
    struct = load_results()
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    benchmark_list = list(results.keys())
    N_DATA = len(benchmark_list)
    DATASETS = benchmark_list
    gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA, EVAL_TIME))

    # One mutable attribute bag per benchmark tab, holding its table, metadata
    # and UI components so that filter_df_l2 can find them again later.
    structs = [SimpleNamespace() for _ in range(N_DATA)]

    with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
        # NOTE: the combined "Cybersecurity Main Leaderboard" tab (id=0) is
        # currently disabled, which is why tab ids start at 1.
        with gr.TabItem('🔍 About', elem_id='about', id=1):
            with open("about.md", 'r', encoding="utf-8") as file:
                gr.Markdown(file.read())

        for i, benchmark in enumerate(benchmark_list):
            with gr.TabItem(f'📊 {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
                if benchmark in LEADERBOARD_MD:
                    gr.Markdown(LEADERBOARD_MD[benchmark])

                s = structs[i]
                s.table, s.check_box = BUILD_L2_DF(results, benchmark)
                s.type_map = s.check_box['type_map']
                s.checkbox_group = gr.CheckboxGroup(
                    choices=s.check_box['all'],
                    value=s.check_box['required'],
                    label=f'{benchmark} CheckBoxes',
                    interactive=True,
                )
                s.headers = s.check_box['essential'] + s.checkbox_group.value

                _column, search_label, search_hint = _search_settings(benchmark)
                with gr.Row():
                    s.model_name = gr.Textbox(
                        value=search_hint,
                        label=search_label,
                        interactive=True,
                        visible=True)
                s.data_component = gr.components.DataFrame(
                    value=s.table[s.headers],
                    type='pandas',
                    datatype=[s.type_map[x] for x in s.headers],
                    interactive=False,
                    wrap=True,
                    visible=True)
                # Hidden textbox that passes the benchmark name to the shared
                # callback, so one function can serve every tab.
                s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)

                filter_inputs = [s.dataset, s.checkbox_group, s.model_name]
                s.checkbox_group.change(
                    fn=filter_df_l2,
                    inputs=filter_inputs,
                    outputs=s.data_component)
                s.model_name.submit(
                    fn=filter_df_l2,
                    inputs=filter_inputs,
                    outputs=s.data_component)

    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button')

if __name__ == '__main__':
    # NOTE(review): server_name='0.0.0.0' listens on all network interfaces and
    # share=True requests a public Gradio share link -- confirm both are
    # intended for the deployment environment.
    demo.launch(server_name='0.0.0.0', share=True)