"""Gradio app for the LONGCODEU leaderboard.

Loads one results CSV per task from ``./results`` and renders each task in
its own tab, followed by an "About" tab with a short description.
"""

import os

import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd

# Passed verbatim to gr.HTML at the top of the page.
TITLE = """

LONGCODEU leaderboard

"""

# Markdown shown in the "About" tab.
LLM_BENCHMARKS_TEXT = (
    " Welcome to LONGCODEU leaderboard! "
    "We introduce LONGCODEU(https://longcodeu.github.io) to comprehensively "
    "evaluate LCLMs' long code understanding ability from four aspects: "
    "code unit perception, intra-code unit understanding, "
    "inter-code unit relation understanding, and long documentation "
    "understanding. More results will be released soon. "
    "Please check the leaderboard for the latest updates. "
)

# One tab is rendered per task; each name must match a "<name>.csv" file
# in RESULTS_DIR.
TASKS = [
    "Code Unit Perception",
    "Code Unit Data Flow Analysis",
    "Code Unit Semantic Analysis",
    "Dependency Relation Analysis 1",
    "Dependency Relation Analysis 2",
    "Semantic Relation Extraction 1",
    "Semantic Relation Extraction 2",
    "Long Documentation Understanding",
]

RESULTS_DIR = "./results"

# Maps a results file's base name (without ".csv") to its DataFrame.
# Every column is read as a string (dtype=str).
LEADERBOARD_DF = {}
for res in os.listdir(RESULTS_DIR):
    stem, ext = os.path.splitext(res)
    # Skip anything that is not a CSV (README, .gitkeep, sub-directories, ...)
    # instead of handing it to pandas.
    if ext != ".csv":
        continue
    LEADERBOARD_DF[stem] = pd.read_csv(os.path.join(RESULTS_DIR, res), dtype=str)


class AutoEvalColumn:
    """Column specifications for the leaderboard table.

    Each class attribute is a dict with the keys ``name`` (column header),
    ``type`` (datatype passed to the Leaderboard component),
    ``displayed_by_default`` and ``never_hidden``. Attribute definition order
    is the order in which the specs are handed to the component.
    """

    model = {"name": "Model", "type": "str", "displayed_by_default": True, "never_hidden": True}
    model_type = {"name": "Type", "type": "str", "displayed_by_default": False, "never_hidden": False}
    model_size = {"name": "Model Size", "type": "str", "displayed_by_default": False, "never_hidden": False}
    context_size = {"name": "Context Size", "type": "str", "displayed_by_default": False, "never_hidden": False}
    res_0_8 = {"name": "0~8K", "type": "str", "displayed_by_default": True, "never_hidden": False}
    res_8_16 = {"name": "8~16K", "type": "str", "displayed_by_default": True, "never_hidden": False}
    res_16_32 = {"name": "16~32K", "type": "str", "displayed_by_default": True, "never_hidden": False}
    res_32_64 = {"name": "32~64K", "type": "str", "displayed_by_default": True, "never_hidden": False}
    res_64_128 = {"name": "64~128K", "type": "str", "displayed_by_default": True, "never_hidden": False}


def _column_specs():
    """Return the dict-valued attributes of AutoEvalColumn in definition order."""
    # The class __dict__ also holds entries such as __module__ and __doc__;
    # only the dict-valued attributes are column specs.
    return [spec for spec in vars(AutoEvalColumn).values() if isinstance(spec, dict)]


### Space initialisation
def init_leaderboard(dataframe):
    """Build a read-only Leaderboard component for one task's results.

    Args:
        dataframe: pandas DataFrame holding the rows to display.

    Returns:
        A ``Leaderboard`` component configured from ``AutoEvalColumn``:
        searchable on "Model" and filterable on "Type" and "Context Size".

    Raises:
        ValueError: If ``dataframe`` is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    specs = _column_specs()
    return Leaderboard(
        value=dataframe,
        datatype=[spec["type"] for spec in specs],
        select_columns=SelectColumns(
            default_selection=[
                spec["name"] for spec in specs if spec["displayed_by_default"]
            ],
            cant_deselect=[
                spec["name"] for spec in specs if spec.get("never_hidden", False)
            ],
            label="Select Columns to Display:",
        ),
        search_columns=["Model"],
        filter_columns=[
            ColumnFilter("Type", type="checkboxgroup", label="Type"),
            ColumnFilter("Context Size", type="dropdown", label="Context Size"),
        ],
        interactive=False,
    )


# Fail early with an actionable message rather than a bare KeyError raised
# halfway through building the UI.
_missing_tasks = [task for task in TASKS if task not in LEADERBOARD_DF]
if _missing_tasks:
    raise KeyError(
        f"No results CSV found in {RESULTS_DIR!r} for task(s): "
        + ", ".join(repr(task) for task in _missing_tasks)
    )

demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)

    with gr.Tabs(elem_classes="tab-buttons"):
        for task_name in TASKS:
            with gr.TabItem(f"🌟 {task_name}"):
                init_leaderboard(LEADERBOARD_DF[task_name])

        with gr.TabItem("📝 About"):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

# Start the Gradio server as soon as this module is executed.
demo.launch()