Spaces:

YLab-Open
/

BRIDGE-Medical-Leaderboard

Running

App Files Files Community

kevinxie06 commited on Jun 3

Commit

2c0cfba

verified ·

1 Parent(s): 38047f0

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -923

app.py DELETED Viewed

@@ -1,923 +0,0 @@
-import gradio as gr
-from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns
-import config
-from pathlib import Path
-import pandas as pd
-import json
-import warnings
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, Literal
-import pandas as pd
-from pandas.io.formats.style import Styler
-import semantic_version
-from dataclasses import dataclass, field
-from gradio.components import Component
-from gradio.data_classes import GradioModel
-from gradio.events import Events
-@dataclass
-class SelectColumns:
-    default_selection: Optional[list[str]] = field(default_factory=list)
-    cant_deselect: Optional[list[str]] = field(default_factory=list)
-    allow: bool = True
-    label: Optional[str] = None
-    show_label: bool = True
-    info: Optional[str] = None
-@dataclass
-class ColumnFilter:
-    column: str
-    type: Literal["slider", "dropdown", "checkboxgroup", "boolean"] = None
-    default: Optional[Union[int, float, List[Tuple[str, str]]]] = None
-    choices: Optional[Union[int, float, List[Tuple[str, str]]]] = None
-    label: Optional[str] = None
-    info: Optional[str] = None
-    show_label: bool = True
-    min: Optional[Union[int, float]] = None
-    max: Optional[Union[int, float]] = None
-class DataframeData(GradioModel):
-    headers: List[str]
-    data: Union[List[List[Any]], List[Tuple[Any, ...]]]
-    metadata: Optional[Dict[str, Optional[List[Any]]]] = None
-abs_path = Path(__file__).parent
-# Load the leaderboard data for
-zero_shot_df = pd.read_json("leaderboards/Zero-Shot_leaderboard_data.json", precise_float=True)
-five_shot_df = pd.read_json("leaderboards/Few-Shot_leaderboard_data.json", precise_float=True)
-cot_df = pd.read_json("leaderboards/CoT_leaderboard_data.json", precise_float=True)
-# Original Average Performances
-original_zero_shot_avg_perf = zero_shot_df["Average Performance"]
-original_five_shot_avg_perf = five_shot_df["Average Performance"]
-original_cot_avg_perf = cot_df["Average Performance"]
-# Load the task information json data
-with open("task_information.json", 'r') as file:
-    task_information_json = json.load(file)
-cot_currently_selected_filters = {
-    "Language": [],
-    "Task Type": [],
-    "Clinical Context": [],
-    "Data Access": [],
-    "Applications": [],
-    "Clinical Stage": []
-}
-five_shot_currently_selected_filters = {
-    "Language": [],
-    "Task Type": [],
-    "Clinical Context": [],
-    "Data Access": [],
-    "Applications": [],
-    "Clinical Stage": []
-}
-zero_shot_currently_selected_filters = {
-    "Language": [],
-    "Task Type": [],
-    "Clinical Context": [],
-    "Data Access": [],
-    "Applications": [],
-    "Clinical Stage": []
-}
-# with open("/Users/kevinxie/Desktop/Clinical NLP/Clinical-Text-Leaderboard/leaderboard_data.json", 'r') as file:
-with open("leaderboards/Few-Shot_leaderboard_data.json", 'r') as file:
-    five_shot_leaderboard_json = json.load(file)
-with open("leaderboards/CoT_leaderboard_data.json", 'r') as file:
-    CoT_leaderboard_json = json.load(file)
-with open("leaderboards/Zero-Shot_leaderboard_data.json", 'r') as file:
-    zero_shot_leaderboard_json = json.load(file)
-valid_tasks = {'NUBES', 'NorSynthClinical-NER', 'MEDIQA 2023-sum-A', 'Medication extraction',
-               'IMCS-V2-DAC', 'Cantemist-Coding', 'IFMIR-NER', 'EHRQA-QA', 'Ex4CDS', 'MedDG',
-               'MTS-Temporal', 'CHIP-MDCFNPC', 'n2c2 2014-Diabetes', 'MIMIC-III Outcome.LoS',
-               'n2c2 2014-Hypertension', 'RuCCoN', 'CARES-ICD10 Chapter', 'RuDReC-NER', 'MIMIC-IV DiReCT.Dis',
-               'n2c2 2014-Medication', 'iCorpus', 'Brateca-Hospitalization', 'n2c2 2010-Assertion',
-               'NorSynthClinical-PHI', 'IFMIR - NER&factuality', 'JP-STS', 'NorSynthClinical-RE',
-               'n2c2 2010-Concept', 'BARR2', 'IMCS-V2-NER', 'IMCS-V2-MRG', 'cMedQA', 'MedSTS',
-               'BRONCO150-NER&Status', 'n2c2 2018-ADE&medication', 'CLISTER', 'ClinicalNotes-UPMC',
-               'PPTS', 'CLIP', 'IMCS-V2-SR', 'EHRQA-Sub department', 'BrainMRI-AIS', 'Brateca-Mortality',
-               'meddocan', 'CHIP-CDEE', 'CAS-evidence', 'MEDIQA 2019-RQE', 'Cantemis-Norm', 'MEDIQA 2023-sum-B',
-               'CHIP-CTC', 'C-EMRS', 'CARES ICD10 Block', 'Cantemis-NER', 'CLINpt-NER', 'MEDIQA 2023-chat-A',
-               'n2c2 2014-De-identification', 'n2c2 2014-Hyperlipidemia', 'EHRQA-Primary department',
-               'ADE-Drug dosage', 'IFMIR-Incident type', 'MIMIC-III Outcome.Mortality', 'n2c2 2006-De-identification',
-               'CAS-label', 'MIMIC-IV CDM', 'CodiEsp-ICD-10-CM', 'n2c2 2010-Relation', 'CARES-ICD10 Subblock',
-               'MIE', 'HealthCareMagic-100k', 'ADE-Identification', 'MIMIC-IV DiReCT.PDD', 'ADE-Extraction',
-               'DialMed', 'GOUT-CC-Consensus', 'GraSSCo PHI', 'RuMedNLI', 'RuMedDaNet', 'CBLUE-CDN', 'icliniq-10k',
-               'CARDIO-DE', 'CARES-Area', 'DiSMed-NER', 'CodiEsp-ICD-10-PCS', 'MedNLI', 'MTS', 'MIMIC-IV BHC',
-               'n2c2 2014-CAD'}
-n_models = int(list(zero_shot_leaderboard_json["Model"].keys())[-1]) + 1
-def get_filtered_columns(filter_selections):
-    """
-    Given an array of selected filters, this function will return a list of all
-    the columns that match the criteria.
-    Input:
-        filter_selections: dictionary of all task type filter selections
-    Output:
-        Returns a list of all valid tasks to display (by task name)
-    """
-    # Need to add a flag to this filter so that it only displays those that match all attributes
-    valid_columns = []
-    for task in task_information_json:
-        task_info = task_information_json[task]
-        # Flag to keep track of whether this task is valid
-        isValid = True
-        # Iterate through each attribute of the task
-        for attribute in task_info:
-            # If the filter is empty
-            if not filter_selections[attribute]:
-                continue
-            value = task_info[attribute]
-            # print(filter_selections[attribute])
-            # Handle edge case for multiple categories
-            if "," in value:
-                all_categories = value.split(", ")
-                flag = False
-                for category in all_categories:
-                    if category in filter_selections[attribute]:
-                        flag = True
-                        break
-                if flag:  # one category matches
-                    isValid = True
-                else: # none of the categories matched
-                    isValid  = False
-            # Handle Brazilian Edge Case
-            elif (value == 'Portuguese\n(Brazilian)') and ('Portuguese' in filter_selections[attribute]):
-                isValid = True
-                break
-            elif value not in filter_selections[attribute]:
-            # if filter_selections[attribute] not in task_info[attribute]:
-                isValid = False
-                # break
-        if task in valid_tasks and isValid:
-            valid_columns.append(task)
-    return valid_columns
-def isEmpty(currently_selected_filters):
-    """
-    Checks if there are no selected filters
-    """
-    flag = True
-    for key, value in currently_selected_filters.items():
-        if not value:
-            continue
-        else:
-            return False
-    return True
-####################################################################################################
-####### CoT Filters
-####################################################################################################
-def cot_filter_language(language_choice):
-    # Update the Global store for the currently selected filters
-    cot_currently_selected_filters["Language"] = language_choice
-    if isEmpty(cot_currently_selected_filters):
-        cot_df["Average Performance"] = original_cot_avg_perf
-        return cot_df
-    filtered_cols = get_filtered_columns(cot_currently_selected_filters)
-    updated_performance = cot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    cot_df["Average Performance"] = cot_df.index.map(updated_performance_int)
-    return cot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Size (B)', 'Average Performance'] + filtered_cols]
-def cot_filter_task_type(task_type_choice):
-    # Update the Global store for the currently selected filters
-    cot_currently_selected_filters["Task Type"] = task_type_choice
-    if isEmpty(cot_currently_selected_filters):
-        cot_df["Average Performance"] = original_cot_avg_perf
-        return cot_df
-    filtered_cols = get_filtered_columns(cot_currently_selected_filters)
-    updated_performance = cot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    cot_df["Average Performance"] = cot_df.index.map(updated_performance_int)
-    return cot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def cot_filter_clinical_context(clinical_context_choice):
-    # Update the Global store for the currently selected filters
-    cot_currently_selected_filters["Clinical Context"] = clinical_context_choice
-    if isEmpty(cot_currently_selected_filters):
-        cot_df["Average Performance"] = original_cot_avg_perf
-        return cot_df
-    filtered_cols = get_filtered_columns(cot_currently_selected_filters)
-    updated_performance = cot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    cot_df["Average Performance"] = cot_df.index.map(updated_performance_int)
-    return cot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def cot_filter_applications(applications_choice):
-    # Update the Global store for the currently selected filters
-    cot_currently_selected_filters["Applications"] = applications_choice
-    if isEmpty(cot_currently_selected_filters):
-        cot_df["Average Performance"] = original_cot_avg_perf
-        return cot_df
-    filtered_cols = get_filtered_columns(cot_currently_selected_filters)
-    updated_performance = cot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    cot_df["Average Performance"] = cot_df.index.map(updated_performance_int)
-    return cot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def cot_filter_stage_options(stage_choice):
-    # Update the Global store for the currently selected filters
-    cot_currently_selected_filters["Clinical Stage"] = stage_choice
-    if isEmpty(cot_currently_selected_filters):
-        cot_df["Average Performance"] = original_cot_avg_perf
-        return cot_df
-    filtered_cols = get_filtered_columns(cot_currently_selected_filters)
-    updated_performance = cot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    cot_df["Average Performance"] = cot_df.index.map(updated_performance_int)
-    return cot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def cot_filter_data_access(data_access_choice):
-    # Update the Global store for the currently selected filters
-    cot_currently_selected_filters["Data Access"] = data_access_choice
-    if isEmpty(cot_currently_selected_filters):
-        cot_df["Average Performance"] = original_cot_avg_perf
-        return cot_df
-    filtered_cols = get_filtered_columns(cot_currently_selected_filters)
-    updated_performance = cot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    cot_df["Average Performance"] = cot_df.index.map(updated_performance_int)
-    return cot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def cot_update_average_performance(selected_columns):
-    """
-    When a user clicks filters to filter certain tasks, the average performance
-    of the model should update. This function takes uses the updated filtered columns
-    and calculates the average performances of only those columns. It then updates
-    the leaderboard accordingly.
-    """
-    updated_average_performance = {}
-    for i in range(n_models):
-        performance = 0
-        num_tasks = 0
-        for task in selected_columns:
-            num_tasks += 1
-            performance += float(CoT_leaderboard_json[task][str(i)])
-        if num_tasks == 0:
-            num_tasks = 1
-        updated_average_performance[f"{i}"] = float(round(performance / num_tasks, 2))
-    return updated_average_performance
-####################################################################################################
-####### Few Shot Filters
-####################################################################################################
-def five_shot_filter_language(language_choice):
-    # Update the Global store for the currently selected filters
-    five_shot_currently_selected_filters["Language"] = language_choice
-    if isEmpty(five_shot_currently_selected_filters):
-        five_shot_df["Average Performance"] = original_five_shot_avg_perf
-        return five_shot_df
-    filtered_cols = get_filtered_columns(five_shot_currently_selected_filters)
-    updated_performance = five_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    five_shot_df["Average Performance"] = five_shot_df.index.map(updated_performance_int)
-    return five_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def five_shot_filter_task_type(task_type_choice):
-    # Update the Global store for the currently selected filters
-    five_shot_currently_selected_filters["Task Type"] = task_type_choice
-    if isEmpty(five_shot_currently_selected_filters):
-        five_shot_df["Average Performance"] = original_five_shot_avg_perf
-        return five_shot_df
-    filtered_cols = get_filtered_columns(five_shot_currently_selected_filters)
-    updated_performance = five_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    five_shot_df["Average Performance"] = five_shot_df.index.map(updated_performance_int)
-    return five_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def five_shot_filter_clinical_context(clinical_context_choice):
-    # Update the Global store for the currently selected filters
-    five_shot_currently_selected_filters["Clinical Context"] = clinical_context_choice
-    if isEmpty(five_shot_currently_selected_filters):
-        five_shot_df["Average Performance"] = original_five_shot_avg_perf
-        return five_shot_df
-    filtered_cols = get_filtered_columns(five_shot_currently_selected_filters)
-    updated_performance = five_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    five_shot_df["Average Performance"] = five_shot_df.index.map(updated_performance_int)
-    return five_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def five_shot_filter_applications(applications_choice):
-    # Update the Global store for the currently selected filters
-    five_shot_currently_selected_filters["Applications"] = applications_choice
-    if isEmpty(five_shot_currently_selected_filters):
-        five_shot_df["Average Performance"] = original_five_shot_avg_perf
-        return five_shot_df
-    filtered_cols = get_filtered_columns(five_shot_currently_selected_filters)
-    updated_performance = five_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    five_shot_df["Average Performance"] = five_shot_df.index.map(updated_performance_int)
-    return five_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def five_shot_filter_stage_options(stage_choice):
-    # Update the Global store for the currently selected filters
-    five_shot_currently_selected_filters["Clinical Stage"] = stage_choice
-    if isEmpty(five_shot_currently_selected_filters):
-        five_shot_df["Average Performance"] = original_five_shot_avg_perf
-        return five_shot_df
-    filtered_cols = get_filtered_columns(five_shot_currently_selected_filters)
-    updated_performance = five_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    five_shot_df["Average Performance"] = five_shot_df.index.map(updated_performance_int)
-    return five_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def five_shot_filter_data_access(data_access_choice):
-    # Update the Global store for the currently selected filters
-    five_shot_currently_selected_filters["Data Access"] = data_access_choice
-    if isEmpty(five_shot_currently_selected_filters):
-        five_shot_df["Average Performance"] = original_five_shot_avg_perf
-        return five_shot_df
-    filtered_cols = get_filtered_columns(five_shot_currently_selected_filters)
-    updated_performance = five_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    five_shot_df["Average Performance"] = five_shot_df.index.map(updated_performance_int)
-    return five_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def five_shot_update_average_performance(selected_columns):
-    """
-    When a user clicks filters to filter certain tasks, the average performance
-    of the model should update. This function takes uses the updated filtered columns
-    and calculates the average performances of only those columns. It then updates
-    the leaderboard accordingly.
-    """
-    updated_average_performance = {}
-    for i in range(n_models):
-        performance = 0
-        num_tasks = 0
-        for task in selected_columns:
-            num_tasks += 1
-            performance += float(five_shot_leaderboard_json[task][str(i)])
-        if num_tasks == 0:
-            num_tasks = 1
-        updated_average_performance[f"{i}"] = float(round(performance / num_tasks, 2))
-    return updated_average_performance
-####################################################################################################
-###### Zero Shot Filters
-####################################################################################################
-def zero_shot_filter_language(language_choice):
-    # Update the Global store for the currently selected filters
-    zero_shot_currently_selected_filters["Language"] = language_choice
-    if isEmpty(zero_shot_currently_selected_filters):
-        zero_shot_df["Average Performance"] = original_zero_shot_avg_perf
-        return zero_shot_df
-    filtered_cols = get_filtered_columns(zero_shot_currently_selected_filters)
-    updated_performance = zero_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    zero_shot_df["Average Performance"] = zero_shot_df.index.map(updated_performance_int)
-    return zero_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def zero_shot_filter_task_type(task_type_choice):
-    # Update the Global store for the currently selected filters
-    zero_shot_currently_selected_filters["Task Type"] = task_type_choice
-    if isEmpty(zero_shot_currently_selected_filters):
-        zero_shot_df["Average Performance"] = original_zero_shot_avg_perf
-        return zero_shot_df
-    filtered_cols = get_filtered_columns(zero_shot_currently_selected_filters)
-    updated_performance = zero_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    zero_shot_df["Average Performance"] = zero_shot_df.index.map(updated_performance_int)
-    return zero_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def zero_shot_filter_clinical_context(clinical_context_choice):
-    # Update the Global store for the currently selected filters
-    zero_shot_currently_selected_filters["Clinical Context"] = clinical_context_choice
-    if isEmpty(zero_shot_currently_selected_filters):
-        zero_shot_df["Average Performance"] = original_zero_shot_avg_perf
-        return zero_shot_df
-    filtered_cols = get_filtered_columns(zero_shot_currently_selected_filters)
-    updated_performance = zero_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    zero_shot_df["Average Performance"] = zero_shot_df.index.map(updated_performance_int)
-    return zero_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def zero_shot_filter_applications(applications_choice):
-    # Update the Global store for the currently selected filters
-    zero_shot_currently_selected_filters["Applications"] = applications_choice
-    if isEmpty(zero_shot_currently_selected_filters):
-        zero_shot_df["Average Performance"] = original_zero_shot_avg_perf
-        return zero_shot_df
-    filtered_cols = get_filtered_columns(zero_shot_currently_selected_filters)
-    updated_performance = zero_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    zero_shot_df["Average Performance"] = zero_shot_df.index.map(updated_performance_int)
-    return zero_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def zero_shot_filter_stage_options(stage_choice):
-    # Update the Global store for the currently selected filters
-    zero_shot_currently_selected_filters["Clinical Stage"] = stage_choice
-    if isEmpty(zero_shot_currently_selected_filters):
-        zero_shot_df["Average Performance"] = original_zero_shot_avg_perf
-        return zero_shot_df
-    filtered_cols = get_filtered_columns(zero_shot_currently_selected_filters)
-    updated_performance = zero_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    zero_shot_df["Average Performance"] = zero_shot_df.index.map(updated_performance_int)
-    return zero_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def zero_shot_filter_data_access(data_access_choice):
-    # Update the Global store for the currently selected filters
-    zero_shot_currently_selected_filters["Data Access"] = data_access_choice
-    if isEmpty(zero_shot_currently_selected_filters):
-        zero_shot_df["Average Performance"] = original_zero_shot_avg_perf
-        return zero_shot_df
-    filtered_cols = get_filtered_columns(zero_shot_currently_selected_filters)
-    updated_performance = zero_shot_update_average_performance(filtered_cols)
-    # Convert dictionary keys to integers to match the DataFrame index
-    updated_performance_int = {int(k): v for k, v in updated_performance.items()}
-    # Map the values to the 'Average Performance' column based on index
-    zero_shot_df["Average Performance"] = zero_shot_df.index.map(updated_performance_int)
-    return zero_shot_df[['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance'] + filtered_cols]
-def zero_shot_update_average_performance(selected_columns):
-    """
-    When a user clicks filters to filter certain tasks, the average performance
-    of the model should update. This function takes uses the updated filtered columns
-    and calculates the average performances of only those columns. It then updates
-    the leaderboard accordingly.
-    """
-    updated_average_performance = {}
-    for i in range(n_models):
-        performance = 0
-        num_tasks = 0
-        for task in selected_columns:
-            num_tasks += 1
-            performance += float(zero_shot_leaderboard_json[task][str(i)])
-        if num_tasks == 0:
-            num_tasks = 1
-        updated_average_performance[f"{i}"] = float(round(performance / num_tasks, 2))
-    return updated_average_performance
-def postprocess(self, value: pd.DataFrame) -> DataframeData:
-        # Ensure that the "Average Performance" column exists
-        if "Average Performance" in value.columns:
-            # Sort the DataFrame by the "average performance" column in descending order
-            value = value.sort_values(by="Average Performance", ascending=False)
-            return DataframeData(
-                headers=list(value.columns),  # type: ignore
-                data=value.to_dict(orient="split")["data"],  # type: ignore
-            )
-        if value is None:
-            return self.postprocess(pd.DataFrame({"column 1": []}))
-        if isinstance(value, (str, pd.DataFrame)):
-            if isinstance(value, str):
-                value = pd.read_csv(value)  # type: ignore
-            if len(value) == 0:
-                return DataframeData(
-                    headers=list(value.columns),  # type: ignore
-                    data=[[]],  # type: ignore
-                )
-            return DataframeData(
-                headers=list(value.columns),  # type: ignore
-                data=value.to_dict(orient="split")["data"],  # type: ignore
-            )
-        elif isinstance(value, Styler):
-            if semantic_version.Version(pd.__version__) < semantic_version.Version(
-                "1.5.0"
-            ):
-                raise ValueError(
-                    "Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
-                )
-            if self.interactive:
-                warnings.warn(
-                    "Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
-                )
-            df: pd.DataFrame = value.data  # type: ignore
-            if len(df) == 0:
-                return DataframeData(
-                    headers=list(df.columns),
-                    data=[[]],
-                    metadata=self.__extract_metadata(value),  # type: ignore
-                )
-            return DataframeData(
-                headers=list(df.columns),
-                data=df.to_dict(orient="split")["data"],  # type: ignore
-                metadata=self.__extract_metadata(value),  # type: ignore
-            )
-# Models are sorted in order of decreasing average performance (best performance at the top!)
-Leaderboard.postprocess = postprocess
-####################################################################################################
-###### Leaderboard
-####################################################################################################
-with gr.Blocks() as app:
-    gr.Markdown("# BRIDGE (Benchmarking Large Language Models for Understanding Real-world Clinical Practice Text)")
-    with gr.Tabs():
-        with gr.Tab("README"):
-            # gr.Markdown((Path(__file__).parent / "docs.md").read_text())
-            html_content = (Path(__file__).parent / "docs.md").read_text()
-            gr.HTML(html_content)
-        with gr.Tab("Zero-Shot"):
-            leaderboard = Leaderboard(
-                value=zero_shot_df,
-                select_columns = None,
-                search_columns=SearchColumns(primary_column = "Model", secondary_columns = "",
-                                     placeholder="Search by Model Name",
-                                     label="Model Search"),
-                hide_columns=["Model: Size Range", "Model: Accessibility"],
-                filter_columns=["Model: Domain", "Model: Size Range", "Model: Accessibility"],
-                datatype=config.TYPES,
-            )
-            # Language Filter
-            all_languages = ['English', 'Spanish',
-                             'Chinese', 'Norwegian',
-                             'Russian', 'Portuguese',
-                             'German', 'Japanese', 'French']
-            language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")
-            # Task Type Filter
-            all_task_types = ['Question Answering', 'Text Classification', 'Named Entity Recognition',
-                              'Normalization and Coding', 'Natural Language Inference', 'Summarization',
-                              'Event Extraction', 'Semantic Similarity']
-            task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")
-            all_clinical_contexts = ['Neurology',  'Oncology',  'Radiology',  'Pulmonology',
-                                     'Cardiology',  'Dermatology',  'Critical Care',  'Nephrology',
-                                     'General',  'Endocrinology',  'Pediatrics',  'Pharmacology',
-                                     'Gastroenterology',  'Psychology']
-            cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")
-            # Applications Filter
-            all_applications = ['Procudure information', 'Concept standarization',
-                                'Specialist recommendation', 'Negation identification',
-                                'Clinical trial matching', 'Consultation summarization',
-                                'Semantic relation', 'Post-discharge patient management',
-                                'De-identification', 'Billing & Coding', 'Phenotyping',
-                                'Data organization', 'Temporal & Causality relation',
-                                'Summarization', 'Screen & Consultation', 'Diagnosis',
-                                'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
-                                'Medication information']
-            application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")
-            # Clinical Stage Filter
-            all_stages = ['Treatment and Intervention', 'Triage and Referral',
-                          'Initial Assessment', 'Discharge and Administration',
-                          'Research', 'Diagnosis and Prognosis']
-            stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")
-            # Data Access Filter
-            all_data_access = ['Open Access', 'Regulated']
-            da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")
-            language_options.change(fn=zero_shot_filter_language, inputs=language_options, outputs=leaderboard)
-            task_type_options.change(fn=zero_shot_filter_task_type, inputs=task_type_options, outputs=leaderboard)
-            cc_options.change(fn=zero_shot_filter_clinical_context, inputs=cc_options, outputs=leaderboard)
-            application_options.change(fn=zero_shot_filter_applications, inputs=application_options, outputs=leaderboard)
-            da_options.change(fn=zero_shot_filter_data_access, inputs=da_options, outputs=leaderboard)
-            stage_options.change(fn=zero_shot_filter_stage_options, inputs=stage_options, outputs=leaderboard)
-        with gr.Tab("Few-Shot"):
-            leaderboard = Leaderboard(
-                value=five_shot_df,
-                select_columns = None,
-                search_columns=SearchColumns(primary_column = "Model", secondary_columns = "",
-                                     placeholder="Search by Model Name",
-                                     label="Model Search"),
-                hide_columns=["Model: Size Range", "Model: Accessibility"],
-                filter_columns=["Model: Domain", "Model: Size Range", "Model: Accessibility"],
-                datatype=config.TYPES,
-            )
-            # Language Filter
-            all_languages = ['English', 'Spanish',
-                             'Chinese', 'Norwegian',
-                             'Russian', 'Portuguese',
-                             'German', 'Japanese', 'French']
-            language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")
-            # Task Type Filter
-            all_task_types = ['Question Answering', 'Text Classification', 'Named Entity Recognition',
-                              'Normalization and Coding', 'Natural Language Inference', 'Summarization',
-                              'Event Extraction', 'Semantic Similarity']
-            task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")
-            # Clinical Context Filter
-            all_clinical_contexts = ['Neurology',  'Oncology',  'Radiology',  'Pulmonology',
-                                     'Cardiology',  'Dermatology',  'Critical Care',  'Nephrology',
-                                     'General',  'Endocrinology',  'Pediatrics',  'Pharmacology',
-                                     'Gastroenterology',  'Psychology']
-            cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")
-            # Applications Filter
-            all_applications = ['Procudure information', 'Concept standarization',
-                                'Specialist recommendation', 'Negation identification',
-                                'Clinical trial matching', 'Consultation summarization',
-                                'Semantic relation', 'Post-discharge patient management',
-                                'De-identification', 'Billing & Coding', 'Phenotyping',
-                                'Data organization', 'Temporal & Causality relation',
-                                'Summarization', 'Screen & Consultation', 'Diagnosis',
-                                'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
-                                'Medication information']
-            application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")
-            # Clinical Stage Filter
-            all_stages = ['Treatment and Intervention', 'Triage and Referral',
-                          'Initial Assessment', 'Discharge and Administration',
-                          'Research', 'Diagnosis and Prognosis']
-            stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")
-            # Data Access Filter
-            all_data_access = ['Open Access', 'Regulated']
-            da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")
-            language_options.change(fn=five_shot_filter_language, inputs=language_options, outputs=leaderboard)
-            task_type_options.change(fn=five_shot_filter_task_type, inputs=task_type_options, outputs=leaderboard)
-            cc_options.change(fn=five_shot_filter_clinical_context, inputs=cc_options, outputs=leaderboard)
-            application_options.change(fn=five_shot_filter_applications, inputs=application_options, outputs=leaderboard)
-            da_options.change(fn=five_shot_filter_data_access, inputs=da_options, outputs=leaderboard)
-            stage_options.change(fn=five_shot_filter_stage_options, inputs=stage_options, outputs=leaderboard)
-        with gr.Tab("CoT"):
-            leaderboard = Leaderboard(
-                value=cot_df,
-                select_columns = None,
-                search_columns=SearchColumns(primary_column = "Model", secondary_columns = "",
-                                     placeholder="Search by Model Name",
-                                     label="Model Search"),
-                hide_columns=["Model: Size Range", "Model: Accessibility"],
-                filter_columns=["Model: Domain", "Model: Size Range", "Model: Accessibility"],
-                datatype=config.TYPES,
-            )
-            # Language Filter
-            all_languages = ['English', 'Spanish',
-                             'Chinese', 'Norwegian',
-                             'Russian', 'Portuguese',
-                             'German', 'Japanese', 'French']
-            language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")
-            # Task Type Filter
-            all_task_types = ['Question Answering', 'Text Classification', 'Named Entity Recognition',
-                              'Normalization and Coding', 'Natural Language Inference', 'Summarization',
-                              'Event Extraction', 'Semantic Similarity']
-            task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")
-            # Clinical Context Filter
-            all_clinical_contexts = ['Neurology',  'Oncology',  'Radiology',  'Pulmonology',
-                                     'Cardiology',  'Dermatology',  'Critical Care',  'Nephrology',
-                                     'General',  'Endocrinology',  'Pediatrics',  'Pharmacology',
-                                     'Gastroenterology',  'Psychology']
-            cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")
-            # Applications Filter
-            all_applications = ['Procudure information', 'Concept standarization',
-                                'Specialist recommendation', 'Negation identification',
-                                'Clinical trial matching', 'Consultation summarization',
-                                'Semantic relation', 'Post-discharge patient management',
-                                'De-identification', 'Billing & Coding', 'Phenotyping',
-                                'Data organization', 'Temporal & Causality relation',
-                                'Summarization', 'Screen & Consultation', 'Diagnosis',
-                                'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
-                                'Medication information']
-            application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")
-            # Clinical Stage Filter
-            all_stages = ['Treatment and Intervention', 'Triage and Referral',
-                          'Initial Assessment', 'Discharge and Administration',
-                          'Research', 'Diagnosis and Prognosis']
-            stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")
-            # Data Access Filter
-            all_data_access = ['Open Access', 'Regulated']
-            da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")
-            language_options.change(fn=cot_filter_language, inputs=language_options, outputs=leaderboard)
-            task_type_options.change(fn=cot_filter_task_type, inputs=task_type_options, outputs=leaderboard)
-            cc_options.change(fn=cot_filter_clinical_context, inputs=cc_options, outputs=leaderboard)
-            application_options.change(fn=cot_filter_applications, inputs=application_options, outputs=leaderboard)
-            da_options.change(fn=cot_filter_data_access, inputs=da_options, outputs=leaderboard)
-            stage_options.change(fn=cot_filter_stage_options, inputs=stage_options, outputs=leaderboard)
-if __name__ == "__main__":
-    app.launch()