Spaces:

YLab-Open
/

BRIDGE-Medical-Leaderboard

Running

App Files Files Community

BRIDGE-Medical-Leaderboard / app.py

jiagengwu

Update app.py

242f797 verified about 1 month ago

raw

history blame contribute delete

42.9 kB

	import gradio as gr
	from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns
	import config
	from pathlib import Path
	import pandas as pd
	import json

	import warnings
	from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, Literal
	import pandas as pd
	from pandas.io.formats.style import Styler

	import semantic_version
	from dataclasses import dataclass, field

	from gradio.components import Component
	from gradio.data_classes import GradioModel
	from gradio.events import Events

	@dataclass
	class SelectColumns:
	    # Options describing a column-selection control.
	    # NOTE: this definition rebinds the name `SelectColumns` that is also
	    # imported from `gradio_leaderboard` at the top of the file.

	    # Columns selected initially; every instance gets its own empty list.
	    default_selection: Optional[List[str]] = field(default_factory=list)
	    # Columns that may not be deselected; every instance gets its own empty list.
	    cant_deselect: Optional[List[str]] = field(default_factory=list)
	    # Defaults to True; presumably toggles whether selection is offered at all.
	    allow: bool = True
	    # Optional label text; None when not supplied.
	    label: Optional[str] = None
	    # Defaults to True; presumably controls whether the label is rendered.
	    show_label: bool = True
	    # Optional informational text; None when not supplied.
	    info: Optional[str] = None

	@dataclass
	class ColumnFilter:
	    # Describes a filter on a single column. Only `column` is required;
	    # every other field defaults to None (or True for `show_label`).

	    # Name of the column the filter applies to.
	    column: str
	    # Kind of filter control. The default is None, so the annotation is
	    # Optional rather than a bare Literal.
	    type: Optional[Literal["slider", "dropdown", "checkboxgroup", "boolean"]] = None
	    # Initial value of the filter, if any.
	    default: Optional[Union[int, float, List[Tuple[str, str]]]] = None
	    # Available choices for the filter, if any.
	    choices: Optional[Union[int, float, List[Tuple[str, str]]]] = None
	    # Optional label text.
	    label: Optional[str] = None
	    # Optional informational text.
	    info: Optional[str] = None
	    # Defaults to True; presumably controls whether the label is rendered.
	    show_label: bool = True
	    # Optional numeric bounds (presumably used by the "slider" type).
	    min: Optional[Union[int, float]] = None
	    max: Optional[Union[int, float]] = None

	# Payload returned by `postprocess` below: the column headers, the row data
	# and optional metadata of a table.
	class DataframeData(GradioModel):
	    # Column names; `postprocess` fills this with `list(df.columns)`.
	    headers: List[str]
	    # Row data; `postprocess` fills this with the "data" entry of
	    # `DataFrame.to_dict(orient="split")`, or `[[]]` for an empty table.
	    data: Union[List[List[Any]], List[Tuple[Any, ...]]]
	    # Optional extra information; only set by the Styler branch of `postprocess`.
	    metadata: Optional[Dict[str, Optional[List[Any]]]] = None


	# Directory containing this file. Data files are resolved against it so the
	# app does not depend on the working directory it is launched from.
	abs_path = Path(__file__).parent

	# Load the leaderboard data for each evaluation setting (zero-shot, few-shot, CoT)
	zero_shot_df = pd.read_json(abs_path / "leaderboards" / "Zero-Shot_leaderboard_data.json", precise_float=True)
	five_shot_df = pd.read_json(abs_path / "leaderboards" / "Few-Shot_leaderboard_data.json", precise_float=True)
	cot_df = pd.read_json(abs_path / "leaderboards" / "CoT_leaderboard_data.json", precise_float=True)

	# Original Average Performances. Copies are kept because the filter callbacks
	# overwrite the "Average Performance" column of the frames above and later
	# restore it from these values when every filter is cleared.
	original_zero_shot_avg_perf = zero_shot_df["Average Performance"].copy()
	original_five_shot_avg_perf = five_shot_df["Average Performance"].copy()
	original_cot_avg_perf = cot_df["Average Performance"].copy()

	# Load the task information json data (task name -> {attribute: value}).
	# The encoding is explicit so the result does not depend on the platform default.
	with open(abs_path / "task_information.json", 'r', encoding="utf-8") as file:
	    task_information_json = json.load(file)

	# Task attributes that can be filtered on, in the order the keys are stored.
	_FILTER_KEYS = (
	    "Language",
	    "Task Type",
	    "Clinical Context",
	    "Data Access",
	    "Applications",
	    "Clinical Stage",
	)


	def _fresh_filter_state():
	    """Return a new filter store with an independent empty list per attribute."""
	    return {key: [] for key in _FILTER_KEYS}


	# One module-level store of the currently selected filters per leaderboard.
	# The filter callbacks write the latest selection into these dictionaries.
	cot_currently_selected_filters = _fresh_filter_state()

	five_shot_currently_selected_filters = _fresh_filter_state()

	zero_shot_currently_selected_filters = _fresh_filter_state()

	# Raw JSON copies of the leaderboard files. The averaging helpers index them
	# as `<json>[task][str(row)]`. Paths are resolved against this file's
	# directory and read as UTF-8 so loading does not depend on the working
	# directory or the platform default encoding.
	with open(abs_path / "leaderboards" / "Few-Shot_leaderboard_data.json", 'r', encoding="utf-8") as file:
	    five_shot_leaderboard_json = json.load(file)

	with open(abs_path / "leaderboards" / "CoT_leaderboard_data.json", 'r', encoding="utf-8") as file:
	    CoT_leaderboard_json = json.load(file)

	with open(abs_path / "leaderboards" / "Zero-Shot_leaderboard_data.json", 'r', encoding="utf-8") as file:
	    zero_shot_leaderboard_json = json.load(file)

	# Names of the tasks that may be shown as leaderboard columns.
	# `get_filtered_columns` only returns tasks that are members of this set.
	valid_tasks = {'NUBES', 'NorSynthClinical-NER', 'MEDIQA 2023-sum-A', 'Medication extraction',
	'IMCS-V2-DAC', 'Cantemist-Coding', 'IFMIR-NER', 'EHRQA-QA', 'Ex4CDS', 'MedDG',
	'MTS-Temporal', 'CHIP-MDCFNPC', 'n2c2 2014-Diabetes', 'MIMIC-III Outcome.LoS',
	'n2c2 2014-Hypertension', 'RuCCoN', 'CARES-ICD10 Chapter', 'RuDReC-NER', 'MIMIC-IV DiReCT.Dis',
	'n2c2 2014-Medication', 'iCorpus', 'Brateca-Hospitalization', 'n2c2 2010-Assertion',
	'NorSynthClinical-PHI', 'IFMIR - NER&factuality', 'JP-STS', 'NorSynthClinical-RE',
	'n2c2 2010-Concept', 'BARR2', 'IMCS-V2-NER', 'IMCS-V2-MRG', 'cMedQA', 'MedSTS',
	'BRONCO150-NER&Status', 'n2c2 2018-ADE&medication', 'CLISTER', 'ClinicalNotes-UPMC',
	'PPTS', 'CLIP', 'IMCS-V2-SR', 'EHRQA-Sub department', 'BrainMRI-AIS', 'Brateca-Mortality',
	'meddocan', 'CHIP-CDEE', 'CAS-evidence', 'MEDIQA 2019-RQE', 'Cantemis-Norm', 'MEDIQA 2023-sum-B',
	'CHIP-CTC', 'C-EMRS', 'CARES ICD10 Block', 'Cantemis-NER', 'CLINpt-NER', 'MEDIQA 2023-chat-A',
	'n2c2 2014-De-identification', 'n2c2 2014-Hyperlipidemia', 'EHRQA-Primary department',
	'ADE-Drug dosage', 'IFMIR-Incident type', 'MIMIC-III Outcome.Mortality', 'n2c2 2006-De-identification',
	'CAS-label', 'MIMIC-IV CDM', 'CodiEsp-ICD-10-CM', 'n2c2 2010-Relation', 'CARES-ICD10 Subblock',
	'MIE', 'HealthCareMagic-100k', 'ADE-Identification', 'MIMIC-IV DiReCT.PDD', 'ADE-Extraction',
	'DialMed', 'GOUT-CC-Consensus', 'GraSSCo PHI', 'RuMedNLI', 'RuMedDaNet', 'CBLUE-CDN', 'icliniq-10k',
	'CARDIO-DE', 'CARES-Area', 'DiSMed-NER', 'CodiEsp-ICD-10-PCS', 'MedNLI', 'MTS', 'MIMIC-IV BHC',
	'n2c2 2014-CAD'}

	# Number of model rows, derived from the last key of the zero-shot "Model"
	# column. The same count drives the averaging loops for all three leaderboards.
	# NOTE(review): assumes the keys are the strings "0".."N-1" in ascending
	# order and that every leaderboard file has the same rows -- confirm.
	n_models = int(list(zero_shot_leaderboard_json["Model"].keys())[-1]) + 1

	def _task_value_matches(value, selected):
	    """Return True when a task's attribute value satisfies one filter's selection."""
	    # Multi-category values are stored as "A, B, C"; one matching category is enough.
	    if "," in value:
	        return any(category in selected for category in value.split(", "))

	    # Brazilian edge case: the stored value carries a line break and a
	    # qualifier, but it should match the plain 'Portuguese' choice.
	    if value == 'Portuguese\n(Brazilian)' and 'Portuguese' in selected:
	        return True

	    return value in selected


	def get_filtered_columns(filter_selections):
	    """
	    Given the selected filters, return a list of all the task columns that
	    match the criteria.

	    A task is kept only when it satisfies EVERY filter that has at least one
	    selected value; filters with an empty selection are ignored. Previously a
	    match on a later attribute (multi-category or Brazilian Portuguese) reset
	    the validity flag to True and so re-admitted tasks that an earlier filter
	    had already rejected.

	    Input:
	        filter_selections: dictionary mapping each task attribute name to the
	            list of values currently selected for it

	    Output:
	        Returns a list of all valid tasks to display (by task name), in the
	        order they appear in the task information data
	    """
	    valid_columns = []

	    for task, task_info in task_information_json.items():
	        # Only tasks that exist as leaderboard columns can be displayed.
	        if task not in valid_tasks:
	            continue

	        matches_all_filters = True
	        for attribute, value in task_info.items():
	            # Attributes without a filter, or with an empty one, do not restrict.
	            selected = filter_selections.get(attribute)
	            if not selected:
	                continue

	            if not _task_value_matches(value, selected):
	                # One failed filter is decisive; later attributes cannot undo it.
	                matches_all_filters = False
	                break

	        if matches_all_filters:
	            valid_columns.append(task)

	    return valid_columns

	def isEmpty(currently_selected_filters):
	    """
	    Checks if there are no selected filters.

	    Returns True when every value in the dictionary is empty (falsy), which
	    includes the case of an empty dictionary, and False as soon as any filter
	    has a selection.
	    """
	    return not any(currently_selected_filters.values())


	####################################################################################################
	####### CoT Filters
	####################################################################################################


	# Columns always returned ahead of the task columns selected by the filters.
	_COT_BASE_COLUMNS = ['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance']


	def _cot_apply_filter(filter_name, choice):
	    """
	    Store `choice` under `filter_name` in the CoT filter store and return the
	    CoT table to display.

	    With no filter selected the original averages are restored and the full
	    table is returned. Otherwise the average is recomputed over the matching
	    tasks and only the base columns plus those task columns are returned.
	    """
	    # Update the Global store for the currently selected filters
	    cot_currently_selected_filters[filter_name] = choice

	    if isEmpty(cot_currently_selected_filters):
	        cot_df["Average Performance"] = original_cot_avg_perf
	        return cot_df

	    filtered_cols = get_filtered_columns(cot_currently_selected_filters)
	    updated_performance = cot_update_average_performance(filtered_cols)

	    # Convert dictionary keys to integers to match the DataFrame index
	    updated_performance_int = {int(k): v for k, v in updated_performance.items()}

	    # Map the values to the 'Average Performance' column based on index
	    cot_df["Average Performance"] = cot_df.index.map(updated_performance_int)

	    return cot_df[_COT_BASE_COLUMNS + filtered_cols]


	def cot_filter_language(language_choice):
	    """Apply the "Language" selection to the CoT table.

	    Now returns 'Model: Size Range' like the other handlers; it was
	    previously omitted from this handler only.
	    """
	    return _cot_apply_filter("Language", language_choice)


	def cot_filter_task_type(task_type_choice):
	    """Apply the "Task Type" selection to the CoT table."""
	    return _cot_apply_filter("Task Type", task_type_choice)


	def cot_filter_clinical_context(clinical_context_choice):
	    """Apply the "Clinical Context" selection to the CoT table."""
	    return _cot_apply_filter("Clinical Context", clinical_context_choice)


	def cot_filter_applications(applications_choice):
	    """Apply the "Applications" selection to the CoT table."""
	    return _cot_apply_filter("Applications", applications_choice)


	def cot_filter_stage_options(stage_choice):
	    """Apply the "Clinical Stage" selection to the CoT table."""
	    return _cot_apply_filter("Clinical Stage", stage_choice)


	def cot_filter_data_access(data_access_choice):
	    """Apply the "Data Access" selection to the CoT table."""
	    return _cot_apply_filter("Data Access", data_access_choice)


	def cot_update_average_performance(selected_columns):
	    """
	    Recompute each model's average performance over the selected task columns.

	    Input:
	        selected_columns: list of task names to average over

	    Output:
	        Dictionary mapping the row key ("0", "1", ...) of every model to its
	        average over those tasks, rounded to two decimals. With no selected
	        task the average is 0.0.
	    """
	    updated_average_performance = {}

	    # Divide by 1 when nothing is selected so the result is 0.0, not an error.
	    num_tasks = len(selected_columns) or 1

	    for i in range(n_models):
	        model_key = str(i)

	        performance = 0
	        for task in selected_columns:
	            performance += float(CoT_leaderboard_json[task][model_key])

	        updated_average_performance[model_key] = float(round(performance / num_tasks, 2))

	    return updated_average_performance


	####################################################################################################
	####### Few Shot Filters
	####################################################################################################

	# Columns always returned ahead of the task columns selected by the filters.
	_FIVE_SHOT_BASE_COLUMNS = ['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance']


	def _five_shot_apply_filter(filter_name, choice):
	    """
	    Store `choice` under `filter_name` in the few-shot filter store and return
	    the few-shot table to display.

	    With no filter selected the original averages are restored and the full
	    table is returned. Otherwise the average is recomputed over the matching
	    tasks and only the base columns plus those task columns are returned.
	    """
	    # Update the Global store for the currently selected filters
	    five_shot_currently_selected_filters[filter_name] = choice

	    if isEmpty(five_shot_currently_selected_filters):
	        five_shot_df["Average Performance"] = original_five_shot_avg_perf
	        return five_shot_df

	    filtered_cols = get_filtered_columns(five_shot_currently_selected_filters)
	    updated_performance = five_shot_update_average_performance(filtered_cols)

	    # Convert dictionary keys to integers to match the DataFrame index
	    updated_performance_int = {int(k): v for k, v in updated_performance.items()}

	    # Map the values to the 'Average Performance' column based on index
	    five_shot_df["Average Performance"] = five_shot_df.index.map(updated_performance_int)

	    return five_shot_df[_FIVE_SHOT_BASE_COLUMNS + filtered_cols]


	def five_shot_filter_language(language_choice):
	    """Apply the "Language" selection to the few-shot table."""
	    return _five_shot_apply_filter("Language", language_choice)


	def five_shot_filter_task_type(task_type_choice):
	    """Apply the "Task Type" selection to the few-shot table."""
	    return _five_shot_apply_filter("Task Type", task_type_choice)


	def five_shot_filter_clinical_context(clinical_context_choice):
	    """Apply the "Clinical Context" selection to the few-shot table."""
	    return _five_shot_apply_filter("Clinical Context", clinical_context_choice)


	def five_shot_filter_applications(applications_choice):
	    """Apply the "Applications" selection to the few-shot table."""
	    return _five_shot_apply_filter("Applications", applications_choice)


	def five_shot_filter_stage_options(stage_choice):
	    """Apply the "Clinical Stage" selection to the few-shot table."""
	    return _five_shot_apply_filter("Clinical Stage", stage_choice)


	def five_shot_filter_data_access(data_access_choice):
	    """Apply the "Data Access" selection to the few-shot table."""
	    return _five_shot_apply_filter("Data Access", data_access_choice)


	def five_shot_update_average_performance(selected_columns):
	    """
	    Recompute each model's average performance over the selected task columns.

	    Input:
	        selected_columns: list of task names to average over

	    Output:
	        Dictionary mapping the row key ("0", "1", ...) of every model to its
	        average over those tasks, rounded to two decimals. With no selected
	        task the average is 0.0.
	    """
	    updated_average_performance = {}

	    # Divide by 1 when nothing is selected so the result is 0.0, not an error.
	    num_tasks = len(selected_columns) or 1

	    for i in range(n_models):
	        model_key = str(i)

	        performance = 0
	        for task in selected_columns:
	            performance += float(five_shot_leaderboard_json[task][model_key])

	        updated_average_performance[model_key] = float(round(performance / num_tasks, 2))

	    return updated_average_performance


	####################################################################################################
	###### Zero Shot Filters
	####################################################################################################


	# Columns always returned ahead of the task columns selected by the filters.
	_ZERO_SHOT_BASE_COLUMNS = ['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance']


	def _zero_shot_apply_filter(filter_name, choice):
	    """
	    Store `choice` under `filter_name` in the zero-shot filter store and
	    return the zero-shot table to display.

	    With no filter selected the original averages are restored and the full
	    table is returned. Otherwise the average is recomputed over the matching
	    tasks and only the base columns plus those task columns are returned.
	    """
	    # Update the Global store for the currently selected filters
	    zero_shot_currently_selected_filters[filter_name] = choice

	    if isEmpty(zero_shot_currently_selected_filters):
	        zero_shot_df["Average Performance"] = original_zero_shot_avg_perf
	        return zero_shot_df

	    filtered_cols = get_filtered_columns(zero_shot_currently_selected_filters)
	    updated_performance = zero_shot_update_average_performance(filtered_cols)

	    # Convert dictionary keys to integers to match the DataFrame index
	    updated_performance_int = {int(k): v for k, v in updated_performance.items()}

	    # Map the values to the 'Average Performance' column based on index
	    zero_shot_df["Average Performance"] = zero_shot_df.index.map(updated_performance_int)

	    return zero_shot_df[_ZERO_SHOT_BASE_COLUMNS + filtered_cols]


	def zero_shot_filter_language(language_choice):
	    """Apply the "Language" selection to the zero-shot table."""
	    return _zero_shot_apply_filter("Language", language_choice)


	def zero_shot_filter_task_type(task_type_choice):
	    """Apply the "Task Type" selection to the zero-shot table."""
	    return _zero_shot_apply_filter("Task Type", task_type_choice)


	def zero_shot_filter_clinical_context(clinical_context_choice):
	    """Apply the "Clinical Context" selection to the zero-shot table."""
	    return _zero_shot_apply_filter("Clinical Context", clinical_context_choice)


	def zero_shot_filter_applications(applications_choice):
	    """Apply the "Applications" selection to the zero-shot table."""
	    return _zero_shot_apply_filter("Applications", applications_choice)


	def zero_shot_filter_stage_options(stage_choice):
	    """Apply the "Clinical Stage" selection to the zero-shot table."""
	    return _zero_shot_apply_filter("Clinical Stage", stage_choice)


	def zero_shot_filter_data_access(data_access_choice):
	    """Apply the "Data Access" selection to the zero-shot table."""
	    return _zero_shot_apply_filter("Data Access", data_access_choice)


	def zero_shot_update_average_performance(selected_columns):
	    """
	    Recompute each model's average performance over the selected task columns.

	    Input:
	        selected_columns: list of task names to average over

	    Output:
	        Dictionary mapping the row key ("0", "1", ...) of every model to its
	        average over those tasks, rounded to two decimals. With no selected
	        task the average is 0.0.
	    """
	    updated_average_performance = {}

	    # Divide by 1 when nothing is selected so the result is 0.0, not an error.
	    num_tasks = len(selected_columns) or 1

	    for i in range(n_models):
	        model_key = str(i)

	        performance = 0
	        for task in selected_columns:
	            performance += float(zero_shot_leaderboard_json[task][model_key])

	        updated_average_performance[model_key] = float(round(performance / num_tasks, 2))

	    return updated_average_performance


	def _extract_styler_metadata(component, styler):
	    """Call the component's private `__extract_metadata` helper, if it has one.

	    `postprocess` lives at module level, so writing `self.__extract_metadata`
	    there is NOT name-mangled and can never find a method that a class body
	    defined under that name. The mangled spelling is looked up on every class
	    in the MRO instead; None is returned when no such helper exists.
	    """
	    for cls in type(component).__mro__:
	        mangled = f"_{cls.__name__.lstrip('_')}__extract_metadata"
	        extractor = getattr(component, mangled, None)
	        if callable(extractor):
	            return extractor(styler)
	    return None


	def postprocess(self, value: pd.DataFrame) -> DataframeData:
	    """
	    Convert the value handed to the leaderboard component into `DataframeData`.

	    DataFrames (and CSV paths, which are read into one) that contain an
	    "Average Performance" column are sorted by it in descending order first.

	    Input:
	        value: a DataFrame, a path to a CSV file, a pandas Styler, or None

	    Output:
	        DataframeData with the headers and rows of the table. An empty table
	        is encoded as `data=[[]]`.

	    Raises:
	        ValueError: for a Styler on pandas < 1.5.0, or for an unsupported type.
	    """
	    # The None / str checks must run before any attribute access on `value`.
	    if value is None:
	        return self.postprocess(pd.DataFrame({"column 1": []}))

	    if isinstance(value, str):
	        value = pd.read_csv(value)  # type: ignore

	    if isinstance(value, pd.DataFrame):
	        # Ensure that the "Average Performance" column exists before sorting
	        if "Average Performance" in value.columns:
	            value = value.sort_values(by="Average Performance", ascending=False)

	        if len(value) == 0:
	            return DataframeData(
	                headers=list(value.columns),  # type: ignore
	                data=[[]],  # type: ignore
	            )
	        return DataframeData(
	            headers=list(value.columns),  # type: ignore
	            data=value.to_dict(orient="split")["data"],  # type: ignore
	        )

	    if isinstance(value, Styler):
	        if semantic_version.Version(pd.__version__) < semantic_version.Version(
	            "1.5.0"
	        ):
	            raise ValueError(
	                "Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
	            )
	        if self.interactive:
	            warnings.warn(
	                "Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
	            )
	        df: pd.DataFrame = value.data  # type: ignore
	        metadata = _extract_styler_metadata(self, value)
	        if len(df) == 0:
	            return DataframeData(
	                headers=list(df.columns),
	                data=[[]],
	                metadata=metadata,  # type: ignore
	            )
	        return DataframeData(
	            headers=list(df.columns),
	            data=df.to_dict(orient="split")["data"],  # type: ignore
	            metadata=metadata,  # type: ignore
	        )

	    raise ValueError(f"Cannot process value of type {type(value).__name__} as a leaderboard table")


	# Models are sorted in order of decreasing average performance (best performance at the top!)
	# The component's own postprocess is replaced with the sorting version above.
	Leaderboard.postprocess = postprocess


	####################################################################################################
	###### Leaderboard
	####################################################################################################

	with gr.Blocks() as app:
	gr.Markdown("# BRIDGE (Benchmarking Large Language Models for Understanding Real-world Clinical Practice Text)")

	with gr.Tabs():
	with gr.Tab("README"):
	# gr.Markdown((Path(__file__).parent / "docs.md").read_text())
	html_content = (Path(__file__).parent / "docs.md").read_text()
	gr.HTML(html_content)

	with gr.Tab("Zero-Shot"):
	    # Table for the zero-shot results, searchable by model name.
	    leaderboard = Leaderboard(
	        value=zero_shot_df,
	        select_columns=None,
	        search_columns=SearchColumns(
	            primary_column="Model",
	            secondary_columns="",
	            placeholder="Search by Model Name",
	            label="Model Search",
	        ),
	        hide_columns=["Model: Size Range", "Model: Accessibility"],
	        filter_columns=["Model: Domain", "Model: Size Range", "Model: Accessibility"],
	        datatype=config.TYPES,
	    )

	    # The checkbox groups below are created in the same order as before.
	    # Choice strings are kept exactly as written (including their spelling):
	    # they are compared against the values stored in the task information data.

	    # Language Filter
	    all_languages = [
	        'English', 'Spanish',
	        'Chinese', 'Norwegian',
	        'Russian', 'Portuguese',
	        'German', 'Japanese', 'French',
	    ]
	    language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")

	    # Task Type Filter
	    all_task_types = [
	        'Question Answering', 'Text Classification', 'Named Entity Recognition',
	        'Normalization and Coding', 'Natural Language Inference', 'Summarization',
	        'Event Extraction', 'Semantic Similarity',
	    ]
	    task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")

	    # Clinical Context Filter
	    all_clinical_contexts = [
	        'Neurology', 'Oncology', 'Radiology', 'Pulmonology',
	        'Cardiology', 'Dermatology', 'Critical Care', 'Nephrology',
	        'General', 'Endocrinology', 'Pediatrics', 'Pharmacology',
	        'Gastroenterology', 'Psychology',
	    ]
	    cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")

	    # Applications Filter
	    all_applications = [
	        'Procudure information', 'Concept standarization',
	        'Specialist recommendation', 'Negation identification',
	        'Clinical trial matching', 'Consultation summarization',
	        'Semantic relation', 'Post-discharge patient management',
	        'De-identification', 'Billing & Coding', 'Phenotyping',
	        'Data organization', 'Temporal & Causality relation',
	        'Summarization', 'Screen & Consultation', 'Diagnosis',
	        'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
	        'Medication information',
	    ]
	    application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")

	    # Clinical Stage Filter
	    all_stages = [
	        'Treatment and Intervention', 'Triage and Referral',
	        'Initial Assessment', 'Discharge and Administration',
	        'Research', 'Diagnosis and Prognosis',
	    ]
	    stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")

	    # Data Access Filter
	    all_data_access = ['Open Access', 'Regulated']
	    da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")

	    # Each checkbox group feeds its selection to its zero-shot handler, whose
	    # return value replaces the table. Registration order is unchanged.
	    for _checkbox_group, _handler in (
	        (language_options, zero_shot_filter_language),
	        (task_type_options, zero_shot_filter_task_type),
	        (cc_options, zero_shot_filter_clinical_context),
	        (application_options, zero_shot_filter_applications),
	        (da_options, zero_shot_filter_data_access),
	        (stage_options, zero_shot_filter_stage_options),
	    ):
	        _checkbox_group.change(fn=_handler, inputs=_checkbox_group, outputs=leaderboard)


	# "Few-Shot" tab: builds a Leaderboard from five_shot_df, then creates six
	# task-level CheckboxGroup filters (language, task type, clinical context,
	# clinical application, clinical stage, data access) and wires each group's
	# change event to a five_shot_filter_* handler that outputs to the leaderboard.
	# Components are created in the order written below.
	with gr.Tab("Few-Shot"):
	leaderboard = Leaderboard(
	value=five_shot_df,
	select_columns = None,
	# Search box configuration: "Model" is the primary search column and no
	# secondary columns are named (an empty string is passed).
	search_columns=SearchColumns(primary_column = "Model", secondary_columns = "",
	placeholder="Search by Model Name",
	label="Model Search"),
	# Both hidden columns are also listed in filter_columns below.
	# NOTE(review): presumably hidden columns stay usable as filters while not
	# being shown in the table -- confirm against the gradio_leaderboard docs.
	hide_columns=["Model: Size Range", "Model: Accessibility"],
	filter_columns=["Model: Domain", "Model: Size Range", "Model: Accessibility"],
	datatype=config.TYPES,
	)

	# Language Filter
	all_languages = ['English', 'Spanish',
	'Chinese', 'Norwegian',
	'Russian', 'Portuguese',
	'German', 'Japanese', 'French']

	language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")

	# Task Type Filter
	all_task_types = ['Question Answering', 'Text Classification', 'Named Entity Recognition',
	'Normalization and Coding', 'Natural Language Inference', 'Summarization',
	'Event Extraction', 'Semantic Similarity']

	task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")


	# Clinical Context Filter
	all_clinical_contexts = ['Neurology', 'Oncology', 'Radiology', 'Pulmonology',
	'Cardiology', 'Dermatology', 'Critical Care', 'Nephrology',
	'General', 'Endocrinology', 'Pediatrics', 'Pharmacology',
	'Gastroenterology', 'Psychology']

	cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")

	# Applications Filter
	# NOTE(review): 'Procudure information' and 'Concept standarization' look
	# misspelled. These strings are the checkbox values handed to the filter
	# handler, so they presumably have to match labels stored elsewhere -- verify
	# before correcting the spelling here.
	all_applications = ['Procudure information', 'Concept standarization',
	'Specialist recommendation', 'Negation identification',
	'Clinical trial matching', 'Consultation summarization',
	'Semantic relation', 'Post-discharge patient management',
	'De-identification', 'Billing & Coding', 'Phenotyping',
	'Data organization', 'Temporal & Causality relation',
	'Summarization', 'Screen & Consultation', 'Diagnosis',
	'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
	'Medication information']

	application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")

	# Clinical Stage Filter
	all_stages = ['Treatment and Intervention', 'Triage and Referral',
	'Initial Assessment', 'Discharge and Administration',
	'Research', 'Diagnosis and Prognosis']

	stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")

	# Data Access Filter
	all_data_access = ['Open Access', 'Regulated']

	da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")

	# Event wiring: whenever a checkbox group changes, its five_shot_* handler is
	# called with that group's current selection and the handler's return value
	# is sent to the leaderboard component.
	# NOTE(review): each handler receives only its own group's selection as input,
	# not the other five groups; whether selections from different groups combine
	# depends on the handlers, which are not defined in this section -- confirm.
	language_options.change(fn=five_shot_filter_language, inputs=language_options, outputs=leaderboard)
	task_type_options.change(fn=five_shot_filter_task_type, inputs=task_type_options, outputs=leaderboard)
	cc_options.change(fn=five_shot_filter_clinical_context, inputs=cc_options, outputs=leaderboard)
	application_options.change(fn=five_shot_filter_applications, inputs=application_options, outputs=leaderboard)
	da_options.change(fn=five_shot_filter_data_access, inputs=da_options, outputs=leaderboard)
	stage_options.change(fn=five_shot_filter_stage_options, inputs=stage_options, outputs=leaderboard)


	# "CoT" tab: builds a Leaderboard from cot_df, then creates six task-level
	# CheckboxGroup filters (language, task type, clinical context, clinical
	# application, clinical stage, data access) and wires each group's change
	# event to a cot_filter_* handler that outputs to the leaderboard.
	# Components are created in the order written below.
	with gr.Tab("CoT"):
	leaderboard = Leaderboard(
	value=cot_df,
	select_columns = None,
	# Search box configuration: "Model" is the primary search column and no
	# secondary columns are named (an empty string is passed).
	search_columns=SearchColumns(primary_column = "Model", secondary_columns = "",
	placeholder="Search by Model Name",
	label="Model Search"),
	# Both hidden columns are also listed in filter_columns below.
	# NOTE(review): presumably hidden columns stay usable as filters while not
	# being shown in the table -- confirm against the gradio_leaderboard docs.
	hide_columns=["Model: Size Range", "Model: Accessibility"],
	filter_columns=["Model: Domain", "Model: Size Range", "Model: Accessibility"],
	datatype=config.TYPES,
	)

	# Language Filter
	all_languages = ['English', 'Spanish',
	'Chinese', 'Norwegian',
	'Russian', 'Portuguese',
	'German', 'Japanese', 'French']

	language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")

	# Task Type Filter
	all_task_types = ['Question Answering', 'Text Classification', 'Named Entity Recognition',
	'Normalization and Coding', 'Natural Language Inference', 'Summarization',
	'Event Extraction', 'Semantic Similarity']

	task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")

	# Clinical Context Filter
	all_clinical_contexts = ['Neurology', 'Oncology', 'Radiology', 'Pulmonology',
	'Cardiology', 'Dermatology', 'Critical Care', 'Nephrology',
	'General', 'Endocrinology', 'Pediatrics', 'Pharmacology',
	'Gastroenterology', 'Psychology']

	cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")

	# Applications Filter
	# NOTE(review): 'Procudure information' and 'Concept standarization' look
	# misspelled. These strings are the checkbox values handed to the filter
	# handler, so they presumably have to match labels stored elsewhere -- verify
	# before correcting the spelling here.
	all_applications = ['Procudure information', 'Concept standarization',
	'Specialist recommendation', 'Negation identification',
	'Clinical trial matching', 'Consultation summarization',
	'Semantic relation', 'Post-discharge patient management',
	'De-identification', 'Billing & Coding', 'Phenotyping',
	'Data organization', 'Temporal & Causality relation',
	'Summarization', 'Screen & Consultation', 'Diagnosis',
	'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
	'Medication information']

	application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")

	# Clinical Stage Filter
	all_stages = ['Treatment and Intervention', 'Triage and Referral',
	'Initial Assessment', 'Discharge and Administration',
	'Research', 'Diagnosis and Prognosis']

	stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")


	# Data Access Filter
	all_data_access = ['Open Access', 'Regulated']

	da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")


	# Event wiring: whenever a checkbox group changes, its cot_* handler is called
	# with that group's current selection and the handler's return value is sent
	# to the leaderboard component.
	# NOTE(review): each handler receives only its own group's selection as input,
	# not the other five groups; whether selections from different groups combine
	# depends on the handlers, which are not defined in this section -- confirm.
	language_options.change(fn=cot_filter_language, inputs=language_options, outputs=leaderboard)
	task_type_options.change(fn=cot_filter_task_type, inputs=task_type_options, outputs=leaderboard)
	cc_options.change(fn=cot_filter_clinical_context, inputs=cc_options, outputs=leaderboard)
	application_options.change(fn=cot_filter_applications, inputs=application_options, outputs=leaderboard)
	da_options.change(fn=cot_filter_data_access, inputs=da_options, outputs=leaderboard)

	stage_options.change(fn=cot_filter_stage_options, inputs=stage_options, outputs=leaderboard)


	# Script entry point: launch the app only when this file is run directly, not
	# when it is imported as a module.
	# NOTE(review): `app` is not defined in this section; presumably it is the
	# gr.Blocks instance built earlier in the file -- confirm.
	if __name__ == "__main__":
	app.launch()