kevinxie06 committed
Commit fd7d17f · verified · 1 parent: dedce2f

Upload all utils files

utils/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from .data_loader import LeaderboardDataLoader
+ from .filter_manager import FilterManager
+ from .ui_components import UIComponents
+ from .leaderboard_postprocessor import postprocess, DataframeData
+
+ __all__ = ['LeaderboardDataLoader', 'FilterManager', 'UIComponents', 'postprocess', 'DataframeData']
utils/data_loader.py ADDED
@@ -0,0 +1,74 @@
+ import pandas as pd
+ import json
+ from pathlib import Path
+ from typing import Dict, List
+
+ class LeaderboardDataLoader:
+     """Handles loading and managing leaderboard data"""
+
+     def __init__(self):
+         self.abs_path = Path(__file__).parent.parent
+         self.task_information = self._load_task_information()
+         self.leaderboard_data = self._load_leaderboard_data()
+         self.dataframes = self._create_dataframes()
+         self.original_avg_performances = self._store_original_performances()
+         self.n_models = self._calculate_n_models()
+
+     def _load_task_information(self) -> Dict:
+         """Load task information from JSON"""
+         with open(self.abs_path / "task_information.json", 'r') as file:
+             return json.load(file)
+
+     def _load_leaderboard_data(self) -> Dict[str, Dict]:
+         """Load all leaderboard JSON data"""
+         leaderboard_files = {
+             'zero_shot': 'leaderboards/Zero-Shot_leaderboard_data.json',
+             'few_shot': 'leaderboards/Few-Shot_leaderboard_data.json',
+             'cot': 'leaderboards/CoT_leaderboard_data.json'
+         }
+
+         data = {}
+         for key, filepath in leaderboard_files.items():
+             with open(self.abs_path / filepath, 'r') as file:
+                 data[key] = json.load(file)
+
+         return data
+
+     def _create_dataframes(self) -> Dict[str, pd.DataFrame]:
+         """Create pandas DataFrames from the leaderboard JSON files"""
+         json_files = {
+             'zero_shot': 'leaderboards/Zero-Shot_leaderboard_data.json',
+             'few_shot': 'leaderboards/Few-Shot_leaderboard_data.json',
+             'cot': 'leaderboards/CoT_leaderboard_data.json'
+         }
+         return {
+             key: pd.read_json(self.abs_path / filepath, precise_float=True)
+             for key, filepath in json_files.items()
+         }
+
+     def _store_original_performances(self) -> Dict[str, pd.Series]:
+         """Store original average performances for reset functionality"""
+         return {
+             key: df["Average Performance"].copy()
+             for key, df in self.dataframes.items()
+         }
+
+     def _calculate_n_models(self) -> int:
+         """Calculate number of models from the data"""
+         return int(list(self.leaderboard_data['zero_shot']["Model"].keys())[-1]) + 1
+
+     def get_dataframe(self, leaderboard_type: str) -> pd.DataFrame:
+         """Get dataframe for specific leaderboard type"""
+         return self.dataframes[leaderboard_type]
+
+     def get_leaderboard_json(self, leaderboard_type: str) -> Dict:
+         """Get JSON data for specific leaderboard type"""
+         return self.leaderboard_data[leaderboard_type]
+
+     def get_original_performance(self, leaderboard_type: str) -> pd.Series:
+         """Get original average performance for specific leaderboard type"""
+         return self.original_avg_performances[leaderboard_type]
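A minimal usage sketch of the loader (assuming task_information.json and the leaderboards/ JSON files sit one level above the utils/ package, as the hard-coded paths above expect):

    # Hypothetical snippet; not part of this commit
    from utils.data_loader import LeaderboardDataLoader

    loader = LeaderboardDataLoader()
    zero_shot_df = loader.get_dataframe('zero_shot')   # DataFrame backing the Zero-Shot board

    print(loader.n_models)                             # number of models on the board
    print(zero_shot_df[['Model', 'Average Performance']].head())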
utils/filter_manager.py ADDED
@@ -0,0 +1,151 @@
+ from typing import Dict, List, Any
+ import pandas as pd
+
+ class FilterManager:
+     """Manages filtering logic for all leaderboard types"""
+
+     def __init__(self, data_loader):
+         self.data_loader = data_loader
+         self.valid_tasks = {
+             'NUBES', 'NorSynthClinical-NER', 'MEDIQA 2023-sum-A', 'Medication extraction',
+             'IMCS-V2-DAC', 'Cantemist-Coding', 'IFMIR-NER', 'EHRQA-QA', 'Ex4CDS', 'MedDG',
+             'MTS-Temporal', 'CHIP-MDCFNPC', 'n2c2 2014-Diabetes', 'MIMIC-III Outcome.LoS',
+             'n2c2 2014-Hypertension', 'RuCCoN', 'CARES-ICD10 Chapter', 'RuDReC-NER', 'MIMIC-IV DiReCT.Dis',
+             'n2c2 2014-Medication', 'iCorpus', 'Brateca-Hospitalization', 'n2c2 2010-Assertion',
+             'NorSynthClinical-PHI', 'IFMIR - NER&factuality', 'JP-STS', 'NorSynthClinical-RE',
+             'n2c2 2010-Concept', 'BARR2', 'IMCS-V2-NER', 'IMCS-V2-MRG', 'cMedQA', 'MedSTS',
+             'BRONCO150-NER&Status', 'n2c2 2018-ADE&medication', 'CLISTER', 'ClinicalNotes-UPMC',
+             'PPTS', 'CLIP', 'IMCS-V2-SR', 'EHRQA-Sub department', 'BrainMRI-AIS', 'Brateca-Mortality',
+             'meddocan', 'CHIP-CDEE', 'CAS-evidence', 'MEDIQA 2019-RQE', 'Cantemis-Norm', 'MEDIQA 2023-sum-B',
+             'CHIP-CTC', 'C-EMRS', 'CARES ICD10 Block', 'Cantemis-NER', 'CLINpt-NER', 'MEDIQA 2023-chat-A',
+             'n2c2 2014-De-identification', 'n2c2 2014-Hyperlipidemia', 'EHRQA-Primary department',
+             'ADE-Drug dosage', 'IFMIR-Incident type', 'MIMIC-III Outcome.Mortality', 'n2c2 2006-De-identification',
+             'CAS-label', 'MIMIC-IV CDM', 'CodiEsp-ICD-10-CM', 'n2c2 2010-Relation', 'CARES-ICD10 Subblock',
+             'MIE', 'HealthCareMagic-100k', 'ADE-Identification', 'MIMIC-IV DiReCT.PDD', 'ADE-Extraction',
+             'DialMed', 'GOUT-CC-Consensus', 'GraSSCo PHI', 'RuMedNLI', 'RuMedDaNet', 'CBLUE-CDN', 'icliniq-10k',
+             'CARDIO-DE', 'CARES-Area', 'DiSMed-NER', 'CodiEsp-ICD-10-PCS', 'MedNLI', 'MTS', 'MIMIC-IV BHC',
+             'n2c2 2014-CAD'
+         }
+
+         # Initialize filter states for each leaderboard type
+         self.filter_states = {
+             'zero_shot': self._create_empty_filter_state(),
+             'few_shot': self._create_empty_filter_state(),
+             'cot': self._create_empty_filter_state()
+         }
+
+     def _create_empty_filter_state(self) -> Dict[str, List]:
+         """Create an empty filter state"""
+         return {
+             "Language": [],
+             "Task Type": [],
+             "Clinical Context": [],
+             "Data Access": [],
+             "Applications": [],
+             "Clinical Stage": []
+         }
+
+     def get_filtered_columns(self, filter_selections: Dict[str, List]) -> List[str]:
+         """
+         Given a dictionary of selected filters, return a list of all
+         the task columns that match the criteria.
+         """
+         valid_columns = []
+         for task in self.data_loader.task_information:
+             task_info = self.data_loader.task_information[task]
+
+             # Flag to keep track of whether this task is valid
+             is_valid = True
+
+             # Every attribute with an active filter must match
+             for attribute in task_info:
+                 # Skip attributes whose filter is empty
+                 if not filter_selections[attribute]:
+                     continue
+
+                 value = task_info[attribute]
+
+                 # Handle edge case for multiple categories: at least one must be selected
+                 if "," in value:
+                     all_categories = value.split(", ")
+                     if not any(category in filter_selections[attribute] for category in all_categories):
+                         is_valid = False
+                         break
+
+                 # Handle Brazilian Portuguese edge case
+                 elif (value == 'Portuguese\n(Brazilian)') and ('Portuguese' in filter_selections[attribute]):
+                     continue
+
+                 elif value not in filter_selections[attribute]:
+                     is_valid = False
+                     break
+
+             if task in self.valid_tasks and is_valid:
+                 valid_columns.append(task)
+
+         return valid_columns
+
+     def is_empty(self, filter_selections: Dict[str, List]) -> bool:
+         """Check if there are no selected filters"""
+         return all(not value for value in filter_selections.values())
+
+     def update_average_performance(self, leaderboard_type: str, selected_columns: List[str]) -> Dict[str, float]:
+         """
+         Calculate updated average performance based on selected columns
+         """
+         updated_average_performance = {}
+         leaderboard_json = self.data_loader.get_leaderboard_json(leaderboard_type)
+
+         for i in range(self.data_loader.n_models):
+             performance = 0
+             num_tasks = 0
+
+             for task in selected_columns:
+                 if task in leaderboard_json:
+                     num_tasks += 1
+                     performance += float(leaderboard_json[task][str(i)])
+
+             # Avoid division by zero when none of the selected tasks exist in this leaderboard
+             if num_tasks == 0:
+                 num_tasks = 1
+
+             updated_average_performance[f"{i}"] = float(round(performance / num_tasks, 2))
+
+         return updated_average_performance
+
+     def apply_filter(self, leaderboard_type: str, filter_type: str, filter_values: List[str]) -> pd.DataFrame:
+         """
+         Apply a filter to a specific leaderboard type and return the updated dataframe
+         """
+         # Update the filter state
+         self.filter_states[leaderboard_type][filter_type] = filter_values
+
+         # Get the dataframe
+         df = self.data_loader.get_dataframe(leaderboard_type).copy()
+
+         # If no filters are applied, reset to the original performance
+         if self.is_empty(self.filter_states[leaderboard_type]):
+             df["Average Performance"] = self.data_loader.get_original_performance(leaderboard_type)
+             return df
+
+         # Get filtered columns
+         filtered_cols = self.get_filtered_columns(self.filter_states[leaderboard_type])
+
+         # Update average performance
+         updated_performance = self.update_average_performance(leaderboard_type, filtered_cols)
+
+         # Convert dictionary keys to integers to match the DataFrame index
+         updated_performance_int = {int(k): v for k, v in updated_performance.items()}
+
+         # Map the values to the 'Average Performance' column based on index
+         df["Average Performance"] = df.index.map(updated_performance_int)
+
+         # Return dataframe with the base columns plus the filtered task columns
+         base_columns = ['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance']
+         return df[base_columns + filtered_cols]
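A small sketch of the filter flow; the leaderboard key and the selected value are illustrative and must match the labels actually stored in task_information.json:

    # Hypothetical snippet; not part of this commit
    from utils.data_loader import LeaderboardDataLoader
    from utils.filter_manager import FilterManager

    loader = LeaderboardDataLoader()
    manager = FilterManager(loader)

    # Restrict the zero-shot board to Spanish tasks; the "Average Performance"
    # column is recomputed over the matching task columns only
    filtered_df = manager.apply_filter('zero_shot', 'Language', ['Spanish'])
    print(filtered_df[['Model', 'Average Performance']].head())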
utils/leaderboard_postprocessor.py ADDED
@@ -0,0 +1,61 @@
+ import warnings
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, Literal
+ import pandas as pd
+ from pandas.io.formats.style import Styler
+ import semantic_version
+ from gradio.data_classes import GradioModel
+
+ class DataframeData(GradioModel):
+     headers: List[str]
+     data: Union[List[List[Any]], List[Tuple[Any, ...]]]
+     metadata: Optional[Dict[str, Optional[List[Any]]]] = None
+
+ def postprocess(self, value: pd.DataFrame) -> DataframeData:
+     """Custom postprocess function that sorts by Average Performance"""
+     # Ensure that the "Average Performance" column exists
+     if "Average Performance" in value.columns:
+         # Sort the DataFrame by the "Average Performance" column in descending order
+         value = value.sort_values(by="Average Performance", ascending=False)
+
+         return DataframeData(
+             headers=list(value.columns),  # type: ignore
+             data=value.to_dict(orient="split")["data"],  # type: ignore
+         )
+
+     if value is None:
+         return self.postprocess(pd.DataFrame({"column 1": []}))
+     if isinstance(value, (str, pd.DataFrame)):
+         if isinstance(value, str):
+             value = pd.read_csv(value)  # type: ignore
+         if len(value) == 0:
+             return DataframeData(
+                 headers=list(value.columns),  # type: ignore
+                 data=[[]],  # type: ignore
+             )
+         return DataframeData(
+             headers=list(value.columns),  # type: ignore
+             data=value.to_dict(orient="split")["data"],  # type: ignore
+         )
+     elif isinstance(value, Styler):
+         if semantic_version.Version(pd.__version__) < semantic_version.Version(
+             "1.5.0"
+         ):
+             raise ValueError(
+                 "Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
+             )
+         if self.interactive:
+             warnings.warn(
+                 "Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
+             )
+         df: pd.DataFrame = value.data  # type: ignore
+         if len(df) == 0:
+             return DataframeData(
+                 headers=list(df.columns),
+                 data=[[]],
+                 metadata=self.__extract_metadata(value),  # type: ignore
+             )
+         return DataframeData(
+             headers=list(df.columns),
+             data=df.to_dict(orient="split")["data"],  # type: ignore
+             metadata=self.__extract_metadata(value),  # type: ignore
+         )
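Because the function takes self, it is evidently intended to be bound onto a Gradio dataframe-style component rather than called directly. One plausible wiring, not shown in this commit and therefore an assumption, is to monkey-patch the Leaderboard component's postprocess:

    # Hypothetical wiring; the actual patch site is not part of this commit
    from gradio_leaderboard import Leaderboard
    from utils.leaderboard_postprocessor import postprocess

    # Override the default postprocess so every rendered board is sorted
    # by "Average Performance" before being sent to the front end
    Leaderboard.postprocess = postprocess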
utils/ui_components.py ADDED
@@ -0,0 +1,78 @@
+ import gradio as gr
+ from gradio_leaderboard import Leaderboard
+ from typing import List, Tuple, Callable
+
+ class UIComponents:
+     """Handles creation of UI components for the leaderboard"""
+
+     @staticmethod
+     def create_filter_components() -> Tuple[List[gr.CheckboxGroup], List[str]]:
+         """Create all filter components and return them with their labels"""
+
+         # Language Filter
+         all_languages = ['English', 'Spanish', 'Chinese', 'Norwegian',
+                          'Russian', 'Portuguese', 'German', 'Japanese', 'French']
+         language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")
+
+         # Task Type Filter
+         all_task_types = ['Question Answering', 'Text Classification', 'Named Entity Recognition',
+                           'Normalization and Coding', 'Natural Language Inference', 'Summarization',
+                           'Event Extraction', 'Semantic Similarity']
+         task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")
+
+         # Clinical Context Filter
+         all_clinical_contexts = ['Neurology', 'Oncology', 'Radiology', 'Pulmonology',
+                                  'Cardiology', 'Dermatology', 'Critical Care', 'Nephrology',
+                                  'General', 'Endocrinology', 'Pediatrics', 'Pharmacology',
+                                  'Gastroenterology', 'Psychology']
+         cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")
+
+         # Applications Filter
+         all_applications = ['Procudure information', 'Concept standarization',
+                             'Specialist recommendation', 'Negation identification',
+                             'Clinical trial matching', 'Consultation summarization',
+                             'Semantic relation', 'Post-discharge patient management',
+                             'De-identification', 'Billing & Coding', 'Phenotyping',
+                             'Data organization', 'Temporal & Causality relation',
+                             'Summarization', 'Screen & Consultation', 'Diagnosis',
+                             'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
+                             'Medication information']
+         application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")
+
+         # Clinical Stage Filter
+         all_stages = ['Treatment and Intervention', 'Triage and Referral',
+                       'Initial Assessment', 'Discharge and Administration',
+                       'Research', 'Diagnosis and Prognosis']
+         stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")
+
+         # Data Access Filter
+         all_data_access = ['Open Access', 'Regulated']
+         da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")
+
+         components = [language_options, task_type_options, cc_options,
+                       application_options, stage_options, da_options]
+
+         filter_types = ["Language", "Task Type", "Clinical Context",
+                         "Applications", "Clinical Stage", "Data Access"]
+
+         return components, filter_types
+
+     @staticmethod
+     def setup_filter_events(components: List[gr.CheckboxGroup],
+                             filter_types: List[str],
+                             leaderboard: Leaderboard,
+                             filter_manager,
+                             leaderboard_type: str):
+         """Setup event handlers for filter components"""
+
+         def create_filter_function(filter_type: str, lb_type: str):
+             """Create a filter function with proper closure"""
+             return lambda values: filter_manager.apply_filter(lb_type, filter_type, values)
+
+         for component, filter_type in zip(components, filter_types):
+             filter_fn = create_filter_function(filter_type, leaderboard_type)
+             component.change(
+                 fn=filter_fn,
+                 inputs=component,
+                 outputs=leaderboard
+             )
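A minimal sketch of how these utilities might be assembled into an app; the Blocks layout and the Leaderboard constructor arguments are assumptions, since this commit contains only the utils package:

    # Hypothetical assembly; only the utils classes below come from this commit
    import gradio as gr
    from gradio_leaderboard import Leaderboard
    from utils import LeaderboardDataLoader, FilterManager, UIComponents

    data_loader = LeaderboardDataLoader()
    filter_manager = FilterManager(data_loader)

    with gr.Blocks() as demo:
        components, filter_types = UIComponents.create_filter_components()
        leaderboard = Leaderboard(value=data_loader.get_dataframe('zero_shot'))
        UIComponents.setup_filter_events(components, filter_types, leaderboard,
                                         filter_manager, 'zero_shot')

    demo.launch()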