kevinxie06 committed
Commit fd7d17f · verified · 1 parent: dedce2f

Upload all utils files

utils/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from .data_loader import LeaderboardDataLoader
+ from .filter_manager import FilterManager
+ from .ui_components import UIComponents
+ from .leaderboard_postprocessor import postprocess, DataframeData
+
+ __all__ = ['LeaderboardDataLoader', 'FilterManager', 'UIComponents', 'postprocess', 'DataframeData']
utils/data_loader.py ADDED
@@ -0,0 +1,74 @@
+ import pandas as pd
+ import json
+ from pathlib import Path
+ from typing import Dict, List
+
+ class LeaderboardDataLoader:
+     """Handles loading and managing leaderboard data"""
+
+     def __init__(self):
+         self.abs_path = Path(__file__).parent.parent
+         self.task_information = self._load_task_information()
+         self.leaderboard_data = self._load_leaderboard_data()
+         self.dataframes = self._create_dataframes()
+         self.original_avg_performances = self._store_original_performances()
+         self.n_models = self._calculate_n_models()
+
+     def _load_task_information(self) -> Dict:
+         """Load task information from JSON"""
+         with open(self.abs_path / "task_information.json", 'r') as file:
+             return json.load(file)
+
+     def _load_leaderboard_data(self) -> Dict[str, Dict]:
+         """Load all leaderboard JSON data"""
+         leaderboard_files = {
+             'zero_shot': 'leaderboards/Zero-Shot_leaderboard_data.json',
+             'few_shot': 'leaderboards/Few-Shot_leaderboard_data.json',
+             'cot': 'leaderboards/CoT_leaderboard_data.json'
+         }
+
+         data = {}
+         for key, filepath in leaderboard_files.items():
+             with open(self.abs_path / filepath, 'r') as file:
+                 data[key] = json.load(file)
+
+         return data
+
+     def _create_dataframes(self) -> Dict[str, pd.DataFrame]:
+         """Create pandas DataFrames from the leaderboard JSON files"""
+         json_files = {
+             'zero_shot': 'leaderboards/Zero-Shot_leaderboard_data.json',
+             'few_shot': 'leaderboards/Few-Shot_leaderboard_data.json',
+             'cot': 'leaderboards/CoT_leaderboard_data.json'
+         }
+         return {
+             key: pd.read_json(self.abs_path / filepath, precise_float=True)
+             for key, filepath in json_files.items()
+         }
+
+     def _store_original_performances(self) -> Dict[str, pd.Series]:
+         """Store original average performances for reset functionality"""
+         return {
+             key: df["Average Performance"].copy()
+             for key, df in self.dataframes.items()
+         }
+
+     def _calculate_n_models(self) -> int:
+         """Calculate number of models from the data"""
+         return int(list(self.leaderboard_data['zero_shot']["Model"].keys())[-1]) + 1
+
+     def get_dataframe(self, leaderboard_type: str) -> pd.DataFrame:
+         """Get dataframe for specific leaderboard type"""
+         return self.dataframes[leaderboard_type]
+
+     def get_leaderboard_json(self, leaderboard_type: str) -> Dict:
+         """Get JSON data for specific leaderboard type"""
+         return self.leaderboard_data[leaderboard_type]
+
+     def get_original_performance(self, leaderboard_type: str) -> pd.Series:
+         """Get original average performance for specific leaderboard type"""
+         return self.original_avg_performances[leaderboard_type]
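A minimal usage sketch of the loader (assuming task_information.json and the leaderboards/ JSON files sit one level above the utils/ package, as the hard-coded paths above expect):

    # Hypothetical snippet; not part of this commit
    from utils.data_loader import LeaderboardDataLoader

    loader = LeaderboardDataLoader()
    zero_shot_df = loader.get_dataframe('zero_shot')   # DataFrame backing the Zero-Shot board

    print(loader.n_models)                             # number of models on the board
    print(zero_shot_df[['Model', 'Average Performance']].head())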
utils/filter_manager.py ADDED
@@ -0,0 +1,151 @@
+ from typing import Dict, List, Any
+ import pandas as pd
+
+ class FilterManager:
+     """Manages filtering logic for all leaderboard types"""
+
+     def __init__(self, data_loader):
+         self.data_loader = data_loader
+         self.valid_tasks = {
+             'NUBES', 'NorSynthClinical-NER', 'MEDIQA 2023-sum-A', 'Medication extraction',
+             'IMCS-V2-DAC', 'Cantemist-Coding', 'IFMIR-NER', 'EHRQA-QA', 'Ex4CDS', 'MedDG',
+             'MTS-Temporal', 'CHIP-MDCFNPC', 'n2c2 2014-Diabetes', 'MIMIC-III Outcome.LoS',
+             'n2c2 2014-Hypertension', 'RuCCoN', 'CARES-ICD10 Chapter', 'RuDReC-NER', 'MIMIC-IV DiReCT.Dis',
+             'n2c2 2014-Medication', 'iCorpus', 'Brateca-Hospitalization', 'n2c2 2010-Assertion',
+             'NorSynthClinical-PHI', 'IFMIR - NER&factuality', 'JP-STS', 'NorSynthClinical-RE',
+             'n2c2 2010-Concept', 'BARR2', 'IMCS-V2-NER', 'IMCS-V2-MRG', 'cMedQA', 'MedSTS',
+             'BRONCO150-NER&Status', 'n2c2 2018-ADE&medication', 'CLISTER', 'ClinicalNotes-UPMC',
+             'PPTS', 'CLIP', 'IMCS-V2-SR', 'EHRQA-Sub department', 'BrainMRI-AIS', 'Brateca-Mortality',
+             'meddocan', 'CHIP-CDEE', 'CAS-evidence', 'MEDIQA 2019-RQE', 'Cantemis-Norm', 'MEDIQA 2023-sum-B',
+             'CHIP-CTC', 'C-EMRS', 'CARES ICD10 Block', 'Cantemis-NER', 'CLINpt-NER', 'MEDIQA 2023-chat-A',
+             'n2c2 2014-De-identification', 'n2c2 2014-Hyperlipidemia', 'EHRQA-Primary department',
+             'ADE-Drug dosage', 'IFMIR-Incident type', 'MIMIC-III Outcome.Mortality', 'n2c2 2006-De-identification',
+             'CAS-label', 'MIMIC-IV CDM', 'CodiEsp-ICD-10-CM', 'n2c2 2010-Relation', 'CARES-ICD10 Subblock',
+             'MIE', 'HealthCareMagic-100k', 'ADE-Identification', 'MIMIC-IV DiReCT.PDD', 'ADE-Extraction',
+             'DialMed', 'GOUT-CC-Consensus', 'GraSSCo PHI', 'RuMedNLI', 'RuMedDaNet', 'CBLUE-CDN', 'icliniq-10k',
+             'CARDIO-DE', 'CARES-Area', 'DiSMed-NER', 'CodiEsp-ICD-10-PCS', 'MedNLI', 'MTS', 'MIMIC-IV BHC',
+             'n2c2 2014-CAD'
+         }
+
+         # Initialize filter states for each leaderboard type
+         self.filter_states = {
+             'zero_shot': self._create_empty_filter_state(),
+             'few_shot': self._create_empty_filter_state(),
+             'cot': self._create_empty_filter_state()
+         }
+
+     def _create_empty_filter_state(self) -> Dict[str, List]:
+         """Create an empty filter state"""
+         return {
+             "Language": [],
+             "Task Type": [],
+             "Clinical Context": [],
+             "Data Access": [],
+             "Applications": [],
+             "Clinical Stage": []
+         }
+
+     def get_filtered_columns(self, filter_selections: Dict[str, List]) -> List[str]:
+         """
+         Given a dictionary of selected filters, return a list of all
+         the task columns that match the criteria.
+         """
+         valid_columns = []
+         for task in self.data_loader.task_information:
+             task_info = self.data_loader.task_information[task]
+
+             # Flag to keep track of whether this task is valid
+             is_valid = True
+
+             # Every attribute with an active filter must match
+             for attribute in task_info:
+                 # Skip attributes whose filter is empty
+                 if not filter_selections[attribute]:
+                     continue
+
+                 value = task_info[attribute]
+
+                 # Handle edge case for multiple categories: at least one must be selected
+                 if "," in value:
+                     all_categories = value.split(", ")
+                     if not any(category in filter_selections[attribute] for category in all_categories):
+                         is_valid = False
+                         break
+
+                 # Handle Brazilian Portuguese edge case
+                 elif (value == 'Portuguese\n(Brazilian)') and ('Portuguese' in filter_selections[attribute]):
+                     continue
+
+                 elif value not in filter_selections[attribute]:
+                     is_valid = False
+                     break
+
+             if task in self.valid_tasks and is_valid:
+                 valid_columns.append(task)
+
+         return valid_columns
+
+     def is_empty(self, filter_selections: Dict[str, List]) -> bool:
+         """Check if there are no selected filters"""
+         return all(not value for value in filter_selections.values())
+
+     def update_average_performance(self, leaderboard_type: str, selected_columns: List[str]) -> Dict[str, float]:
+         """
+         Calculate updated average performance based on selected columns
+         """
+         updated_average_performance = {}
+         leaderboard_json = self.data_loader.get_leaderboard_json(leaderboard_type)
+
+         for i in range(self.data_loader.n_models):
+             performance = 0
+             num_tasks = 0
+
+             for task in selected_columns:
+                 if task in leaderboard_json:
+                     num_tasks += 1
+                     performance += float(leaderboard_json[task][str(i)])
+
+             # Avoid division by zero when none of the selected tasks exist in this leaderboard
+             if num_tasks == 0:
+                 num_tasks = 1
+
+             updated_average_performance[f"{i}"] = float(round(performance / num_tasks, 2))
+
+         return updated_average_performance
+
+     def apply_filter(self, leaderboard_type: str, filter_type: str, filter_values: List[str]) -> pd.DataFrame:
+         """
+         Apply a filter to a specific leaderboard type and return the updated dataframe
+         """
+         # Update the filter state
+         self.filter_states[leaderboard_type][filter_type] = filter_values
+
+         # Get the dataframe
+         df = self.data_loader.get_dataframe(leaderboard_type).copy()
+
+         # If no filters are applied, reset to the original performance
+         if self.is_empty(self.filter_states[leaderboard_type]):
+             df["Average Performance"] = self.data_loader.get_original_performance(leaderboard_type)
+             return df
+
+         # Get filtered columns
+         filtered_cols = self.get_filtered_columns(self.filter_states[leaderboard_type])
+
+         # Update average performance
+         updated_performance = self.update_average_performance(leaderboard_type, filtered_cols)
+
+         # Convert dictionary keys to integers to match the DataFrame index
+         updated_performance_int = {int(k): v for k, v in updated_performance.items()}
+
+         # Map the values to the 'Average Performance' column based on index
+         df["Average Performance"] = df.index.map(updated_performance_int)
+
+         # Return dataframe with the base columns plus the filtered task columns
+         base_columns = ['T', 'Model', 'Model: Domain', 'Model: Accessibility', 'Model: Size Range', 'Size (B)', 'Average Performance']
+         return df[base_columns + filtered_cols]
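A small sketch of the filter flow; the leaderboard key and the selected value are illustrative and must match the labels actually stored in task_information.json:

    # Hypothetical snippet; not part of this commit
    from utils.data_loader import LeaderboardDataLoader
    from utils.filter_manager import FilterManager

    loader = LeaderboardDataLoader()
    manager = FilterManager(loader)

    # Restrict the zero-shot board to Spanish tasks; the "Average Performance"
    # column is recomputed over the matching task columns only
    filtered_df = manager.apply_filter('zero_shot', 'Language', ['Spanish'])
    print(filtered_df[['Model', 'Average Performance']].head())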
utils/leaderboard_postprocessor.py ADDED
@@ -0,0 +1,61 @@
+ import warnings
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, Literal
+ import pandas as pd
+ from pandas.io.formats.style import Styler
+ import semantic_version
+ from gradio.data_classes import GradioModel
+
+ class DataframeData(GradioModel):
+     headers: List[str]
+     data: Union[List[List[Any]], List[Tuple[Any, ...]]]
+     metadata: Optional[Dict[str, Optional[List[Any]]]] = None
+
+ def postprocess(self, value: pd.DataFrame) -> DataframeData:
+     """Custom postprocess function that sorts by Average Performance"""
+     # Ensure that the "Average Performance" column exists
+     if "Average Performance" in value.columns:
+         # Sort the DataFrame by the "Average Performance" column in descending order
+         value = value.sort_values(by="Average Performance", ascending=False)
+
+         return DataframeData(
+             headers=list(value.columns),  # type: ignore
+             data=value.to_dict(orient="split")["data"],  # type: ignore
+         )
+
+     if value is None:
+         return self.postprocess(pd.DataFrame({"column 1": []}))
+     if isinstance(value, (str, pd.DataFrame)):
+         if isinstance(value, str):
+             value = pd.read_csv(value)  # type: ignore
+         if len(value) == 0:
+             return DataframeData(
+                 headers=list(value.columns),  # type: ignore
+                 data=[[]],  # type: ignore
+             )
+         return DataframeData(
+             headers=list(value.columns),  # type: ignore
+             data=value.to_dict(orient="split")["data"],  # type: ignore
+         )
+     elif isinstance(value, Styler):
+         if semantic_version.Version(pd.__version__) < semantic_version.Version(
+             "1.5.0"
+         ):
+             raise ValueError(
+                 "Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
+             )
+         if self.interactive:
+             warnings.warn(
+                 "Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
+             )
+         df: pd.DataFrame = value.data  # type: ignore
+         if len(df) == 0:
+             return DataframeData(
+                 headers=list(df.columns),
+                 data=[[]],
+                 metadata=self.__extract_metadata(value),  # type: ignore
+             )
+         return DataframeData(
+             headers=list(df.columns),
+             data=df.to_dict(orient="split")["data"],  # type: ignore
+             metadata=self.__extract_metadata(value),  # type: ignore
+         )
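Because the function takes self, it is evidently intended to be bound onto a Gradio dataframe-style component rather than called directly. One plausible wiring, not shown in this commit and therefore an assumption, is to monkey-patch the Leaderboard component's postprocess:

    # Hypothetical wiring; the actual patch site is not part of this commit
    from gradio_leaderboard import Leaderboard
    from utils.leaderboard_postprocessor import postprocess

    # Override the default postprocess so every rendered board is sorted
    # by "Average Performance" before being sent to the front end
    Leaderboard.postprocess = postprocess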
utils/ui_components.py ADDED
@@ -0,0 +1,78 @@
+ import gradio as gr
+ from gradio_leaderboard import Leaderboard
+ from typing import List, Tuple, Callable
+
+ class UIComponents:
+     """Handles creation of UI components for the leaderboard"""
+
+     @staticmethod
+     def create_filter_components() -> Tuple[List[gr.CheckboxGroup], List[str]]:
+         """Create all filter components and return them with their labels"""
+
+         # Language Filter
+         all_languages = ['English', 'Spanish', 'Chinese', 'Norwegian',
+                          'Russian', 'Portuguese', 'German', 'Japanese', 'French']
+         language_options = gr.CheckboxGroup(all_languages, label="Filter Task: Language")
+
+         # Task Type Filter
+         all_task_types = ['Question Answering', 'Text Classification', 'Named Entity Recognition',
+                           'Normalization and Coding', 'Natural Language Inference', 'Summarization',
+                           'Event Extraction', 'Semantic Similarity']
+         task_type_options = gr.CheckboxGroup(all_task_types, label="Filter Task: Task Type")
+
+         # Clinical Context Filter
+         all_clinical_contexts = ['Neurology', 'Oncology', 'Radiology', 'Pulmonology',
+                                  'Cardiology', 'Dermatology', 'Critical Care', 'Nephrology',
+                                  'General', 'Endocrinology', 'Pediatrics', 'Pharmacology',
+                                  'Gastroenterology', 'Psychology']
+         cc_options = gr.CheckboxGroup(all_clinical_contexts, label="Filter Task: Clinical Context")
+
+         # Applications Filter
+         all_applications = ['Procudure information', 'Concept standarization',
+                             'Specialist recommendation', 'Negation identification',
+                             'Clinical trial matching', 'Consultation summarization',
+                             'Semantic relation', 'Post-discharge patient management',
+                             'De-identification', 'Billing & Coding', 'Phenotyping',
+                             'Data organization', 'Temporal & Causality relation',
+                             'Summarization', 'Screen & Consultation', 'Diagnosis',
+                             'ADE & Incidents', 'Risk factor extraction', 'Prognosis',
+                             'Medication information']
+         application_options = gr.CheckboxGroup(all_applications, label="Filter Task: Clinical Application")
+
+         # Clinical Stage Filter
+         all_stages = ['Treatment and Intervention', 'Triage and Referral',
+                       'Initial Assessment', 'Discharge and Administration',
+                       'Research', 'Diagnosis and Prognosis']
+         stage_options = gr.CheckboxGroup(all_stages, label="Filter Task: Clinical Stage")
+
+         # Data Access Filter
+         all_data_access = ['Open Access', 'Regulated']
+         da_options = gr.CheckboxGroup(all_data_access, label="Filter Task: Data Access")
+
+         components = [language_options, task_type_options, cc_options,
+                       application_options, stage_options, da_options]
+
+         filter_types = ["Language", "Task Type", "Clinical Context",
+                         "Applications", "Clinical Stage", "Data Access"]
+
+         return components, filter_types
+
+     @staticmethod
+     def setup_filter_events(components: List[gr.CheckboxGroup],
+                             filter_types: List[str],
+                             leaderboard: Leaderboard,
+                             filter_manager,
+                             leaderboard_type: str):
+         """Setup event handlers for filter components"""
+
+         def create_filter_function(filter_type: str, lb_type: str):
+             """Create a filter function with proper closure"""
+             return lambda values: filter_manager.apply_filter(lb_type, filter_type, values)
+
+         for component, filter_type in zip(components, filter_types):
+             filter_fn = create_filter_function(filter_type, leaderboard_type)
+             component.change(
+                 fn=filter_fn,
+                 inputs=component,
+                 outputs=leaderboard
+             )
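A minimal sketch of how these utilities might be assembled into an app; the Blocks layout and the Leaderboard constructor arguments are assumptions, since this commit contains only the utils package:

    # Hypothetical assembly; only the utils classes below come from this commit
    import gradio as gr
    from gradio_leaderboard import Leaderboard
    from utils import LeaderboardDataLoader, FilterManager, UIComponents

    data_loader = LeaderboardDataLoader()
    filter_manager = FilterManager(data_loader)

    with gr.Blocks() as demo:
        components, filter_types = UIComponents.create_filter_components()
        leaderboard = Leaderboard(value=data_loader.get_dataframe('zero_shot'))
        UIComponents.setup_filter_events(components, filter_types, leaderboard,
                                         filter_manager, 'zero_shot')

    demo.launch()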