jasonshaoshun committed on
Commit
66f5701
·
1 Parent(s): c50d688
Files changed (1)
  1. app.py +60 -329
app.py CHANGED
@@ -127,386 +127,117 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT
 from src.about import TasksMib_Subgraph
 
 
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Get unique tasks and models for filters
-#     tasks = list(set(task.value.benchmark for task in TasksMib_Subgraph))
-#     models = list(set(
-#         model
-#         for task in TasksMib_Subgraph
-#         for model in task.value.models
-#     ))
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
-#             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
-#             label="Select Columns to Display:",
-#         ),
-#         column_filters=[
-#             ColumnFilter(
-#                 column="task_filter",
-#                 choices=tasks,
-#                 label="Filter by Task:",
-#                 default=None
-#             ),
-#             ColumnFilter(
-#                 column="model_filter",
-#                 choices=models,
-#                 label="Filter by Model:",
-#                 default=None
-#             )
-#         ],
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Add filter columns to display
-#     dataframe['Task'] = dataframe.apply(
-#         lambda row: [task.value.benchmark for task in TasksMib_Subgraph
-#                      if any(f"{task.value.benchmark}_{model}" in row.index
-#                             for model in task.value.models)][0],
-#         axis=1
-#     )
-
-#     dataframe['Model'] = dataframe.apply(
-#         lambda row: [model for task in TasksMib_Subgraph
-#                      for model in task.value.models
-#                      if f"{task.value.benchmark}_{model}" in row.index][0],
-#         axis=1
-#     )
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
-#             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
-#             label="Select Columns to Display:",
-#         ),
-#         search_columns=["Method", "Task", "Model"], # Add Task and Model to searchable columns
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
 
 # def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with grouped column selection."""
+#     """Initialize the subgraph leaderboard with grouped column selection by benchmark."""
 #     if dataframe is None or dataframe.empty:
 #         raise ValueError("Leaderboard DataFrame is empty or None.")
 
-#     # Get tasks and models using the new class methods
-#     tasks = TasksMib_Subgraph.get_all_tasks()
-#     models = TasksMib_Subgraph.get_all_models()
-
-#     # Create a mapping from selection to actual column names
-#     selection_map = {}
+#     print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
 
-#     # Add task mappings - when a task is selected, show all its columns
-#     for task in tasks:
-#         # For each task, find all valid task_model combinations
-#         valid_combos = []
-#         for model in models:
-#             col_name = f"{task}_{model}"
-#             if col_name in dataframe.columns:
-#                 valid_combos.append(col_name)
-#         if valid_combos:
-#             selection_map[task] = valid_combos
+#     # Create groups of columns by benchmark
+#     benchmark_groups = []
 
-#     # Add model mappings - when a model is selected, show all its columns
-#     for model in models:
-#         # For each model, find all valid task_model combinations
-#         valid_combos = []
-#         for task in tasks:
-#             col_name = f"{task}_{model}"
-#             if col_name in dataframe.columns:
-#                 valid_combos.append(col_name)
-#         if valid_combos:
-#             selection_map[model] = valid_combos
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             choices=[tasks, models], # Two groups of choices
-#             labels=["Tasks", "Models"], # Labels for each group
-#             default_selection=[*tasks, *models], # Show everything by default
-#             cant_deselect=["Method"], # Method column always visible
-#             label="Filter by Tasks or Models:",
-#             selection_map=selection_map # Map selections to actual columns
-#         ),
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with grouped column selection for gradio-leaderboard 0.0.13"""
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Get all unique tasks and models
-#     tasks = [task.value.benchmark for task in TasksMib_Subgraph]
-#     models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
-
-#     # Create two selection groups: one for tasks and one for models
-#     # In 0.0.13, we can only have one SelectColumns, so we'll combine them
-#     selection_choices = [
-#         *[f"Task: {task}" for task in tasks], # Prefix with 'Task:' for clarity
-#         *[f"Model: {model}" for model in models] # Prefix with 'Model:' for clarity
-#     ]
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=selection_choices, # Show all by default
-#             choices=selection_choices,
-#             cant_deselect=["Method"], # Method column always visible
-#             label="Select Tasks or Models:",
-#         ),
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard focusing only on task and model filtering.
-
-#     This implementation creates a focused view where users can select which task-model
-#     combinations they want to see, making the analysis of results more straightforward.
-#     """
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Get all task-model combinations that actually exist in our data
-#     task_model_columns = []
 #     for task in TasksMib_Subgraph:
-#         for model in task.value.models:
-#             col_name = f"{task.value.benchmark}_{model}"
-#             if col_name in dataframe.columns:
-#                 task_model_columns.append(col_name)
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=task_model_columns,
-#             label="Select Task-Model Combinations:",
-#         ),
-#         search_columns=["Method"], # Keep Method searchable but not in column selection
-#         hide_columns=[], # We don't need to hide any columns
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
-
-
-
-
-
-
-
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with verified task/model column selection"""
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # First, let's identify which columns actually exist in our dataframe
-#     print("Available columns in dataframe:", dataframe.columns.tolist())
-
-#     # Create task selections based on TasksMib_Subgraph definition
-#     task_selections = []
-#     for task in TasksMib_Subgraph:
-#         task_cols = []
-#         for model in task.value.models:
-#             col_name = f"{task.value.benchmark}_{model}"
-#             if col_name in dataframe.columns:
-#                 task_cols.append(col_name)
-
-#         if task_cols: # Only add tasks that have data
-#             print(f"Task {task.value.benchmark} has columns:", task_cols)
-#             task_selections.append(f"Task: {task.value.benchmark}")
-
-#     # Create model selections by checking which models appear in columns
-#     model_selections = []
+#         benchmark = task.value.benchmark
+#         # Get all valid columns for this benchmark's models
+#         benchmark_cols = [
+#             f"{benchmark}_{model}"
+#             for model in task.value.models
+#             if f"{benchmark}_{model}" in dataframe.columns
+#         ]
+#         if benchmark_cols: # Only add if we have valid columns
+#             benchmark_groups.append(benchmark_cols)
+#             print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
+
+#     # Create model groups as well
+#     model_groups = []
 #     all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
 
+#     # For each unique model...
 #     for model in all_models:
-#         model_cols = []
-#         for task in TasksMib_Subgraph:
-#             if model in task.value.models:
-#                 col_name = f"{task.value.benchmark}_{model}"
-#                 if col_name in dataframe.columns:
-#                     model_cols.append(col_name)
-
-#         if model_cols: # Only add models that have data
-#             print(f"Model {model} has columns:", model_cols)
-#             model_selections.append(f"Model: {model}")
-
-#     # Combine all selections
-#     selections = task_selections + model_selections
-#     print("Final selection options:", selections)
-
-#     # Print DataFrame information
-#     print("\nDebugging DataFrame:")
-#     print("DataFrame columns:", dataframe.columns.tolist())
-#     print("DataFrame shape:", dataframe.shape)
-#     print("DataFrame head:\n", dataframe.head())
-
-#     return Leaderboard(
-#         value=dataframe,
-#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
-#         select_columns=SelectColumns(
-#             default_selection=selections,
-#             label="Select Tasks or Models:"
-#         ),
-#         search_columns=["Method"],
-#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
-#         bool_checkboxgroup_label="Hide models",
-#         interactive=False,
-#     )
-
+#         # Get all valid columns for this model across benchmarks
+#         model_cols = [
+#             f"{task.value.benchmark}_{model}"
+#             for task in TasksMib_Subgraph
+#             if model in task.value.models
+#             and f"{task.value.benchmark}_{model}" in dataframe.columns
+#         ]
+#         if model_cols: # Only add if we have valid columns
+#             model_groups.append(model_cols)
+#             print(f"\nModel group for {model}:", model_cols)
+
+#     # Combine all groups
+#     all_groups = benchmark_groups + model_groups
+
+#     # Flatten groups for default selection (show everything initially)
+#     all_columns = [col for group in all_groups for col in group]
+#     print("\nAll available columns:", all_columns)
 
-
-# def init_leaderboard_mib_subgraph(dataframe, track):
-#     """Initialize the subgraph leaderboard with benchmark and model filtering capabilities."""
-#     if dataframe is None or dataframe.empty:
-#         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-#     # Print DataFrame information for debugging
-#     # print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
-
-#     # Get result columns (excluding Method and Average)
-#     result_columns = [col for col in dataframe.columns
-#                       if col not in ['Method', 'Average', 'eval_name'] and '_' in col]
-
-#     # Create benchmark and model selections
-#     benchmarks = set()
-#     models = set()
-
-#     print(f"\nDebugging Result Columns: {result_columns}")
-#     # Extract unique benchmarks and models from column names
-#     for col in result_columns:
-#         print(f"col is {col}")
-#         benchmark, model = col.split('_', maxsplit=1)
-#         benchmarks.add(benchmark)
-#         models.add(model)
-#         print(f"benchmark is {benchmark} and model is {model}")
-
-#     # Create selection groups
-#     benchmark_selections = {
-#         # For each benchmark, store which columns should be shown
-#         benchmark: [col for col in result_columns if col.startswith(f"{benchmark}_")]
-#         for benchmark in benchmarks
-#     }
-
-#     model_selections = {
-#         # For each model, store which columns should be shown
-#         model: [col for col in result_columns if col.startswith(f"_{model}")]
-#         for model in models
-#     }
-
-#     # Combine the selection mappings
-#     selection_groups = {
-#         **benchmark_selections,
-#         **model_selections
-#     }
-
-#     print("\nDebugging Selection Groups:")
-#     print("Benchmarks:", benchmark_selections.keys())
-#     print("Models:", model_selections.keys())
-
-#     # Convert keys to list for selection options
-#     selection_options = list(selection_groups.keys())
-
 #     return Leaderboard(
 #         value=dataframe,
 #         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
 #         select_columns=SelectColumns(
-#             default_selection=selection_options, # Show all options by default
-#             label="Filter by Benchmark or Model:"
+#             default_selection=all_columns, # Show all columns initially
+#             label="Select Results:"
 #         ),
 #         search_columns=["Method"],
 #         hide_columns=[],
 #         interactive=False,
 #     )
 
-
-
-
 def init_leaderboard_mib_subgraph(dataframe, track):
-    """Initialize the subgraph leaderboard with grouped column selection by benchmark."""
+    """Initialize the subgraph leaderboard with group-based column selection."""
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
 
     print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
 
-    # Create groups of columns by benchmark
-    benchmark_groups = []
+    # Create selection mapping for benchmark groups
+    selection_mapping = {}
 
-    # For each benchmark in our TasksMib_Subgraph enum...
+    # Create benchmark groups with descriptive names
     for task in TasksMib_Subgraph:
         benchmark = task.value.benchmark
-        # Get all valid columns for this benchmark's models
+        # Get all columns for this benchmark's models
         benchmark_cols = [
             f"{benchmark}_{model}"
             for model in task.value.models
             if f"{benchmark}_{model}" in dataframe.columns
         ]
-        if benchmark_cols: # Only add if we have valid columns
-            benchmark_groups.append(benchmark_cols)
-            print(f"\nBenchmark group for {benchmark}:", benchmark_cols)
+        if benchmark_cols:
+            # Use a descriptive group name as the key
+            group_name = f"Benchmark: {benchmark.upper()}"
+            selection_mapping[group_name] = benchmark_cols
+            print(f"\n{group_name} maps to:", benchmark_cols)
 
-    # Create model groups as well
-    model_groups = []
+    # Create model groups with descriptive names
     all_models = list(set(model for task in TasksMib_Subgraph for model in task.value.models))
-
-    # For each unique model...
     for model in all_models:
-        # Get all valid columns for this model across benchmarks
+        # Get all columns for this model across benchmarks
         model_cols = [
             f"{task.value.benchmark}_{model}"
            for task in TasksMib_Subgraph
            if model in task.value.models
            and f"{task.value.benchmark}_{model}" in dataframe.columns
        ]
-        if model_cols: # Only add if we have valid columns
-            model_groups.append(model_cols)
-            print(f"\nModel group for {model}:", model_cols)
+        if model_cols:
+            # Use a descriptive group name as the key
+            group_name = f"Model: {model}"
+            selection_mapping[group_name] = model_cols
+            print(f"\n{group_name} maps to:", model_cols)
 
-    # Combine all groups
-    all_groups = benchmark_groups + model_groups
-
-    # Flatten groups for default selection (show everything initially)
-    all_columns = [col for group in all_groups for col in group]
-    print("\nAll available columns:", all_columns)
+    # The selection options are the group names
+    selection_options = list(selection_mapping.keys())
+    print("\nSelection options:", selection_options)
 
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=SelectColumns(
-            default_selection=all_columns, # Show all columns initially
-            label="Select Results:"
+            default_selection=selection_options, # Show all groups by default
+            label="Select Benchmark or Model Groups:"
         ),
         search_columns=["Method"],
         hide_columns=[],
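
For context, a minimal sketch of how the reworked init_leaderboard_mib_subgraph might be wired into the app's Gradio UI. This is illustrative only: the LEADERBOARD_DF_MIB_SUBGRAPH name, the tab layout, and the track value are assumptions, not part of this commit.

import gradio as gr

# Hypothetical usage sketch -- not part of this commit. Assumes a pre-built
# results DataFrame (called LEADERBOARD_DF_MIB_SUBGRAPH here) with a "Method"
# column plus one "<benchmark>_<model>" score column per task/model pair.
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Subgraph Track"):
            # Builds the gradio_leaderboard.Leaderboard component; its
            # SelectColumns widget lists the "Benchmark: ..." / "Model: ..."
            # group names computed above as the selectable options.
            leaderboard = init_leaderboard_mib_subgraph(
                LEADERBOARD_DF_MIB_SUBGRAPH,  # assumed results DataFrame
                track="Subgraph",             # assumed label; unused in the function body
            )

demo.launch()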