jasonshaoshun committed on
Commit 7d21286 · 1 Parent(s): 4a47622
Files changed (1):
  1. app.py +114 -41
app.py CHANGED
@@ -125,6 +125,8 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT
 
 
 from src.about import TasksMib_Subgraph
+
+
 # def init_leaderboard_mib_subgraph(dataframe, track):
 #     if dataframe is None or dataframe.empty:
 #         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -386,62 +388,133 @@ from src.about import TasksMib_Subgraph
 
 
 
+# def init_leaderboard_mib_subgraph(dataframe, track):
+#     """Initialize the subgraph leaderboard with benchmark and model filtering capabilities."""
+#     if dataframe is None or dataframe.empty:
+#         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+#     # Print DataFrame information for debugging
+#     # print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
+
+#     # Get result columns (excluding Method and Average)
+#     result_columns = [col for col in dataframe.columns
+#                       if col not in ['Method', 'Average', 'eval_name'] and '_' in col]
+
+#     # Create benchmark and model selections
+#     benchmarks = set()
+#     models = set()
+
+#     print(f"\nDebugging Result Columns: {result_columns}")
+#     # Extract unique benchmarks and models from column names
+#     for col in result_columns:
+#         print(f"col is {col}")
+#         benchmark, model = col.split('_', maxsplit=1)
+#         benchmarks.add(benchmark)
+#         models.add(model)
+#         print(f"benchmark is {benchmark} and model is {model}")
+
+#     # Create selection groups
+#     benchmark_selections = {
+#         # For each benchmark, store which columns should be shown
+#         benchmark: [col for col in result_columns if col.startswith(f"{benchmark}_")]
+#         for benchmark in benchmarks
+#     }
+
+#     model_selections = {
+#         # For each model, store which columns should be shown
+#         model: [col for col in result_columns if col.startswith(f"_{model}")]
+#         for model in models
+#     }
+
+#     # Combine the selection mappings
+#     selection_groups = {
+#         **benchmark_selections,
+#         **model_selections
+#     }
+
+#     print("\nDebugging Selection Groups:")
+#     print("Benchmarks:", benchmark_selections.keys())
+#     print("Models:", model_selections.keys())
+
+#     # Convert keys to list for selection options
+#     selection_options = list(selection_groups.keys())
+
+#     return Leaderboard(
+#         value=dataframe,
+#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
+#         select_columns=SelectColumns(
+#             default_selection=selection_options, # Show all options by default
+#             label="Filter by Benchmark or Model:"
+#         ),
+#         search_columns=["Method"],
+#         hide_columns=[],
+#         interactive=False,
+#     )
+
+
+
+
+
 def init_leaderboard_mib_subgraph(dataframe, track):
-    """Initialize the subgraph leaderboard with benchmark and model filtering capabilities."""
+    """Initialize the subgraph leaderboard with benchmark and model filtering using direct enum access."""
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
 
-    # Print DataFrame information for debugging
-    # print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
-
-    # Get result columns (excluding Method and Average)
-    result_columns = [col for col in dataframe.columns
-                      if col not in ['Method', 'Average', 'eval_name'] and '_' in col]
-
-    # Create benchmark and model selections
-    benchmarks = set()
-    models = set()
-
-    print(f"\nDebugging Result Columns: {result_columns}")
-    # Extract unique benchmarks and models from column names
-    for col in result_columns:
-        print(f"col is {col}")
-        benchmark, model = col.split('_', maxsplit=1)
-        benchmarks.add(benchmark)
-        models.add(model)
-        print(f"benchmark is {benchmark} and model is {model}")
-
-    # Create selection groups
-    benchmark_selections = {
-        # For each benchmark, store which columns should be shown
-        benchmark: [col for col in result_columns if col.startswith(f"{benchmark}_")]
-        for benchmark in benchmarks
-    }
-
-    model_selections = {
-        # For each model, store which columns should be shown
-        model: [col for col in result_columns if col.startswith(f"_{model}")]
-        for model in models
-    }
-
-    # Combine the selection mappings
+    print("\nDebugging DataFrame columns:", dataframe.columns.tolist())
+
+    # Get benchmarks directly from TasksMib_Subgraph
+    benchmarks = [task.value.benchmark for task in TasksMib_Subgraph]
+    print("\nBenchmarks from enum:", benchmarks)
+
+    # Get unique models from all tasks
+    models = list(set(
+        model                            # Get each model
+        for task in TasksMib_Subgraph    # For each task
+        for model in task.value.models   # Get all its models
+    ))
+    print("\nModels from enum:", models)
+
+    # Create benchmark selections - map each benchmark to its columns
+    benchmark_selections = {}
+    for task in TasksMib_Subgraph:
+        benchmark = task.value.benchmark
+        # For this benchmark, get all its valid model combinations
+        valid_columns = [
+            f"{benchmark}_{model}"
+            for model in task.value.models
+            if f"{benchmark}_{model}" in dataframe.columns
+        ]
+        benchmark_selections[benchmark] = valid_columns
+        print(f"\nBenchmark {benchmark} maps to columns:", valid_columns)
+
+    # Create model selections - map each model to its columns
+    model_selections = {}
+    for model in models:
+        # For this model, find all benchmarks where it's used
+        valid_columns = [
+            f"{task.value.benchmark}_{model}"
+            for task in TasksMib_Subgraph
+            if model in task.value.models
+            and f"{task.value.benchmark}_{model}" in dataframe.columns
+        ]
+        model_selections[model] = valid_columns
+        print(f"\nModel {model} maps to columns:", valid_columns)
+
+    # Combine all selections
     selection_groups = {
         **benchmark_selections,
         **model_selections
     }
 
-    print("\nDebugging Selection Groups:")
-    print("Benchmarks:", benchmark_selections.keys())
-    print("Models:", model_selections.keys())
-
-    # Convert keys to list for selection options
+    # Get the final selection options
    selection_options = list(selection_groups.keys())
+    print("\nFinal selection options:", selection_options)
 
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=SelectColumns(
-            default_selection=selection_options, # Show all options by default
+            default_selection=selection_options,
             label="Filter by Benchmark or Model:"
         ),
         search_columns=["Method"],
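
Note: the rewritten init_leaderboard_mib_subgraph assumes that TasksMib_Subgraph (imported from src.about) is an Enum whose members expose task.value.benchmark and task.value.models, and that the leaderboard DataFrame names its result columns f"{benchmark}_{model}". A minimal sketch of that assumed shape, using hypothetical task, benchmark, and model names, is given below; the real definitions live in src/about.py and may differ.

# Hypothetical sketch only, not part of this commit; real names and members may differ.
from dataclasses import dataclass
from enum import Enum

@dataclass(frozen=True)
class TaskConfig:                      # assumed container for one task's metadata
    benchmark: str                     # e.g. "ioi"
    models: tuple                      # e.g. ("gpt2", "qwen2_5")

class TasksMib_Subgraph(Enum):         # sketch of the enum shape the new function reads
    task0 = TaskConfig("ioi", ("gpt2", "qwen2_5"))
    task1 = TaskConfig("mcqa", ("qwen2_5", "llama3"))

# With DataFrame columns named f"{benchmark}_{model}" (e.g. "ioi_gpt2"), the new
# function would build selection groups such as:
#   benchmark_selections["ioi"]  == ["ioi_gpt2", "ioi_qwen2_5"]
#   model_selections["qwen2_5"]  == ["ioi_qwen2_5", "mcqa_qwen2_5"]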