Spaces:

mib-bench
/

leaderboard

Running

App Files Community

jasonshaoshun committed on Jan 30

Commit

3a309c3

1 Parent(s): e1a39f1

causal-track debug

Browse files

Files changed (1) hide show

src/leaderboard/read_evals.py +50 -13

src/leaderboard/read_evals.py CHANGED Viewed

@@ -360,34 +360,71 @@ class EvalResult_MIB_CAUSALGRAPH:
 def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
     """
     Aggregates rows with the same base method name by taking the max value for each column.
-    Works with Method as a regular column instead of index.
     """
     df_copy = df.copy()
-    print("\nBase methods extraction:")
-    base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
-                   else name for name in df_copy['Method']]
-    print(f"Original methods: {df_copy['Method'].tolist()}")
-    print(f"Base methods: {base_methods}")
     # Convert scores to numeric values
     score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
     for col in score_columns:
-        df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) else x)
-    # Group by base method name and take the max
-    aggregated_df = df_copy.groupby('base_method')[score_columns].max().round(3)
-    # Reset index to make base_method a regular column and rename it to Method
     aggregated_df = aggregated_df.reset_index()
     aggregated_df = aggregated_df.rename(columns={'base_method': 'Method'})
-    # Convert back to string format
     for col in score_columns:
-        aggregated_df[col] = aggregated_df[col].apply(lambda x: f"{x:.3f}")
     return aggregated_df

+# def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
+#     """
+#     Aggregates rows with the same base method name by taking the max value for each column.
+#     Works with Method as a regular column instead of index.
+#     """
+#     df_copy = df.copy()
+#     print("\nBase methods extraction:")
+#     base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
+#                    else name for name in df_copy['Method']]
+#     print(f"Original methods: {df_copy['Method'].tolist()}")
+#     print(f"Base methods: {base_methods}")
+#     df_copy['base_method'] = base_methods
+#     # Convert scores to numeric values
+#     score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
+#     for col in score_columns:
+#         df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) else x)
+#     # Group by base method name and take the max
+#     aggregated_df = df_copy.groupby('base_method')[score_columns].max().round(3)
+#     # Reset index to make base_method a regular column and rename it to Method
+#     aggregated_df = aggregated_df.reset_index()
+#     aggregated_df = aggregated_df.rename(columns={'base_method': 'Method'})
+#     # Convert back to string format
+#     for col in score_columns:
+#         aggregated_df[col] = aggregated_df[col].apply(lambda x: f"{x:.3f}")
+#     return aggregated_df
 def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
     """
     Aggregates rows with the same base method name by taking the max value for each column.
     """
     df_copy = df.copy()
+    # Extract base method names (remove only the numeric suffix)
+    def get_base_method(name):
+        if '_' in name:
+            # Split from the right once to handle method names that contain '_'
+            base, last_part = name.rsplit('_', 1)
+            if last_part.isdigit():
+                return base
+        return name
+    base_methods = [get_base_method(name) for name in df_copy['Method']]
+    df_copy['base_method'] = base_methods
     # Convert scores to numeric values
     score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
     for col in score_columns:
+        df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) and not pd.isna(x) else x)
+    # Group by base method name and take the max, handling NaN values
+    aggregated_df = df_copy.groupby('base_method')[score_columns].agg(lambda x: np.nanmax(x)).round(3)
+    # Convert back to string format and reset index
     aggregated_df = aggregated_df.reset_index()
     aggregated_df = aggregated_df.rename(columns={'base_method': 'Method'})
+    # Convert numeric values back to strings with 3 decimal places
     for col in score_columns:
+        aggregated_df[col] = aggregated_df[col].apply(lambda x: f"{x:.3f}" if not pd.isna(x) else x)
     return aggregated_df