Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
3a309c3
1
Parent(s):
e1a39f1
causal-track debug
Browse files- src/leaderboard/read_evals.py +50 -13
src/leaderboard/read_evals.py
CHANGED
@@ -360,34 +360,71 @@ class EvalResult_MIB_CAUSALGRAPH:
|
|
360 |
|
361 |
|
362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
|
364 |
"""
|
365 |
Aggregates rows with the same base method name by taking the max value for each column.
|
366 |
-
Works with Method as a regular column instead of index.
|
367 |
"""
|
368 |
-
|
369 |
df_copy = df.copy()
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
|
376 |
# Convert scores to numeric values
|
377 |
score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
|
378 |
for col in score_columns:
|
379 |
-
df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) else x)
|
380 |
|
381 |
-
# Group by base method name and take the max
|
382 |
-
aggregated_df = df_copy.groupby('base_method')[score_columns].
|
383 |
|
384 |
-
#
|
385 |
aggregated_df = aggregated_df.reset_index()
|
386 |
aggregated_df = aggregated_df.rename(columns={'base_method': 'Method'})
|
387 |
|
388 |
-
# Convert back to
|
389 |
for col in score_columns:
|
390 |
-
aggregated_df[col] = aggregated_df[col].apply(lambda x: f"{x:.3f}")
|
391 |
|
392 |
return aggregated_df
|
393 |
|
|
|
360 |
|
361 |
|
362 |
|
363 |
+
# def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
|
364 |
+
# """
|
365 |
+
# Aggregates rows with the same base method name by taking the max value for each column.
|
366 |
+
# Works with Method as a regular column instead of index.
|
367 |
+
# """
|
368 |
+
# df_copy = df.copy()
|
369 |
+
# print("\nBase methods extraction:")
|
370 |
+
# base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
|
371 |
+
# else name for name in df_copy['Method']]
|
372 |
+
# print(f"Original methods: {df_copy['Method'].tolist()}")
|
373 |
+
# print(f"Base methods: {base_methods}")
|
374 |
+
|
375 |
+
|
376 |
+
# df_copy['base_method'] = base_methods
|
377 |
+
|
378 |
+
# # Convert scores to numeric values
|
379 |
+
# score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
|
380 |
+
# for col in score_columns:
|
381 |
+
# df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) else x)
|
382 |
+
|
383 |
+
# # Group by base method name and take the max
|
384 |
+
# aggregated_df = df_copy.groupby('base_method')[score_columns].max().round(3)
|
385 |
+
|
386 |
+
# # Reset index to make base_method a regular column and rename it to Method
|
387 |
+
# aggregated_df = aggregated_df.reset_index()
|
388 |
+
# aggregated_df = aggregated_df.rename(columns={'base_method': 'Method'})
|
389 |
+
|
390 |
+
# # Convert back to string format
|
391 |
+
# for col in score_columns:
|
392 |
+
# aggregated_df[col] = aggregated_df[col].apply(lambda x: f"{x:.3f}")
|
393 |
+
|
394 |
+
# return aggregated_df
|
395 |
def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregates rows with the same base method name by taking the max value for each column.

    The base method name is the ``Method`` value with one trailing
    ``_<digits>`` suffix removed (``"foo_bar_2"`` -> ``"foo_bar"``); names
    without such a suffix (``"baz"``, ``"qux_v2"``) are kept unchanged.

    Args:
        df: Frame with a ``Method`` column; every other column is treated as
            a score column whose values are numbers or numeric strings.
            Missing scores (``None``/NaN) are allowed.

    Returns:
        A new frame with one row per base method (sorted by name, as produced
        by ``groupby``), a ``Method`` column holding the base name, and each
        score column holding the per-group maximum formatted as a string with
        three decimals. A score that is missing for every row of a group
        stays NaN. The input frame is not modified.

    Raises:
        ValueError: If a score is a string that ``float()`` cannot parse.
    """
    df_copy = df.copy()

    # Extract base method names (remove only the numeric suffix)
    def get_base_method(name):
        # Split on the LAST underscore so method names that themselves
        # contain '_' keep everything before the numeric suffix.
        base, sep, last_part = name.rpartition('_')
        if sep and last_part.isdigit():
            return base
        return name

    df_copy['base_method'] = [get_base_method(name) for name in df_copy['Method']]

    # Convert scores to numeric values
    score_columns = [col for col in df_copy.columns if col not in ('Method', 'base_method')]
    for col in score_columns:
        # Only strings need parsing; numbers and missing values pass through.
        df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) else x)

    # Group by base method name and take the max. GroupBy.max() skips NaN by
    # default and yields NaN for an all-NaN group without the RuntimeWarning
    # that a per-group np.nanmax call raises in that case.
    aggregated_df = df_copy.groupby('base_method')[score_columns].max().round(3)

    # Reset index to make base_method a regular column and rename it to Method
    aggregated_df = aggregated_df.reset_index().rename(columns={'base_method': 'Method'})

    # Convert numeric values back to strings with 3 decimal places
    for col in score_columns:
        aggregated_df[col] = aggregated_df[col].apply(
            lambda x: f"{x:.3f}" if not pd.isna(x) else x
        )

    return aggregated_df
|
430 |
|