jasonshaoshun committed on
Commit
3a309c3
·
1 Parent(s): e1a39f1

causal-track debug

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +50 -13
src/leaderboard/read_evals.py CHANGED
@@ -360,34 +360,71 @@ class EvalResult_MIB_CAUSALGRAPH:
360
 
361
 
362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
364
  """
365
  Aggregates rows with the same base method name by taking the max value for each column.
366
- Works with Method as a regular column instead of index.
367
  """
368
-
369
  df_copy = df.copy()
370
- print("\nBase methods extraction:")
371
- base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
372
- else name for name in df_copy['Method']]
373
- print(f"Original methods: {df_copy['Method'].tolist()}")
374
- print(f"Base methods: {base_methods}")
 
 
 
 
 
 
 
375
 
376
  # Convert scores to numeric values
377
  score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
378
  for col in score_columns:
379
- df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) else x)
380
 
381
- # Group by base method name and take the max
382
- aggregated_df = df_copy.groupby('base_method')[score_columns].max().round(3)
383
 
384
- # Reset index to make base_method a regular column and rename it to Method
385
  aggregated_df = aggregated_df.reset_index()
386
  aggregated_df = aggregated_df.rename(columns={'base_method': 'Method'})
387
 
388
- # Convert back to string format
389
  for col in score_columns:
390
- aggregated_df[col] = aggregated_df[col].apply(lambda x: f"{x:.3f}")
391
 
392
  return aggregated_df
393
 
 
360
 
361
 
362
 
363
+ # def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
364
+ # """
365
+ # Aggregates rows with the same base method name by taking the max value for each column.
366
+ # Works with Method as a regular column instead of index.
367
+ # """
368
+ # df_copy = df.copy()
369
+ # print("\nBase methods extraction:")
370
+ # base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
371
+ # else name for name in df_copy['Method']]
372
+ # print(f"Original methods: {df_copy['Method'].tolist()}")
373
+ # print(f"Base methods: {base_methods}")
374
+
375
+
376
+ # df_copy['base_method'] = base_methods
377
+
378
+ # # Convert scores to numeric values
379
+ # score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
380
+ # for col in score_columns:
381
+ # df_copy[col] = df_copy[col].apply(lambda x: float(x) if isinstance(x, str) else x)
382
+
383
+ # # Group by base method name and take the max
384
+ # aggregated_df = df_copy.groupby('base_method')[score_columns].max().round(3)
385
+
386
+ # # Reset index to make base_method a regular column and rename it to Method
387
+ # aggregated_df = aggregated_df.reset_index()
388
+ # aggregated_df = aggregated_df.rename(columns={'base_method': 'Method'})
389
+
390
+ # # Convert back to string format
391
+ # for col in score_columns:
392
+ # aggregated_df[col] = aggregated_df[col].apply(lambda x: f"{x:.3f}")
393
+
394
+ # return aggregated_df
395
def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate rows that share a base method name, keeping the max of each score column.

    A method name such as ``"my_method_2"`` is reduced to its base name
    (``"my_method"``) by dropping a single trailing ``_<digits>`` suffix. Names
    without such a suffix are used unchanged.

    Args:
        df: Frame with a ``Method`` column. Every other column (except a
            pre-existing ``base_method`` column, which is overwritten) is
            treated as a score column holding numbers or numeric strings.

    Returns:
        A new frame with one row per base method, the base name stored in
        ``Method``, and every score rendered as a string with three decimals.
        Scores that are missing for a whole group stay NaN. The input frame is
        not modified.
    """
    def get_base_method(name):
        # Non-string labels (e.g. NaN) carry no suffix to strip.
        if not isinstance(name, str):
            return name
        # Split on the LAST underscore only, so base names that themselves
        # contain '_' are preserved.
        base, sep, suffix = name.rpartition('_')
        return base if sep and suffix.isdigit() else name

    df_copy = df.copy()
    df_copy['base_method'] = [get_base_method(name) for name in df_copy['Method']]

    # Convert scores to numeric values. Cells that cannot be parsed as numbers
    # (placeholders such as "n/a") become NaN instead of aborting the aggregation.
    score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
    for col in score_columns:
        df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce')

    # Grouped max() skips NaN on its own and yields NaN for all-NaN groups,
    # so no NumPy helper is needed here.
    aggregated_df = (
        df_copy.groupby('base_method')[score_columns]
        .max()
        .round(3)
        .reset_index()
        .rename(columns={'base_method': 'Method'})
    )

    # Convert numeric values back to strings with 3 decimal places; keep NaN as-is.
    for col in score_columns:
        aggregated_df[col] = aggregated_df[col].apply(
            lambda x: f"{x:.3f}" if not pd.isna(x) else x
        )

    return aggregated_df
430