jasonshaoshun committed on
Commit
e1a39f1
·
1 Parent(s): 85f4717

causal-track debug

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +7 -73
src/leaderboard/read_evals.py CHANGED
@@ -358,89 +358,20 @@ class EvalResult_MIB_CAUSALGRAPH:
358
  return data_dict
359
 
360
 
361
- # def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
362
- # """
363
- # Aggregates rows with the same base method name by taking the max value for each column.
364
- # """
365
- # # Create a copy of the DataFrame
366
- # df_copy = df.copy()
367
-
368
- # # Extract base method names (remove _2, _3, etc. suffixes)
369
- # base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
370
- # else name for name in df_copy.index]
371
- # df_copy.index = base_methods
372
-
373
- # # Convert scores to numeric values
374
- # def extract_score(score_str):
375
- # if isinstance(score_str, str):
376
- # return float(score_str)
377
- # return 0.0
378
-
379
- # numeric_df = df_copy.applymap(extract_score)
380
-
381
- # # Group by base method name and take the max
382
- # aggregated_df = numeric_df.groupby(level=0).max().round(3)
383
-
384
- # # Convert back to string format
385
- # aggregated_df = aggregated_df.applymap(lambda x: f"{x:.3f}")
386
-
387
- # return aggregated_df
388
 
389
 
390
- # def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
391
- # """
392
- # Creates a DataFrame where columns are model_task and cells are averaged over interventions.
393
- # """
394
- # # Create a copy of the DataFrame
395
- # df_copy = df.copy()
396
-
397
- # # Remove the Average column if it exists
398
- # if 'Average' in df_copy.columns:
399
- # df_copy = df_copy.drop('Average', axis=1)
400
-
401
- # # Function to extract score value from string
402
- # def extract_score(score_str):
403
- # if isinstance(score_str, str):
404
- # return float(score_str)
405
- # return 0.0
406
-
407
- # # Convert all scores to numeric values
408
- # numeric_df = df_copy.applymap(extract_score)
409
-
410
- # # Group columns by model_task
411
- # model_task_groups = {}
412
- # for col in numeric_df.columns:
413
- # model_task = '_'.join(col.split('_')[:2]) # Get model_task part
414
- # if model_task not in model_task_groups:
415
- # model_task_groups[model_task] = []
416
- # model_task_groups[model_task].append(col)
417
-
418
- # # Create new DataFrame with averaged intervention scores
419
- # averaged_df = pd.DataFrame({
420
- # model_task: numeric_df[cols].mean(axis=1).round(3)
421
- # for model_task, cols in model_task_groups.items()
422
- # })
423
-
424
- # # Add overall average column
425
- # averaged_df['Average'] = averaged_df.mean(axis=1).round(3)
426
-
427
- # # Sort by Average column
428
- # averaged_df = averaged_df.sort_values('Average', ascending=False)
429
-
430
- # return averaged_df
431
-
432
  def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
433
  """
434
  Aggregates rows with the same base method name by taking the max value for each column.
435
  Works with Method as a regular column instead of index.
436
  """
437
- # Create a copy of the DataFrame
438
  df_copy = df.copy()
439
-
440
- # Extract base method names (remove _2, _3, etc. suffixes)
441
  base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
442
  else name for name in df_copy['Method']]
443
- df_copy['base_method'] = base_methods
 
444
 
445
  # Convert scores to numeric values
446
  score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
@@ -551,6 +482,9 @@ def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str)
551
  # if "eval_name" in detailed_df.columns:
552
  # detailed_df.drop("eval_name", axis=1, inplace=True)
553
 
 
 
 
554
  # Create aggregated DataFrame
555
  aggregated_df = aggregate_methods(detailed_df)
556
 
 
358
  return data_dict
359
 
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
364
  """
365
  Aggregates rows with the same base method name by taking the max value for each column.
366
  Works with Method as a regular column instead of index.
367
  """
368
+
369
  df_copy = df.copy()
370
+ print("\nBase methods extraction:")
 
371
  base_methods = [name.split('_')[0] if '_' in name and name.split('_')[-1].isdigit()
372
  else name for name in df_copy['Method']]
373
+ print(f"Original methods: {df_copy['Method'].tolist()}")
374
+ print(f"Base methods: {base_methods}")
375
 
376
  # Convert scores to numeric values
377
  score_columns = [col for col in df_copy.columns if col not in ['Method', 'base_method']]
 
482
  # if "eval_name" in detailed_df.columns:
483
  # detailed_df.drop("eval_name", axis=1, inplace=True)
484
 
485
+ print("Before aggregation:")
486
+ print(detailed_df)
487
+
488
  # Create aggregated DataFrame
489
  aggregated_df = aggregate_methods(detailed_df)
490