jasonshaoshun committed on
Commit
02e508b
·
1 Parent(s): a100ebc
Files changed (2) hide show
  1. src/leaderboard/read_evals.py +9 -3
  2. src/populate.py +11 -11
src/leaderboard/read_evals.py CHANGED
@@ -258,12 +258,14 @@ class EvalResult_MIB_CAUSALGRAPH:
258
  data_dict[col_name] = round(np.mean(scores), 3)
259
 
260
  return data_dict
261
-
262
  def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
263
- """Extract evaluation results for MIB causalgraph"""
264
  model_result_filepaths = []
265
 
 
266
  for root, dirnames, files in os.walk(results_path):
 
 
267
  if len(files) == 0 or any([not f.endswith(".json") for f in files]):
268
  continue
269
 
@@ -275,17 +277,21 @@ def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str)
275
  for file in files:
276
  model_result_filepaths.append(os.path.join(root, file))
277
 
 
 
278
  eval_results = []
279
  for filepath in model_result_filepaths:
280
  try:
281
  eval_result = EvalResult_MIB_CAUSALGRAPH("", "", {})
282
  result = eval_result.init_from_json_file(filepath)
283
- result.to_dict() # Verify conversion works
 
284
  eval_results.append(result)
285
  except Exception as e:
286
  print(f"Error processing {filepath}: {e}")
287
  continue
288
 
 
289
  return eval_results
290
 
291
 
 
258
  data_dict[col_name] = round(np.mean(scores), 3)
259
 
260
  return data_dict
261
+
262
  def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
 
263
  model_result_filepaths = []
264
 
265
+ print(f"Scanning directory: {results_path}")
266
  for root, dirnames, files in os.walk(results_path):
267
+ print(f"Current directory: {root}")
268
+ print(f"Found files: {files}")
269
  if len(files) == 0 or any([not f.endswith(".json") for f in files]):
270
  continue
271
 
 
277
  for file in files:
278
  model_result_filepaths.append(os.path.join(root, file))
279
 
280
+ print(f"Found json files: {model_result_filepaths}")
281
+
282
  eval_results = []
283
  for filepath in model_result_filepaths:
284
  try:
285
  eval_result = EvalResult_MIB_CAUSALGRAPH("", "", {})
286
  result = eval_result.init_from_json_file(filepath)
287
+ print(f"Processed file {filepath}")
288
+ print(f"Got result: {result}")
289
  eval_results.append(result)
290
  except Exception as e:
291
  print(f"Error processing {filepath}: {e}")
292
  continue
293
 
294
+ print(f"Total results processed: {len(eval_results)}")
295
  return eval_results
296
 
297
 
src/populate.py CHANGED
@@ -112,23 +112,23 @@ def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
112
 
113
  def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
114
  """Creates a dataframe from all the MIB causal graph experiment results"""
115
- print(f"results_path is {results_path}, requests_path is {requests_path}")
116
  raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
117
- print(f"raw_data is {raw_data}")
 
 
 
 
118
 
119
  # Convert each result to dict format for detailed df
120
  all_data_json = [v.to_dict() for v in raw_data]
121
- detailed_df = pd.DataFrame.from_records(all_data_json)
122
- print(f"detailed_df is: {detailed_df}")
123
-
124
- # Create and print other views for debugging/reference
125
- aggregated_df = aggregate_methods(detailed_df)
126
- print(f"aggregated_df is: {aggregated_df}")
127
 
128
- intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
129
- print(f"intervention_averaged_df is: {intervention_averaged_df}")
 
130
 
131
- # Only return detailed_df for display
132
  return detailed_df
133
 
134
 
 
112
 
113
  def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
114
  """Creates a dataframe from all the MIB causal graph experiment results"""
115
+ print(f"Starting get_leaderboard_df_mib_causalgraph with path: {results_path}")
116
  raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
117
+ print(f"Length of raw_data: {len(raw_data) if raw_data else 0}")
118
+
119
+ if not raw_data:
120
+ print("Warning: raw_data is empty")
121
+ return pd.DataFrame()
122
 
123
  # Convert each result to dict format for detailed df
124
  all_data_json = [v.to_dict() for v in raw_data]
125
+ print(f"Length of all_data_json: {len(all_data_json)}")
126
+ print(f"First entry of all_data_json: {all_data_json[0] if all_data_json else None}")
 
 
 
 
127
 
128
+ detailed_df = pd.DataFrame.from_records(all_data_json)
129
+ print(f"Shape of detailed_df: {detailed_df.shape}")
130
+ print(f"Columns in detailed_df: {detailed_df.columns.tolist()}")
131
 
 
132
  return detailed_df
133
 
134