jasonshaoshun committed on
Commit
56d1796
·
1 Parent(s): 753260a
Files changed (2) hide show
  1. src/about.py +13 -2
  2. src/populate.py +18 -3
src/about.py CHANGED
@@ -66,11 +66,22 @@ class TaskMIB_Causalgraph:
66
  # ["score"]
67
  # )
68
 
 
 
 
 
 
 
 
 
 
 
 
69
  class TasksMib_Causalgraph(Enum):
70
  task0 = TaskMIB_Causalgraph(
71
  "MCQA",
72
- ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], # Match exact model names with correct casing
73
- [str(i) for i in range(32)],
74
  "mcqa",
75
  ["output_token", "output_location"],
76
  ["randomLetter_counterfactual", "answerPosition_counterfactual",
 
66
  # ["score"]
67
  # )
68
 
69
+ # class TasksMib_Causalgraph(Enum):
70
+ # task0 = TaskMIB_Causalgraph(
71
+ # "MCQA",
72
+ # ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], # Match exact model names with correct casing
73
+ # [str(i) for i in range(32)],
74
+ # "mcqa",
75
+ # ["output_token", "output_location"],
76
+ # ["randomLetter_counterfactual", "answerPosition_counterfactual",
77
+ # "answerPosition_randomLetter_counterfactual"],
78
+ # ["score"]
79
+ # )
80
  class TasksMib_Causalgraph(Enum):
81
  task0 = TaskMIB_Causalgraph(
82
  "MCQA",
83
+ ["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"], # Use lowercase consistently
84
+ [str(i) for i in range(32)],
85
  "mcqa",
86
  ["output_token", "output_location"],
87
  ["randomLetter_counterfactual", "answerPosition_counterfactual",
src/populate.py CHANGED
@@ -248,15 +248,30 @@ def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, co
248
  # Convert each result to dict format for detailed df
249
  all_data_json = [v.to_dict() for v in raw_data]
250
  detailed_df = pd.DataFrame.from_records(all_data_json)
251
- print("Columns in detailed_df:", detailed_df.columns.tolist())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
  # Create aggregated df
254
  aggregated_df = aggregate_methods(detailed_df)
255
- print("Columns in aggregated_df:", aggregated_df.columns.tolist())
256
 
257
  # Create intervention-averaged df
258
  intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
259
- print("Columns in intervention_averaged_df:", intervention_averaged_df.columns.tolist())
 
260
 
261
  return detailed_df, aggregated_df, intervention_averaged_df
262
 
 
248
  # Convert each result to dict format for detailed df
249
  all_data_json = [v.to_dict() for v in raw_data]
250
  detailed_df = pd.DataFrame.from_records(all_data_json)
251
+
252
+ # Print the actual columns for debugging
253
+ print("Original columns:", detailed_df.columns.tolist())
254
+
255
+ # Rename columns to match schema
256
+ column_mapping = {}
257
+ for col in detailed_df.columns:
258
+ if col in ['eval_name', 'Method']:
259
+ continue
260
+ # Ensure consistent casing for the column names
261
+ new_col = col.replace('Qwen2ForCausalLM', 'qwen2forcausallm') \
262
+ .replace('Gemma2ForCausalLM', 'gemma2forcausallm') \
263
+ .replace('LlamaForCausalLM', 'llamaforcausallm')
264
+ column_mapping[col] = new_col
265
+
266
+ detailed_df = detailed_df.rename(columns=column_mapping)
267
 
268
  # Create aggregated df
269
  aggregated_df = aggregate_methods(detailed_df)
 
270
 
271
  # Create intervention-averaged df
272
  intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
273
+
274
+ print("Transformed columns:", detailed_df.columns.tolist())
275
 
276
  return detailed_df, aggregated_df, intervention_averaged_df
277