Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
56d1796
1
Parent(s):
753260a
debug
Browse files
- src/about.py +13 -2
- src/populate.py +18 -3
src/about.py
CHANGED
@@ -66,11 +66,22 @@ class TaskMIB_Causalgraph:
|
|
66 |
# ["score"]
|
67 |
# )
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
class TasksMib_Causalgraph(Enum):
|
70 |
task0 = TaskMIB_Causalgraph(
|
71 |
"MCQA",
|
72 |
-
["
|
73 |
-
[str(i) for i in range(32)],
|
74 |
"mcqa",
|
75 |
["output_token", "output_location"],
|
76 |
["randomLetter_counterfactual", "answerPosition_counterfactual",
|
|
|
66 |
# ["score"]
|
67 |
# )
|
68 |
|
69 |
+
# class TasksMib_Causalgraph(Enum):
|
70 |
+
# task0 = TaskMIB_Causalgraph(
|
71 |
+
# "MCQA",
|
72 |
+
# ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], # Match exact model names with correct casing
|
73 |
+
# [str(i) for i in range(32)],
|
74 |
+
# "mcqa",
|
75 |
+
# ["output_token", "output_location"],
|
76 |
+
# ["randomLetter_counterfactual", "answerPosition_counterfactual",
|
77 |
+
# "answerPosition_randomLetter_counterfactual"],
|
78 |
+
# ["score"]
|
79 |
+
# )
|
80 |
class TasksMib_Causalgraph(Enum):
|
81 |
task0 = TaskMIB_Causalgraph(
|
82 |
"MCQA",
|
83 |
+
["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"], # Use lowercase consistently
|
84 |
+
[str(i) for i in range(32)],
|
85 |
"mcqa",
|
86 |
["output_token", "output_location"],
|
87 |
["randomLetter_counterfactual", "answerPosition_counterfactual",
|
src/populate.py
CHANGED
@@ -248,15 +248,30 @@ def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, co
|
|
248 |
# Convert each result to dict format for detailed df
|
249 |
all_data_json = [v.to_dict() for v in raw_data]
|
250 |
detailed_df = pd.DataFrame.from_records(all_data_json)
|
251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
|
253 |
# Create aggregated df
|
254 |
aggregated_df = aggregate_methods(detailed_df)
|
255 |
-
print("Columns in aggregated_df:", aggregated_df.columns.tolist())
|
256 |
|
257 |
# Create intervention-averaged df
|
258 |
intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
|
259 |
-
|
|
|
260 |
|
261 |
return detailed_df, aggregated_df, intervention_averaged_df
|
262 |
|
|
|
248 |
# Convert each result to dict format for detailed df
|
249 |
all_data_json = [v.to_dict() for v in raw_data]
|
250 |
detailed_df = pd.DataFrame.from_records(all_data_json)
|
251 |
+
|
252 |
+
# Print the actual columns for debugging
|
253 |
+
print("Original columns:", detailed_df.columns.tolist())
|
254 |
+
|
255 |
+
# Rename columns to match schema
|
256 |
+
column_mapping = {}
|
257 |
+
for col in detailed_df.columns:
|
258 |
+
if col in ['eval_name', 'Method']:
|
259 |
+
continue
|
260 |
+
# Ensure consistent casing for the column names
|
261 |
+
new_col = col.replace('Qwen2ForCausalLM', 'qwen2forcausallm') \
|
262 |
+
.replace('Gemma2ForCausalLM', 'gemma2forcausallm') \
|
263 |
+
.replace('LlamaForCausalLM', 'llamaforcausallm')
|
264 |
+
column_mapping[col] = new_col
|
265 |
+
|
266 |
+
detailed_df = detailed_df.rename(columns=column_mapping)
|
267 |
|
268 |
# Create aggregated df
|
269 |
aggregated_df = aggregate_methods(detailed_df)
|
|
|
270 |
|
271 |
# Create intervention-averaged df
|
272 |
intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
|
273 |
+
|
274 |
+
print("Transformed columns:", detailed_df.columns.tolist())
|
275 |
|
276 |
return detailed_df, aggregated_df, intervention_averaged_df
|
277 |
|