jasonshaoshun committed · 56d1796
1 Parent(s): 753260a

debug

Files changed:
- src/about.py  +13 -2
- src/populate.py  +18 -3
src/about.py
CHANGED
@@ -66,11 +66,22 @@ class TaskMIB_Causalgraph:
 #     ["score"]
 # )
 
+# class TasksMib_Causalgraph(Enum):
+#     task0 = TaskMIB_Causalgraph(
+#         "MCQA",
+#         ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], # Match exact model names with correct casing
+#         [str(i) for i in range(32)],
+#         "mcqa",
+#         ["output_token", "output_location"],
+#         ["randomLetter_counterfactual", "answerPosition_counterfactual",
+#          "answerPosition_randomLetter_counterfactual"],
+#         ["score"]
+#     )
 class TasksMib_Causalgraph(Enum):
     task0 = TaskMIB_Causalgraph(
         "MCQA",
-        ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], # Match exact model names with correct casing
-        [str(i) for i in range(32)],
+        ["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"],  # Use lowercase consistently
+        [str(i) for i in range(32)],
         "mcqa",
         ["output_token", "output_location"],
         ["randomLetter_counterfactual", "answerPosition_counterfactual",
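The edit above keeps the old enum definition as a comment and switches the model identifiers in the active TasksMib_Causalgraph entry to lowercase. As a rough, standalone sketch (the MODEL_IDS list and normalize_arch helper below are illustrative, not code from this Space), lowercasing architecture names at one point is what makes the lowercase identifiers safe to match against:

# Illustrative only: lowercase-normalize architecture names before comparing
# them to the lowercase identifiers now used in about.py.
MODEL_IDS = ["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"]

def normalize_arch(name: str) -> str:
    # "Qwen2ForCausalLM" -> "qwen2forcausallm"
    return name.lower()

assert normalize_arch("Qwen2ForCausalLM") in MODEL_IDS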
src/populate.py
CHANGED
@@ -248,15 +248,30 @@ def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, co
     # Convert each result to dict format for detailed df
     all_data_json = [v.to_dict() for v in raw_data]
     detailed_df = pd.DataFrame.from_records(all_data_json)
-
+
+    # Print the actual columns for debugging
+    print("Original columns:", detailed_df.columns.tolist())
+
+    # Rename columns to match schema
+    column_mapping = {}
+    for col in detailed_df.columns:
+        if col in ['eval_name', 'Method']:
+            continue
+        # Ensure consistent casing for the column names
+        new_col = col.replace('Qwen2ForCausalLM', 'qwen2forcausallm') \
+                     .replace('Gemma2ForCausalLM', 'gemma2forcausallm') \
+                     .replace('LlamaForCausalLM', 'llamaforcausallm')
+        column_mapping[col] = new_col
+
+    detailed_df = detailed_df.rename(columns=column_mapping)
 
     # Create aggregated df
     aggregated_df = aggregate_methods(detailed_df)
-    print("Columns in aggregated_df:", aggregated_df.columns.tolist())
 
     # Create intervention-averaged df
     intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
-
+
+    print("Transformed columns:", detailed_df.columns.tolist())
 
     return detailed_df, aggregated_df, intervention_averaged_df
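The renaming block added above can be tried in isolation. The following sketch uses made-up column names (it is not taken from this repository) to show that the identifier columns are left untouched while the architecture prefix of the remaining headers is lowercased:

import pandas as pd

# Made-up columns in the style of the detailed leaderboard dataframe.
detailed_df = pd.DataFrame(columns=[
    "eval_name",
    "Method",
    "Qwen2ForCausalLM_output_token",
    "Gemma2ForCausalLM_output_location",
])

# Same logic as the diff above: skip identifier columns, lowercase the
# architecture prefix everywhere else.
column_mapping = {}
for col in detailed_df.columns:
    if col in ["eval_name", "Method"]:
        continue
    new_col = (col.replace("Qwen2ForCausalLM", "qwen2forcausallm")
                  .replace("Gemma2ForCausalLM", "gemma2forcausallm")
                  .replace("LlamaForCausalLM", "llamaforcausallm"))
    column_mapping[col] = new_col

detailed_df = detailed_df.rename(columns=column_mapping)
print(detailed_df.columns.tolist())
# ['eval_name', 'Method', 'qwen2forcausallm_output_token', 'gemma2forcausallm_output_location']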