Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
36438b0
1
Parent(s):
29701ab
debug
Browse files
- src/about.py +15 -4
- src/display/utils.py +3 -4
- src/populate.py +7 -6
src/about.py
CHANGED
@@ -54,20 +54,31 @@ class TaskMIB_Causalgraph:
|
|
54 |
counterfactuals: list[str] # symbol_counterfactual, etc.
|
55 |
metrics: list[str] # score
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
class TasksMib_Causalgraph(Enum):
|
58 |
task0 = TaskMIB_Causalgraph(
|
59 |
"MCQA",
|
60 |
-
["
|
61 |
[str(i) for i in range(32)], # 0-31 layers
|
62 |
"mcqa",
|
63 |
["output_token", "output_location"],
|
64 |
-
["
|
65 |
-
"
|
66 |
["score"]
|
67 |
)
|
68 |
|
69 |
|
70 |
-
|
71 |
NUM_FEWSHOT = 0 # Change with your few shot
|
72 |
# ---------------------------------------------------
|
73 |
|
|
|
54 |
counterfactuals: list[str] # symbol_counterfactual, etc.
|
55 |
metrics: list[str] # score
|
56 |
|
57 |
+
# class TasksMib_Causalgraph(Enum):
|
58 |
+
# task0 = TaskMIB_Causalgraph(
|
59 |
+
# "MCQA",
|
60 |
+
# ["LlamaForCausalLM", "Qwen2ForCausalLM", "Gemma2ForCausalLM"], # Updated model list
|
61 |
+
# [str(i) for i in range(32)], # 0-31 layers
|
62 |
+
# "mcqa",
|
63 |
+
# ["output_token", "output_location"],
|
64 |
+
# ["symbol_counterfactual", "randomLetter_counterfactual",
|
65 |
+
# "answerPosition_counterfactual", "answerPosition_symbol_counterfactual"],
|
66 |
+
# ["score"]
|
67 |
+
# )
|
68 |
+
|
69 |
class TasksMib_Causalgraph(Enum):
|
70 |
task0 = TaskMIB_Causalgraph(
|
71 |
"MCQA",
|
72 |
+
["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"], # Match exact model names
|
73 |
[str(i) for i in range(32)], # 0-31 layers
|
74 |
"mcqa",
|
75 |
["output_token", "output_location"],
|
76 |
+
["randomLetter_counterfactual", "answerPosition_counterfactual",
|
77 |
+
"answerPosition_randomLetter_counterfactual"], # Match exact counterfactual names
|
78 |
["score"]
|
79 |
)
|
80 |
|
81 |
|
|
|
82 |
NUM_FEWSHOT = 0 # Change with your few shot
|
83 |
# ---------------------------------------------------
|
84 |
|
src/display/utils.py
CHANGED
@@ -171,12 +171,12 @@ auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnCon
|
|
171 |
|
172 |
# For each model-task-intervention-counterfactual combination
|
173 |
for task in TasksMib_Causalgraph:
|
174 |
-
for model in task.value.models:
|
175 |
-
model_name = model # Don't convert to lowercase
|
176 |
for layer in task.value.layers:
|
177 |
for intervention in task.value.interventions:
|
178 |
for counterfactual in task.value.counterfactuals:
|
179 |
-
|
|
|
180 |
auto_eval_column_dict_mib_causalgraph.append([
|
181 |
col_name,
|
182 |
ColumnContent,
|
@@ -184,7 +184,6 @@ for task in TasksMib_Causalgraph:
|
|
184 |
])
|
185 |
|
186 |
|
187 |
-
|
188 |
|
189 |
|
190 |
# Create the dataclass
|
|
|
171 |
|
172 |
# For each model-task-intervention-counterfactual combination
|
173 |
for task in TasksMib_Causalgraph:
|
174 |
+
for model in task.value.models:
|
|
|
175 |
for layer in task.value.layers:
|
176 |
for intervention in task.value.interventions:
|
177 |
for counterfactual in task.value.counterfactuals:
|
178 |
+
# Match exact column format from DataFrame
|
179 |
+
col_name = f"{model}_layer{layer}_{intervention}_{counterfactual}"
|
180 |
auto_eval_column_dict_mib_causalgraph.append([
|
181 |
col_name,
|
182 |
ColumnContent,
|
|
|
184 |
])
|
185 |
|
186 |
|
|
|
187 |
|
188 |
|
189 |
# Create the dataclass
|
src/populate.py
CHANGED
@@ -180,16 +180,17 @@ def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
180 |
if 'eval_name' in df_copy.columns:
|
181 |
df_copy = df_copy.drop('eval_name', axis=1)
|
182 |
|
183 |
-
# Group columns by model and
|
184 |
result_cols = {}
|
185 |
for task in TasksMib_Causalgraph:
|
186 |
for model in task.value.models: # Will iterate over all three models
|
187 |
-
model = model.lower()
|
188 |
for intervention in task.value.interventions:
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
|
|
|
|
193 |
|
194 |
averaged_df = pd.DataFrame()
|
195 |
if method_col is not None:
|
|
|
180 |
if 'eval_name' in df_copy.columns:
|
181 |
df_copy = df_copy.drop('eval_name', axis=1)
|
182 |
|
183 |
+
# Group columns by model and intervention
|
184 |
result_cols = {}
|
185 |
for task in TasksMib_Causalgraph:
|
186 |
for model in task.value.models: # Will iterate over all three models
|
|
|
187 |
for intervention in task.value.interventions:
|
188 |
+
for counterfactual in task.value.counterfactuals:
|
189 |
+
col_pattern = f"{model}_layer.*_{intervention}_{counterfactual}"
|
190 |
+
matching_cols = [c for c in df_copy.columns if pd.Series(c).str.match(col_pattern).any()]
|
191 |
+
if matching_cols:
|
192 |
+
col_name = f"{model}_{intervention}_{counterfactual}"
|
193 |
+
result_cols[col_name] = matching_cols
|
194 |
|
195 |
averaged_df = pd.DataFrame()
|
196 |
if method_col is not None:
|