jasonshaoshun committed · Commit b56a213 · 1 parent: 28169bd

debug

Files changed:
- src/about.py +40 -11
- src/display/utils.py +29 -8
src/about.py CHANGED

@@ -44,15 +44,17 @@ class TasksMib_Subgraph(Enum):
     task1 = TaskMIB_Subgraph("mcqa", ["meta_llama", "qwen", "gpt2"], "mcqa", ["edge_counts", "faithfulness"])
 
 
-@dataclass
-class TaskMIB_Causalgraph:
-    benchmark: str              # MCQA
-    models: list[str]           # List of all models
-    layers: list[str]           # 0-31
-    col_name: str               # display name in leaderboard
-    interventions: list[str]    # output_token, output_location
-    counterfactuals: list[str]  # symbol_counterfactual, etc.
-    metrics: list[str]          # score
+
+
+# @dataclass
+# class TaskMIB_Causalgraph:
+#     benchmark: str              # MCQA
+#     models: list[str]           # List of all models
+#     layers: list[str]           # 0-31
+#     col_name: str               # display name in leaderboard
+#     interventions: list[str]    # output_token, output_location
+#     counterfactuals: list[str]  # symbol_counterfactual, etc.
+#     metrics: list[str]          # score
 
 # class TasksMib_Causalgraph(Enum):
 #     task0 = TaskMIB_Causalgraph(

@@ -77,11 +79,38 @@ class TaskMIB_Causalgraph:
 #          "answerPosition_randomLetter_counterfactual"],
 #         ["score"]
 #     )
+
+# class TasksMib_Causalgraph(Enum):
+#     task0 = TaskMIB_Causalgraph(
+#         "MCQA",
+#         ["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"],  # Use lowercase consistently
+#         [str(i) for i in range(32)],
+#         "mcqa",
+#         ["output_token", "output_location"],
+#         ["randomLetter_counterfactual", "answerPosition_counterfactual",
+#          "answerPosition_randomLetter_counterfactual"],
+#         ["score"]
+#     )
+
+@dataclass
+class TaskMIB_Causalgraph:
+    benchmark: str
+    models: list[str]
+    layers: dict[str, list[str]]  # Different layers for each model
+    col_name: str
+    interventions: list[str]
+    counterfactuals: list[str]
+    metrics: list[str]
+
 class TasksMib_Causalgraph(Enum):
     task0 = TaskMIB_Causalgraph(
         "MCQA",
-        ["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"],  # Use lowercase consistently
-        [str(i) for i in range(32)],
+        ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"],
+        {
+            "Qwen2ForCausalLM": [str(i) for i in range(24)],   # 0-23
+            "Gemma2ForCausalLM": [str(i) for i in range(26)],  # 0-25
+            "LlamaForCausalLM": [str(i) for i in range(32)]    # 0-31
+        },
         "mcqa",
         ["output_token", "output_location"],
         ["randomLetter_counterfactual", "answerPosition_counterfactual",
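Net effect of the src/about.py change: TaskMIB_Causalgraph now stores layers as a per-model dict instead of a single 0-31 list, so Qwen2 (24 layers), Gemma2 (26) and Llama (32) each keep their own range. A minimal sketch (not part of the commit) of how the new enum can be read, assuming the module imports as src.about:

    from src.about import TasksMib_Causalgraph

    for task in TasksMib_Causalgraph:
        for model in task.value.models:
            layers = task.value.layers[model]  # model-specific layer list, e.g. ["0", ..., "23"] for Qwen2
            print(model, len(layers), layers[0], layers[-1])
    # Expected: Qwen2ForCausalLM 24 0 23, Gemma2ForCausalLM 26 0 25, LlamaForCausalLM 32 0 31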
src/display/utils.py CHANGED

@@ -163,24 +163,45 @@ BENCHMARK_COLS_MIB_CAUSALGRAPH = []
 
 
 
+# auto_eval_column_dict_mib_causalgraph = []
+
+# # Method name column
+# auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
+
+# # Add eval_name column
+# auto_eval_column_dict_mib_causalgraph.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True)])
+
+# # For each model-task-intervention-counterfactual combination
+# for task in TasksMib_Causalgraph:
+#     for model in task.value.models:  # Use exact model names with correct casing
+#         model_name = model  # Don't convert to lowercase
+#         for layer in task.value.layers:
+#             for intervention in task.value.interventions:
+#                 for counterfactual in task.value.counterfactuals:
+#                     # Match exact format from the actual data
+#                     col_name = f"{model_name}_layer{layer}_{intervention}_{counterfactual}"
+#                     # Use the exact column name as both the field name and display name
+#                     auto_eval_column_dict_mib_causalgraph.append([
+#                         col_name,
+#                         ColumnContent,
+#                         ColumnContent(col_name, "number", True)
+#                     ])
+
+
+
 auto_eval_column_dict_mib_causalgraph = []
 
 # Method name column
 auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
-
-# Add eval_name column
 auto_eval_column_dict_mib_causalgraph.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True)])
 
 # For each model-task-intervention-counterfactual combination
 for task in TasksMib_Causalgraph:
-    for model in task.value.models:  # Use exact model names with correct casing
-        model_name = model  # Don't convert to lowercase
-        for layer in task.value.layers:
+    for model in task.value.models:
+        for layer in task.value.layers[model]:  # Use model-specific layers
             for intervention in task.value.interventions:
                 for counterfactual in task.value.counterfactuals:
-                    # Match exact format from the actual data
-                    col_name = f"{model_name}_layer{layer}_{intervention}_{counterfactual}"
-                    # Use the exact column name as both the field name and display name
+                    col_name = f"{model}_layer{layer}_{intervention}_{counterfactual}"
                     auto_eval_column_dict_mib_causalgraph.append([
                         col_name,
                         ColumnContent,