jasonshaoshun committed on
Commit
b56a213
·
1 Parent(s): 28169bd
Files changed (2) hide show
  1. src/about.py +40 -11
  2. src/display/utils.py +29 -8
src/about.py CHANGED
@@ -44,15 +44,17 @@ class TasksMib_Subgraph(Enum):
44
  task1 = TaskMIB_Subgraph("mcqa", ["meta_llama", "qwen", "gpt2"], "mcqa", ["edge_counts", "faithfulness"])
45
 
46
 
47
- @dataclass
48
- class TaskMIB_Causalgraph:
49
- benchmark: str # MCQA
50
- models: list[str] # List of all models
51
- layers: list[str] # 0-31
52
- col_name: str # display name in leaderboard
53
- interventions: list[str] # output_token, output_location
54
- counterfactuals: list[str] # symbol_counterfactual, etc.
55
- metrics: list[str] # score
 
 
56
 
57
  # class TasksMib_Causalgraph(Enum):
58
  # task0 = TaskMIB_Causalgraph(
@@ -77,11 +79,38 @@ class TaskMIB_Causalgraph:
77
  # "answerPosition_randomLetter_counterfactual"],
78
  # ["score"]
79
  # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  class TasksMib_Causalgraph(Enum):
81
  task0 = TaskMIB_Causalgraph(
82
  "MCQA",
83
- ["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"], # Use lowercase consistently
84
- [str(i) for i in range(32)],
 
 
 
 
85
  "mcqa",
86
  ["output_token", "output_location"],
87
  ["randomLetter_counterfactual", "answerPosition_counterfactual",
 
44
  task1 = TaskMIB_Subgraph("mcqa", ["meta_llama", "qwen", "gpt2"], "mcqa", ["edge_counts", "faithfulness"])
45
 
46
 
47
+
48
+
49
+ # @dataclass
50
+ # class TaskMIB_Causalgraph:
51
+ # benchmark: str # MCQA
52
+ # models: list[str] # List of all models
53
+ # layers: list[str] # 0-31
54
+ # col_name: str # display name in leaderboard
55
+ # interventions: list[str] # output_token, output_location
56
+ # counterfactuals: list[str] # symbol_counterfactual, etc.
57
+ # metrics: list[str] # score
58
 
59
  # class TasksMib_Causalgraph(Enum):
60
  # task0 = TaskMIB_Causalgraph(
 
79
  # "answerPosition_randomLetter_counterfactual"],
80
  # ["score"]
81
  # )
82
+
83
+ # class TasksMib_Causalgraph(Enum):
84
+ # task0 = TaskMIB_Causalgraph(
85
+ # "MCQA",
86
+ # ["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"], # Use lowercase consistently
87
+ # [str(i) for i in range(32)],
88
+ # "mcqa",
89
+ # ["output_token", "output_location"],
90
+ # ["randomLetter_counterfactual", "answerPosition_counterfactual",
91
+ # "answerPosition_randomLetter_counterfactual"],
92
+ # ["score"]
93
+ # )
94
+
95
+ @dataclass
96
+ class TaskMIB_Causalgraph:
97
+ benchmark: str
98
+ models: list[str]
99
+ layers: dict[str, list[str]] # Different layers for each model
100
+ col_name: str
101
+ interventions: list[str]
102
+ counterfactuals: list[str]
103
+ metrics: list[str]
104
+
105
  class TasksMib_Causalgraph(Enum):
106
  task0 = TaskMIB_Causalgraph(
107
  "MCQA",
108
+ ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"],
109
+ {
110
+ "Qwen2ForCausalLM": [str(i) for i in range(24)], # 0-23
111
+ "Gemma2ForCausalLM": [str(i) for i in range(26)], # 0-25
112
+ "LlamaForCausalLM": [str(i) for i in range(32)] # 0-31
113
+ },
114
  "mcqa",
115
  ["output_token", "output_location"],
116
  ["randomLetter_counterfactual", "answerPosition_counterfactual",
src/display/utils.py CHANGED
@@ -163,24 +163,45 @@ BENCHMARK_COLS_MIB_CAUSALGRAPH = []
163
 
164
 
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  auto_eval_column_dict_mib_causalgraph = []
167
 
168
  # Method name column
169
  auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
170
-
171
- # Add eval_name column
172
  auto_eval_column_dict_mib_causalgraph.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True)])
173
 
174
  # For each model-task-intervention-counterfactual combination
175
  for task in TasksMib_Causalgraph:
176
- for model in task.value.models: # Use exact model names with correct casing
177
- model_name = model # Don't convert to lowercase
178
- for layer in task.value.layers:
179
  for intervention in task.value.interventions:
180
  for counterfactual in task.value.counterfactuals:
181
- # Match exact format from the actual data
182
- col_name = f"{model_name}_layer{layer}_{intervention}_{counterfactual}"
183
- # Use the exact column name as both the field name and display name
184
  auto_eval_column_dict_mib_causalgraph.append([
185
  col_name,
186
  ColumnContent,
 
163
 
164
 
165
 
166
+ # auto_eval_column_dict_mib_causalgraph = []
167
+
168
+ # # Method name column
169
+ # auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
170
+
171
+ # # Add eval_name column
172
+ # auto_eval_column_dict_mib_causalgraph.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True)])
173
+
174
+ # # For each model-task-intervention-counterfactual combination
175
+ # for task in TasksMib_Causalgraph:
176
+ # for model in task.value.models: # Use exact model names with correct casing
177
+ # model_name = model # Don't convert to lowercase
178
+ # for layer in task.value.layers:
179
+ # for intervention in task.value.interventions:
180
+ # for counterfactual in task.value.counterfactuals:
181
+ # # Match exact format from the actual data
182
+ # col_name = f"{model_name}_layer{layer}_{intervention}_{counterfactual}"
183
+ # # Use the exact column name as both the field name and display name
184
+ # auto_eval_column_dict_mib_causalgraph.append([
185
+ # col_name,
186
+ # ColumnContent,
187
+ # ColumnContent(col_name, "number", True)
188
+ # ])
189
+
190
+
191
+
192
  auto_eval_column_dict_mib_causalgraph = []
193
 
194
  # Method name column
195
  auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])
 
 
196
  auto_eval_column_dict_mib_causalgraph.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True)])
197
 
198
  # For each model-task-intervention-counterfactual combination
199
  for task in TasksMib_Causalgraph:
200
+ for model in task.value.models:
201
+ for layer in task.value.layers[model]: # Use model-specific layers
 
202
  for intervention in task.value.interventions:
203
  for counterfactual in task.value.counterfactuals:
204
+ col_name = f"{model}_layer{layer}_{intervention}_{counterfactual}"
 
 
205
  auto_eval_column_dict_mib_causalgraph.append([
206
  col_name,
207
  ColumnContent,