jasonshaoshun committed on
Commit
aea751c
·
1 Parent(s): 79683c9
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +25 -25
src/leaderboard/read_evals.py CHANGED
@@ -254,35 +254,35 @@ class EvalResult_MIB_CAUSALGRAPH:
254
 
255
  # return data_dict
256
  def to_dict(self):
257
- """Converts the Eval Result to a dict for dataframe display"""
258
- data_dict = {
259
- "eval_name": self.eval_name,
260
- "Method": self.method_name,
261
- }
262
-
263
- # Process each model's results
264
- for model_id, task_scores in self.results.items():
265
- model_name = model_id.lower() # Lowercase for consistency
266
-
267
- # Each task_scores contains layer data
268
- for layer_data in task_scores.get("MCQA", []):
269
- layer = layer_data.get("layer")
270
- layer_scores = layer_data.get("layer_scores", [])
271
 
272
- # Process each intervention and counterfactual
273
- for intervention_data in layer_scores:
274
- intervention = intervention_data["intervention"][0]
275
- counterfactual_scores = intervention_data["counterfactual_scores"]
276
 
277
- for cf_score in counterfactual_scores:
278
- counterfactual = cf_score["counterfactual"][0]
279
- score = cf_score["score"]
 
280
 
281
- # Column name matches what we defined in utils.py
282
- col_name = f"{model_name}_layer{layer}_{intervention}_{counterfactual}".lower()
283
- data_dict[col_name] = score
 
 
 
 
284
 
285
- return data_dict
286
 
287
 
288
  # def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
 
254
 
255
  # return data_dict
256
  def to_dict(self):
257
+ """Converts the Eval Result to a dict for dataframe display"""
258
+ data_dict = {
259
+ "eval_name": self.eval_name,
260
+ "Method": self.method_name,
261
+ }
262
+
263
+ # Process each model's results
264
+ for model_id, task_scores in self.results.items():
265
+ model_name = model_id.lower() # Lowercase for consistency
 
 
 
 
 
266
 
267
+ # Each task_scores contains layer data
268
+ for layer_data in task_scores.get("MCQA", []):
269
+ layer = layer_data.get("layer")
270
+ layer_scores = layer_data.get("layer_scores", [])
271
 
272
+ # Process each intervention and counterfactual
273
+ for intervention_data in layer_scores:
274
+ intervention = intervention_data["intervention"][0]
275
+ counterfactual_scores = intervention_data["counterfactual_scores"]
276
 
277
+ for cf_score in counterfactual_scores:
278
+ counterfactual = cf_score["counterfactual"][0]
279
+ score = cf_score["score"]
280
+
281
+ # Column name matches what we defined in utils.py
282
+ col_name = f"{model_name}_layer{layer}_{intervention}_{counterfactual}".lower()
283
+ data_dict[col_name] = score
284
 
285
+ return data_dict
286
 
287
 
288
  # def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]: