jasonshaoshun committed on
Commit
b7ce23f
·
1 Parent(s): 5051ffe
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +8 -9
src/leaderboard/read_evals.py CHANGED
@@ -84,7 +84,6 @@ class EvalResult_MIB_SUBGRAPH:
84
  scores = model_result.get("scores", {})
85
 
86
  # for task in ["ioi", "mcqa", "arithmetic_addition", "arithmetic_subtraction", "arc_easy", "arc_challenge"]:
87
- print(f"TasksMib_Subgraph.get_all_tasks() is {TasksMib_Subgraph.get_all_tasks()}")
88
  for task in TasksMib_Subgraph.get_all_tasks():
89
  if task in scores:
90
  results[task][model_name] = {
@@ -108,16 +107,16 @@ class EvalResult_MIB_SUBGRAPH:
108
  }
109
 
110
  # Initialize all possible columns with '-'
111
- expected_models = ["llama3", "qwen2_5", "gpt2", "gemma2"]
112
- expected_tasks = ["ioi", "mcqa", "arithmetic_addition", "arithmetic_subtraction", "arc_easy", "arc_challenge"]
113
  for task in expected_tasks:
114
  for model in expected_models:
115
- if model == "gpt2" and task != "ioi":
116
- continue
117
- if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
118
- continue
119
- if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
120
- continue
121
  data_dict[f"{task}_{model}"] = '-'
122
 
123
  all_scores = []
 
84
  scores = model_result.get("scores", {})
85
 
86
  # for task in ["ioi", "mcqa", "arithmetic_addition", "arithmetic_subtraction", "arc_easy", "arc_challenge"]:
 
87
  for task in TasksMib_Subgraph.get_all_tasks():
88
  if task in scores:
89
  results[task][model_name] = {
 
107
  }
108
 
109
  # Initialize all possible columns with '-'
110
+ expected_models = TasksMib_Subgraph.get_all_models()
111
+ expected_tasks = TasksMib_Subgraph.get_all_tasks()
112
  for task in expected_tasks:
113
  for model in expected_models:
114
+ # if model == "gpt2" and task != "ioi":
115
+ # continue
116
+ # if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
117
+ # continue
118
+ # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
119
+ # continue
120
  data_dict[f"{task}_{model}"] = '-'
121
 
122
  all_scores = []