Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
b7ce23f
1
Parent(s):
5051ffe
debug
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -84,7 +84,6 @@ class EvalResult_MIB_SUBGRAPH:
|
|
84 |
scores = model_result.get("scores", {})
|
85 |
|
86 |
# for task in ["ioi", "mcqa", "arithmetic_addition", "arithmetic_subtraction", "arc_easy", "arc_challenge"]:
|
87 |
-
print(f"TasksMib_Subgraph.get_all_tasks() is {TasksMib_Subgraph.get_all_tasks()}")
|
88 |
for task in TasksMib_Subgraph.get_all_tasks():
|
89 |
if task in scores:
|
90 |
results[task][model_name] = {
|
@@ -108,16 +107,16 @@ class EvalResult_MIB_SUBGRAPH:
|
|
108 |
}
|
109 |
|
110 |
# Initialize all possible columns with '-'
|
111 |
-
expected_models =
|
112 |
-
expected_tasks =
|
113 |
for task in expected_tasks:
|
114 |
for model in expected_models:
|
115 |
-
if model == "gpt2" and task != "ioi":
|
116 |
-
continue
|
117 |
-
if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
|
118 |
-
continue
|
119 |
-
if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
|
120 |
-
continue
|
121 |
data_dict[f"{task}_{model}"] = '-'
|
122 |
|
123 |
all_scores = []
|
|
|
84 |
scores = model_result.get("scores", {})
|
85 |
|
86 |
# for task in ["ioi", "mcqa", "arithmetic_addition", "arithmetic_subtraction", "arc_easy", "arc_challenge"]:
|
|
|
87 |
for task in TasksMib_Subgraph.get_all_tasks():
|
88 |
if task in scores:
|
89 |
results[task][model_name] = {
|
|
|
107 |
}
|
108 |
|
109 |
# Initialize all possible columns with '-'
|
110 |
+
expected_models = TasksMib_Subgraph.get_all_models()
|
111 |
+
expected_tasks = TasksMib_Subgraph.get_all_tasks()
|
112 |
for task in expected_tasks:
|
113 |
for model in expected_models:
|
114 |
+
# if model == "gpt2" and task != "ioi":
|
115 |
+
# continue
|
116 |
+
# if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
|
117 |
+
# continue
|
118 |
+
# if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
|
119 |
+
# continue
|
120 |
data_dict[f"{task}_{model}"] = '-'
|
121 |
|
122 |
all_scores = []
|