Spaces:

mib-bench
/

leaderboard

Running

jasonshaoshun committed on Jan 29

Commit

2490332

1 Parent(s): b7ce23f

debug

Files changed (1) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -141,22 +141,23 @@ class EvalResult_MIB_SUBGRAPH:
                 all_scores.append(score)
         # All entries must be present for average
-        required_entries = [
-            data_dict['ioi_llama3'] != '-',
-            data_dict['ioi_qwen2_5'] != '-',
-            data_dict['ioi_gpt2'] != '-',
-            data_dict['ioi_gemma2'] != '-',
-            data_dict['mcqa_llama3'] != '-',
-            data_dict['mcqa_qwen2_5'] != '-',
-            data_dict['mcqa_gemma2'] != '-',
-            data_dict['arithmetic_addition_llama3'] != '-',
-            data_dict['arithmetic_subtraction_llama3'] != '-',
-            data_dict['arc_easy_gemma2'] != '-',
-            data_dict['arc_easy_llama3'] != '-',
-            data_dict['arc_challenge_llama3'] != '-'
-        ]
-        data_dict["Average"] = round(np.mean(all_scores), 2) if all(required_entries) else '-'
         return data_dict

                 all_scores.append(score)
         # All entries must be present for average
+        # required_entries = [
+        #     data_dict['ioi_llama3'] != '-',
+        #     data_dict['ioi_qwen2_5'] != '-',
+        #     data_dict['ioi_gpt2'] != '-',
+        #     data_dict['ioi_gemma2'] != '-',
+        #     data_dict['mcqa_llama3'] != '-',
+        #     data_dict['mcqa_qwen2_5'] != '-',
+        #     data_dict['mcqa_gemma2'] != '-',
+        #     data_dict['arithmetic_addition_llama3'] != '-',
+        #     data_dict['arithmetic_subtraction_llama3'] != '-',
+        #     data_dict['arc_easy_gemma2'] != '-',
+        #     data_dict['arc_easy_llama3'] != '-',
+        #     data_dict['arc_challenge_llama3'] != '-'
+        # ]
+        # data_dict["Average"] = round(np.mean(all_scores), 2) if all(required_entries) else '-'
+        data_dict["Average"] = round(np.mean(all_scores), 2) if '-' not in data_dict.values() else '-'
         return data_dict