lewtun HF Staff committed on
Commit
1dede4d
·
1 Parent(s): 9d7fd9c

Add pass@1 for MATH-500

Browse files
Files changed (1) hide show
  1. app.py +12 -0
app.py CHANGED
@@ -109,6 +109,18 @@ def get_leaderboard_df():
109
  df.loc[model_revision, task] = float(data["results"]["all"]["extractive_match"])
110
  elif "gpqa_pass@1:8_samples" in data["results"]["all"]:
111
  df.loc[model_revision, task] = float(data["results"]["all"]["gpqa_pass@1:8_samples"])
 
 
 
 
 
 
 
 
 
 
 
 
112
  # MATH reports qem
113
  elif task.lower() in ["aimo_kaggle", "math_deepseek_cot", "math_deepseek_rl_cot"]:
114
  value = data["results"]["all"]["qem"]
 
109
  df.loc[model_revision, task] = float(data["results"]["all"]["extractive_match"])
110
  elif "gpqa_pass@1:8_samples" in data["results"]["all"]:
111
  df.loc[model_revision, task] = float(data["results"]["all"]["gpqa_pass@1:8_samples"])
112
+ # MATH-500 now reports pass@1
113
+ elif task.lower() == "math_500":
114
+ # Check for the 4-sample pass@1 metric
115
+ if "math_pass@1:4_samples" in data["results"]["all"]:
116
+ value = data["results"]["all"]["math_pass@1:4_samples"]
117
+ df.loc[model_revision, f"{task} (n=4)"] = float(value)
118
+
119
+ # For backward compatibility, also store in the original column name if any value exists
120
+ if "extractive_match" in data["results"]["all"]:
121
+ df.loc[model_revision, task] = float(data["results"]["all"]["extractive_match"])
122
+ elif "math_pass@1:4_samples" in data["results"]["all"]:
123
+ df.loc[model_revision, task] = float(data["results"]["all"]["math_pass@1:4_samples"])
124
  # MATH reports qem
125
  elif task.lower() in ["aimo_kaggle", "math_deepseek_cot", "math_deepseek_rl_cot"]:
126
  value = data["results"]["all"]["qem"]