Add ifeval metrics
app.py CHANGED

@@ -51,8 +51,9 @@ def get_leaderboard_df():
             df.loc[model_revision, task] = float(value)
         # IFEval has several metrics but we report just the prompt-loose-acc one
         elif task.lower() == "ifeval":
-
-
+            for metric in ["prompt_level_loose", "prompt_level_strict"]:
+                value = data["results"][first_result_key][f"{metric}_acc"]
+                df.loc[model_revision, f"{task}_{metric}"] = float(value)
         # MMLU has several metrics but we report just the average one
         elif task.lower() == "mmlu":
             value = [v["acc"] for k, v in data["results"].items() if "_average" in k.lower()][0]
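For context, here is a minimal, self-contained sketch of what the added loop does, assuming an IFEval results payload with the usual prompt_level_loose_acc / prompt_level_strict_acc keys. The payload contents, the row label, and the way first_result_key is derived here are illustrative assumptions, not taken from this commit:

import pandas as pd

# Hypothetical results payload; in app.py this comes from the parsed
# evaluation output files, and first_result_key is assumed to point at
# the single IFEval entry under "results".
data = {
    "results": {
        "ifeval|0": {
            "prompt_level_loose_acc": 0.61,
            "prompt_level_strict_acc": 0.55,
        }
    }
}
first_result_key = next(iter(data["results"]))

task = "ifeval"
model_revision = "my-org/my-model_main"  # illustrative row label

df = pd.DataFrame()
# Mirrors the added lines: one leaderboard column per IFEval metric,
# named "ifeval_prompt_level_loose" and "ifeval_prompt_level_strict".
for metric in ["prompt_level_loose", "prompt_level_strict"]:
    value = data["results"][first_result_key][f"{metric}_acc"]
    df.loc[model_revision, f"{task}_{metric}"] = float(value)

print(df)

Note that the unchanged comment above the elif ("we report just the prompt-loose-acc one") predates this change; the new loop records both the loose and strict prompt-level accuracies as separate leaderboard columns.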