Add AlpacaEval
Browse files
app.py
CHANGED
|
@@ -64,6 +64,9 @@ def get_leaderboard_df(merge_values: bool = True):
|
|
| 64 |
# MATH reports qem
|
| 65 |
elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
|
| 66 |
value = data["results"]["all"]["qem"]
|
|
|
|
|
|
|
|
|
|
| 67 |
else:
|
| 68 |
first_metric_key = next(
|
| 69 |
iter(data["results"][first_result_key])
|
|
@@ -80,13 +83,15 @@ def get_leaderboard_df(merge_values: bool = True):
|
|
| 80 |
else:
|
| 81 |
df.loc[model_revision, task] = value
|
| 82 |
|
| 83 |
-
# Put IFEval / BBH / AGIEval in first columns
|
|
|
|
|
|
|
| 84 |
ifeval_col = df.pop("Ifeval")
|
| 85 |
-
df.insert(1, "Ifeval", ifeval_col)
|
| 86 |
bbh_col = df.pop("Bbh")
|
| 87 |
-
df.insert(2, "Bbh", bbh_col)
|
| 88 |
agieval_col = df.pop("Agieval")
|
| 89 |
-
df.insert(3, "Agieval", agieval_col)
|
| 90 |
# Drop rows where every entry is NaN
|
| 91 |
df = df.dropna(how="all", axis=0, subset=[c for c in df.columns if c != "Date"])
|
| 92 |
df.insert(loc=1, column="Average", value=df.mean(axis=1, numeric_only=True))
|
|
|
|
| 64 |
# MATH reports qem
|
| 65 |
elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
|
| 66 |
value = data["results"]["all"]["qem"]
|
| 67 |
+
# Report length controlled winrate for AlpacaEval
|
| 68 |
+
elif task.lower() == "alpaca_eval":
|
| 69 |
+
value = data["results"][first_result_key]["length_controlled_winrate"] / 100.0
|
| 70 |
else:
|
| 71 |
first_metric_key = next(
|
| 72 |
iter(data["results"][first_result_key])
|
|
|
|
| 83 |
else:
|
| 84 |
df.loc[model_revision, task] = value
|
| 85 |
|
| 86 |
+
# Put IFEval / BBH / AGIEval / AlpacaEval in first columns
|
| 87 |
+
alpaca_col = df.pop("Alpaca_eval")
|
| 88 |
+
df.insert(1, "Alpaca_eval", alpaca_col)
|
| 89 |
ifeval_col = df.pop("Ifeval")
|
| 90 |
+
df.insert(2, "Ifeval", ifeval_col)
|
| 91 |
bbh_col = df.pop("Bbh")
|
| 92 |
+
df.insert(3, "Bbh", bbh_col)
|
| 93 |
agieval_col = df.pop("Agieval")
|
| 94 |
+
df.insert(4, "Agieval", agieval_col)
|
| 95 |
# Drop rows where every entry is NaN
|
| 96 |
df = df.dropna(how="all", axis=0, subset=[c for c in df.columns if c != "Date"])
|
| 97 |
df.insert(loc=1, column="Average", value=df.mean(axis=1, numeric_only=True))
|