Spaces:
Running
Running
ycy
committed on
Commit
·
320abd6
1
Parent(s):
7dad3b1
test
Browse files- src/leaderboard/read_evals.py +1 -1
- src/populate.py +1 -2
src/leaderboard/read_evals.py
CHANGED
@@ -76,7 +76,7 @@ class EvalResult:
|
|
76 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
77 |
continue
|
78 |
|
79 |
-
mean_acc = np.mean(accs)
|
80 |
results[task.benchmark] = mean_acc
|
81 |
|
82 |
return self(
|
|
|
76 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
77 |
continue
|
78 |
|
79 |
+
mean_acc = np.mean(accs)
|
80 |
results[task.benchmark] = mean_acc
|
81 |
|
82 |
return self(
|
src/populate.py
CHANGED
@@ -18,8 +18,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
18 |
|
19 |
df = df.sort_values(by=[AutoEvalColumn.task0.name], ascending=False)
|
20 |
df = df[cols].round(decimals=2)
|
21 |
-
|
22 |
-
assert 0
|
23 |
# filter out if any of the benchmarks have not been produced
|
24 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
25 |
return df
|
|
|
18 |
|
19 |
df = df.sort_values(by=[AutoEvalColumn.task0.name], ascending=False)
|
20 |
df = df[cols].round(decimals=2)
|
21 |
+
|
|
|
22 |
# filter out if any of the benchmarks have not been produced
|
23 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
24 |
return df
|