ycy committed on
Commit
320abd6
·
1 Parent(s): 7dad3b1
Files changed (2) hide show
  1. src/leaderboard/read_evals.py +1 -1
  2. src/populate.py +1 -2
src/leaderboard/read_evals.py CHANGED
@@ -76,7 +76,7 @@ class EvalResult:
76
  if accs.size == 0 or any([acc is None for acc in accs]):
77
  continue
78
 
79
- mean_acc = np.mean(accs) * 100.0
80
  results[task.benchmark] = mean_acc
81
 
82
  return self(
 
76
  if accs.size == 0 or any([acc is None for acc in accs]):
77
  continue
78
 
79
+ mean_acc = np.mean(accs)
80
  results[task.benchmark] = mean_acc
81
 
82
  return self(
src/populate.py CHANGED
@@ -18,8 +18,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
18
 
19
  df = df.sort_values(by=[AutoEvalColumn.task0.name], ascending=False)
20
  df = df[cols].round(decimals=2)
21
- print(df)
22
- assert 0
23
  # filter out if any of the benchmarks have not been produced
24
  df = df[has_no_nan_values(df, benchmark_cols)]
25
  return df
 
18
 
19
  df = df.sort_values(by=[AutoEvalColumn.task0.name], ascending=False)
20
  df = df[cols].round(decimals=2)
21
+
 
22
  # filter out if any of the benchmarks have not been produced
23
  df = df[has_no_nan_values(df, benchmark_cols)]
24
  return df