BertrandCabotIDRIS committed on
Commit
349acce
·
verified ·
1 Parent(s): 6b0f22e

Update src/leaderboard/read_evals.py

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +16 -0
src/leaderboard/read_evals.py CHANGED
@@ -80,6 +80,19 @@ class EvalResult:
80
  mean_acc = np.mean(accs) * 100.0
81
  results[task.benchmark] = mean_acc
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  return self(
84
  eval_name=result_key,
85
  full_model=full_model,
@@ -134,6 +147,9 @@ class EvalResult:
134
  for task in Tasks:
135
  data_dict[task.value.col_name] = self.results[task.value.benchmark]
136
 
 
 
 
137
  return data_dict
138
 
139
 
 
80
  mean_acc = np.mean(accs) * 100.0
81
  results[task.benchmark] = mean_acc
82
 
83
+ mix_accs = []
84
+ for task in MixTasks:
85
+ task = task.value
86
+
87
+ # We average all scores of a given metric (not all metrics are present in all files)
88
+ accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
89
+ if accs.size == 0 or any([acc is None for acc in accs]):
90
+ continue
91
+
92
+ mix_accs.append(np.mean(accs) * 100.0)
93
+ results['Mix-fr'] = mean(mix_accs)
94
+
95
+
96
  return self(
97
  eval_name=result_key,
98
  full_model=full_model,
 
147
  for task in Tasks:
148
  data_dict[task.value.col_name] = self.results[task.value.benchmark]
149
 
150
+ for task in MixTasks:
151
+ data_dict['Mix-fr'] = self.results['Mix-fr']
152
+
153
  return data_dict
154
 
155