Spaces:
Running on CPU Upgrade
Update src/leaderboard/read_evals.py
Browse files
src/leaderboard/read_evals.py
CHANGED
|
@@ -90,6 +90,7 @@ class EvalResult:
|
|
| 90 |
mean_acc = np.mean(accs) if len(accs) > 0 else 0
|
| 91 |
results[task.benchmark] = mean_acc
|
| 92 |
if task.benchmark == "CLCC-H":
|
|
|
|
| 93 |
results[task.benchmark] = results[task.benchmark] * 100
|
| 94 |
|
| 95 |
return self(
|
|
@@ -129,7 +130,16 @@ class EvalResult:
|
|
| 129 |
|
| 130 |
def to_dict(self):
|
| 131 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 132 |
-
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
data_dict = {
|
| 134 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 135 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
|
|
| 90 |
mean_acc = np.mean(accs) if len(accs) > 0 else 0
|
| 91 |
results[task.benchmark] = mean_acc
|
| 92 |
if task.benchmark == "CLCC-H":
|
| 93 |
+
print("results[task.benchmark]: ", results[task.benchmark])
|
| 94 |
results[task.benchmark] = results[task.benchmark] * 100
|
| 95 |
|
| 96 |
return self(
|
|
|
|
| 130 |
|
| 131 |
def to_dict(self):
|
| 132 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 133 |
+
#average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
| 134 |
+
|
| 135 |
+
average = 0
|
| 136 |
+
nums = 0
|
| 137 |
+
for v in self.results.values():
|
| 138 |
+
if v is not None and v != 0:
|
| 139 |
+
average += v
|
| 140 |
+
nums += 1
|
| 141 |
+
average = average/nums
|
| 142 |
+
|
| 143 |
data_dict = {
|
| 144 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 145 |
AutoEvalColumn.precision.name: self.precision.value.name,
|