Update mmlu_eval_original.py

mmlu_eval_original.py CHANGED (+0, -5)
@@ -146,8 +146,6 @@ def eval(subject, model, tokenizer, dev_df, test_df, num_questions_per_subject=
     return cors, acc, all_probs
 
 
-
-
 def evaluate_mmlu(model, tokenizer, num_subjects=-1, num_questions=5, num_shots=5):
     """
     Evaluates the model on MMLU across all subjects.
@@ -190,10 +188,7 @@ def evaluate_mmlu(model, tokenizer, num_subjects=-1, num_questions=5, num_shots=
             'Num_correct': int(np.sum(cors)),
             'Accuracy': acc
         })
-
-
 
-
     weighted_acc = np.mean(np.concatenate(all_cors))
 
     min_acc_subject = min(results.items(), key=lambda x: x[1])[0]
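For context, a minimal sketch of how the `evaluate_mmlu` entry point touched by this diff might be driven, assuming a Hugging Face causal LM and tokenizer. The checkpoint name is a placeholder and the function's return value is not shown in this diff, so both are assumptions rather than part of the commit:

```python
# Minimal usage sketch, not part of this commit. Assumptions: the Space's
# module is importable as mmlu_eval_original, and a standard Hugging Face
# causal LM plus tokenizer are acceptable inputs.
from transformers import AutoModelForCausalLM, AutoTokenizer

from mmlu_eval_original import evaluate_mmlu

model_name = "gpt2"  # placeholder checkpoint for illustration
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# 5-shot evaluation, 5 questions per subject, across all subjects (-1),
# mirroring the defaults in the signature shown in the diff.
summary = evaluate_mmlu(model, tokenizer, num_subjects=-1, num_questions=5, num_shots=5)

# The diff does not show what evaluate_mmlu returns, so the result is printed as-is.
print(summary)
```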