Update eval_utils.py
eval_utils.py  +6 -5  (CHANGED)
@@ -257,13 +257,14 @@ def evaluate_summ(gold_data, pred_data):
         pred_summaries.append(pred_summary)


-    rl_evaluator = rouge.Rouge(metrics=['rouge-n','rouge-l'], max_n=2, limit_length=False, apply_avg=True)
-    rl_scores = rl_evaluator.get_scores(pred_summaries, gold_summaries)
-    print("Rouge:", {k:v['f'] for k,v in rl_scores.items()}, flush=True)
+    # rl_evaluator = rouge.Rouge(metrics=['rouge-n','rouge-l'], max_n=2, limit_length=False, apply_avg=True)
+    # rl_scores = rl_evaluator.get_scores(pred_summaries, gold_summaries)
+    # print("Rouge:", {k:v['f'] for k,v in rl_scores.items()}, flush=True)

-    _, _, bs = bert_score.score(pred_summaries, gold_summaries, lang="en", verbose=True
+    _, _, bs = bert_score.score(pred_summaries, gold_summaries, lang="en", verbose=True)
     print("BERTSCORE:", bs.mean().item())
-    return {'ROUGE-L': rl_scores['rouge-l']['f'], 'BERTSCORE': bs.mean().item()}
+    # return {'ROUGE-L': rl_scores['rouge-l']['f'], 'BERTSCORE': bs.mean().item()}
+    return {'ROUGE-L': '-', 'BERTSCORE': bs.mean().item()}


 def evaluate_lmt(gold_data, pred_data):
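For context, this hunk comments out the py-rouge scoring (rouge.Rouge / get_scores) and keeps only BERTScore, reporting '-' as a placeholder for ROUGE-L. Below is a minimal, hedged sketch of the retained scoring path. The function name score_summaries and the example sentences are illustrative, not part of the repository; it assumes the bert-score package is installed (bert_score.score downloads a pretrained model on first use and returns per-sentence precision, recall, and F1 tensors).

import bert_score

def score_summaries(pred_summaries, gold_summaries):
    """Score predicted summaries against references with BERTScore only.

    Sketch of the behaviour after this commit: the py-rouge block is
    disabled, so ROUGE-L is reported as a '-' placeholder.
    """
    # bert_score.score returns (precision, recall, F1); only F1 is used here.
    _, _, f1 = bert_score.score(pred_summaries, gold_summaries, lang="en", verbose=True)
    mean_f1 = f1.mean().item()
    print("BERTSCORE:", mean_f1)
    return {'ROUGE-L': '-', 'BERTSCORE': mean_f1}

if __name__ == "__main__":
    # Illustrative data only; in evaluate_summ these lists are built from gold_data and pred_data.
    gold = ["The council approved the new transit budget on Tuesday."]
    pred = ["On Tuesday the council approved the new budget for transit."]
    print(score_summaries(pred, gold))

Returning '-' rather than dropping the ROUGE-L key presumably keeps the result dictionary's shape stable for whatever aggregates or prints these metrics downstream, while the ROUGE dependency stays disabled.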