# compute_metrics.py

import json
from pathlib import Path
import sacrebleu
from rouge_score import rouge_scorer, scoring

# === Config ===
RESULTS_FILE = "./output/eval_results.json"
assert Path(RESULTS_FILE).exists(), f"File not found: {RESULTS_FILE}"

# === Load data ===
with open(RESULTS_FILE, "r", encoding="utf-8") as f:
    data = json.load(f)

references = [entry["reference"] for entry in data]
predictions = [entry["prediction"] for entry in data]
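
# Assumed input schema (inferred from the keys used above): each record in
# eval_results.json looks like {"reference": "<gold text>", "prediction": "<model output>"}.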

# === Compute BLEU ===
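# sacrebleu's corpus_bleu takes the predictions plus a list of reference streams
# (one list per reference set); with a single reference per prediction, that is [references].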
bleu = sacrebleu.corpus_bleu(predictions, [references])
print("✅ BLEU Score:", bleu.score)

# === Compute ROUGE ===
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
aggregator = scoring.BootstrapAggregator()
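
# BootstrapAggregator resamples per-example scores to produce low/mid/high
# confidence-interval estimates; "mid" used below is the point estimate.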

for pred, ref in zip(predictions, references):
    scores = scorer.score(ref, pred)
    aggregator.add_scores(scores)

rouge_result = aggregator.aggregate()
print("\n✅ ROUGE Scores:")
for k, v in rouge_result.items():
   print(f"{k}: P={v.mid.precision:.4f}, R={v.mid.recall:.4f}, F1={v.mid.fmeasure:.4f}")