# Polarity-prediction demo: classifies peer-review sentences as Positive/Negative
# using a fine-tuned sequence-classification model.
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from pathlib import Path
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
# === CONFIGURATION ===
# Project root is two levels up from this script.
BASE_DIR = Path(__file__).resolve().parent.parent
# Fine-tuned polarity classifier checkpoint (directory with config + weights + tokenizer files).
MODEL_DIR = BASE_DIR / "alternative_polarity" / "deberta" / "deberta_v3_large_polarity_final_model"
# Alternative checkpoints tried during experimentation (kept for reference):
# MODEL_DIR = BASE_DIR / "alternative_polarity" / "llama" / "final_model"
# MODEL_DIR = BASE_DIR / "alternative_polarity" / "scideberta" / "scideberta_full_polarity_final_model"
# --> Best so far: deberta_v3 (passes "pros" test)
# === Load model and tokenizer ===
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
# Inference-only: disable dropout / switch batch-norm style layers to eval behavior.
model.eval()
# Prefer GPU when available; all input tensors are moved to this device in predict_polarity.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# === Prediction function with confidence ===
def predict_polarity(sentences):
    """Classify the polarity of each sentence with the loaded model.

    Args:
        sentences: list of sentence strings to classify.

    Returns:
        A list of dicts, one per input sentence, with keys:
            "sentence":   the original sentence,
            "label":      "Positive" if the argmax class index is 1, else "Negative",
            "confidence": probability of the predicted class (float),
            "probs":      full softmax distribution as a plain list
                          (index 0 = Negative, index 1 = Positive — assumes the
                          fine-tuned label mapping; verify against training config).
    """
    # HF tokenizers raise on an empty batch, so short-circuit explicitly.
    if not sentences:
        return []
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # No gradients needed for inference; saves memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = F.softmax(outputs.logits, dim=1)
    # Per-row max gives both the winning probability and the class index.
    confidences, preds = torch.max(probs, dim=1)
    results = []
    for sentence, pred, conf, prob in zip(sentences, preds, confidences, probs):
        results.append({
            "sentence": sentence,
            "label": "Positive" if pred.item() == 1 else "Negative",
            "confidence": conf.item(),
            "probs": prob.cpu().numpy().tolist(),
        })
    return results
# === Example: test a multi-sentence peer review ===
# === Example: test a multi-sentence peer review ===
if __name__ == "__main__":
    # Swap in any review text you want to analyze.
    review_text = """
    Pros:
    Con: The experiments lack comparison with prior work.
    The authors clearly explain their methodology, which is a strong point.
    """

    # Split the review into sentences with the project's glimpse tokenizer,
    # then classify each sentence's polarity.
    predictions = predict_polarity(glimpse_tokenizer(review_text))

    # Pretty-print one block per sentence.
    for item in predictions:
        neg_p, pos_p = item['probs'][0], item['probs'][1]
        print(f"\nSentence: {item['sentence']}")
        print(f"  → Prediction: {item['label']} (Confidence: {item['confidence']:.3f})")
        print(f"     Probabilities: [Negative: {neg_p:.3f}, Positive: {pos_p:.3f}]")
|