Spaces:
Runtime error
Runtime error
File size: 1,711 Bytes
a65f9fb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Checkpoint to load: a BERT model that is already fine-tuned for
# sentiment analysis on the SST-2 dataset.
model_name = "textattack/bert-base-uncased-SST-2"

# Choose the compute device up front: CUDA when available, otherwise CPU.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")

# Load the tokenizer that matches the checkpoint, then load the classifier
# and place it on the chosen device in one step.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` with the module-level model.

    The input is tokenized (truncated to at most 512 tokens), run through
    ``model`` without gradient tracking, and the logits are turned into
    probabilities with a softmax over the class dimension.

    Args:
        text: The text to classify. The scalar conversions below require a
            batch of exactly one, so this is expected to be a single string.

    Returns:
        A dict with four keys:
            "text": the input, unchanged.
            "prediction": "NEGATIVE" or "POSITIVE".
            "confidence": probability of the predicted class, as a float.
            "probabilities": mapping of each label to its probability.
    """
    # Tokenize input and move the resulting tensors to the model's device.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1)

    # NOTE(review): assumes class index 0 is negative and 1 is positive for
    # this checkpoint — confirm against the model card.
    labels = ["NEGATIVE", "POSITIVE"]
    confidence, pred_class = torch.max(probs, dim=1)

    return {
        "text": text,
        # Convert the one-element tensor to a Python int explicitly instead
        # of relying on implicit tensor-to-index conversion.
        "prediction": labels[pred_class.item()],
        "confidence": confidence.item(),
        "probabilities": dict(zip(labels, probs.tolist()[0])),
    }
# Example usage: classify one sample sentence and print a short report.
text = "Hugging Face is amazing!"
result = analyze_sentiment(text)

print(f"\nInput: {result['text']}")
print(f"Model: {model_name}")
# `.2%` renders the confidence as a percentage with two decimal places.
print(f"Prediction: {result['prediction']} ({result['confidence']:.2%})")
print("Probabilities:")
for label, prob in result['probabilities'].items():
    print(f" {label}: {prob:.4f}")

# Additional tokenization info: show how the tokenizer splits the sample.
print("\nTokenization details:")
tokens = tokenizer.tokenize(text)
print(f"Tokens: {tokens}")
# NOTE(review): encode() presumably adds special tokens by default, so the
# ID list may be longer than the token list above — confirm.
print(f"Token IDs: {tokenizer.encode(text)}")