# 525GradioApp/processors/roberta_analysis.py
"""
RoBERTa-based sentiment analysis for comparing LLM responses
"""
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import nltk
from nltk.tokenize import sent_tokenize
# Global variables to store models once loaded
ROBERTA_TOKENIZER = None
ROBERTA_MODEL = None

def ensure_nltk_resources():
    """Make sure necessary NLTK resources are downloaded"""
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt', quiet=True)
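
# Note (assumption about your NLTK version): NLTK 3.9+ looks up
# 'tokenizers/punkt_tab' rather than 'punkt' for sent_tokenize. If you hit a
# LookupError mentioning punkt_tab on a newer install, the same pattern applies:
#
#     try:
#         nltk.data.find('tokenizers/punkt_tab')
#     except LookupError:
#         nltk.download('punkt_tab', quiet=True)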

def load_roberta_model():
    """
    Load the RoBERTa model and tokenizer for sentiment analysis

    Returns:
        tuple: (tokenizer, model) for RoBERTa sentiment analysis
    """
    global ROBERTA_TOKENIZER, ROBERTA_MODEL

    # Return cached model if already loaded
    if ROBERTA_TOKENIZER is not None and ROBERTA_MODEL is not None:
        return ROBERTA_TOKENIZER, ROBERTA_MODEL

    print("Loading RoBERTa model and tokenizer...")
    try:
        # Load tokenizer and model from the same MNLI checkpoint so the
        # vocabulary and weights stay in sync
        ROBERTA_TOKENIZER = RobertaTokenizer.from_pretrained('roberta-large-mnli')
        ROBERTA_MODEL = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli')
        ROBERTA_MODEL.eval()  # inference only; disables dropout
        return ROBERTA_TOKENIZER, ROBERTA_MODEL
    except Exception as e:
        print(f"Error loading RoBERTa model: {str(e)}")
        # Return None values if loading fails
        return None, None
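
# The hard-coded class order used below (0=contradiction, 1=neutral,
# 2=entailment) matches the published 'roberta-large-mnli' config. A small
# defensive sketch (hypothetical helper, not called elsewhere in this file)
# that reads the ordering from the checkpoint instead of assuming it:
def mnli_class_indices(model):
    """Return the class indices for (contradiction, neutral, entailment)."""
    label2id = {label.lower(): i for i, label in model.config.id2label.items()}
    return (label2id["contradiction"], label2id["neutral"], label2id["entailment"])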

def analyze_sentiment_roberta(text):
    """
    Analyze sentiment using RoBERTa model

    Args:
        text (str): Text to analyze

    Returns:
        dict: Sentiment analysis results with label and scores
    """
    ensure_nltk_resources()

    # Handle empty text
    if not text or not text.strip():
        return {
            "label": "neutral",
            "scores": {
                "contradiction": 0.33,
                "neutral": 0.34,
                "entailment": 0.33
            },
            "sentiment_score": 0.0,
            "sentence_scores": []
        }
    # Load model
    tokenizer, model = load_roberta_model()
    if tokenizer is None or model is None:
        return {
            "error": "Failed to load RoBERTa model",
            "label": "neutral",
            "scores": {
                "contradiction": 0.33,
                "neutral": 0.34,
                "entailment": 0.33
            },
            "sentiment_score": 0.0,
            "sentence_scores": []
        }
    try:
        # Set device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)

        # Process the whole text
        encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
        encoded_text = {k: v.to(device) for k, v in encoded_text.items()}
        with torch.no_grad():
            outputs = model(**encoded_text)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Get prediction (MNLI class order: 0=contradiction, 1=neutral, 2=entailment)
        contradiction_score = predictions[0, 0].item()
        neutral_score = predictions[0, 1].item()
        entailment_score = predictions[0, 2].item()

        # Map NLI probabilities to a sentiment-like score:
        # contradiction = negative, entailment = positive
        sentiment_score = (entailment_score - contradiction_score) * 2  # Scale from -2 to 2

        # Determine sentiment label
        if sentiment_score > 0.5:
            label = "positive"
        elif sentiment_score < -0.5:
            label = "negative"
        else:
            label = "neutral"
        # Analyze individual sentences if text is long enough
        sentences = sent_tokenize(text)
        sentence_scores = []

        # Only process sentences if there are more than one and text is substantial
        if len(sentences) > 1 and len(text) > 100:
            for sentence in sentences:
                if len(sentence.split()) >= 3:  # Only analyze meaningful sentences
                    encoded_sentence = tokenizer(sentence, return_tensors='pt', truncation=True)
                    encoded_sentence = {k: v.to(device) for k, v in encoded_sentence.items()}
                    with torch.no_grad():
                        outputs = model(**encoded_sentence)
                        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

                    # Calculate sentence sentiment score
                    sent_contradiction = predictions[0, 0].item()
                    sent_neutral = predictions[0, 1].item()
                    sent_entailment = predictions[0, 2].item()
                    sent_score = (sent_entailment - sent_contradiction) * 2

                    # Determine sentiment label for this sentence
                    if sent_score > 0.5:
                        sent_label = "positive"
                    elif sent_score < -0.5:
                        sent_label = "negative"
                    else:
                        sent_label = "neutral"

                    sentence_scores.append({
                        "text": sentence,
                        "score": sent_score,
                        "label": sent_label,
                        "scores": {
                            "contradiction": sent_contradiction,
                            "neutral": sent_neutral,
                            "entailment": sent_entailment
                        }
                    })
        return {
            "label": label,
            "scores": {
                "contradiction": contradiction_score,
                "neutral": neutral_score,
                "entailment": entailment_score
            },
            "sentiment_score": sentiment_score,
            "sentence_scores": sentence_scores
        }
    except Exception as e:
        import traceback
        print(f"Error analyzing sentiment with RoBERTa: {str(e)}")
        print(traceback.format_exc())
        return {
            "error": str(e),
            "label": "neutral",
            "scores": {
                "contradiction": 0.33,
                "neutral": 0.34,
                "entailment": 0.33
            },
            "sentiment_score": 0.0,
            "sentence_scores": []
        }
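
# Optional optimization sketch (hypothetical helper, not called above): the
# per-sentence loop in analyze_sentiment_roberta runs one forward pass per
# sentence, but Hugging Face tokenizers accept a list of strings with
# padding=True, so all sentences can be scored in a single batch:
def batch_sentence_probabilities(sentences, tokenizer, model, device):
    """Score many sentences at once; returns one softmax row per sentence."""
    encoded = tokenizer(sentences, return_tensors='pt', padding=True,
                        truncation=True, max_length=512)
    encoded = {k: v.to(device) for k, v in encoded.items()}
    with torch.no_grad():
        logits = model(**encoded).logits
    return torch.nn.functional.softmax(logits, dim=-1)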

def compare_sentiment_roberta(texts, model_names=None):
    """
    Compare sentiment between texts using RoBERTa

    Args:
        texts (list): Texts to analyze (the first two are compared directly)
        model_names (list, optional): Display names for the models that
            produced each text

    Returns:
        dict: Per-model sentiment results plus a pairwise comparison
    """
    print(f"Starting sentiment comparison for {len(texts)} texts")

    # Set default model names if not provided
    if model_names is None or len(model_names) < 2:
        model_names = ["Model 1", "Model 2"]

    # Handle case with fewer than 2 texts
    if len(texts) < 2:
        return {
            "error": "Need at least 2 texts to compare",
            "models": model_names[:len(texts)]
        }

    # Get sentiment analysis for each text
    sentiment_results = []
    for text in texts:
        sentiment_results.append(analyze_sentiment_roberta(text))

    # Create result dictionary
    result = {
        "models": model_names[:len(texts)],
        "sentiment_analysis": {}
    }

    # Add individual model results
    for i, model_name in enumerate(model_names[:len(texts)]):
        result["sentiment_analysis"][model_name] = sentiment_results[i]

    # Compare sentiment scores
    if len(sentiment_results) >= 2:
        model1_name, model2_name = model_names[0], model_names[1]

        # Add null checks for the sentiment results
        score1 = 0
        score2 = 0
        if sentiment_results[0] and "sentiment_score" in sentiment_results[0]:
            score1 = sentiment_results[0]["sentiment_score"]
        if sentiment_results[1] and "sentiment_score" in sentiment_results[1]:
            score2 = sentiment_results[1]["sentiment_score"]

        # Calculate difference and determine which is more positive/negative
        difference = abs(score1 - score2)
        result["comparison"] = {
            "sentiment_difference": difference,
            "significant_difference": difference > 0.5,  # Threshold for significant difference
        }

        if score1 > score2:
            result["comparison"]["more_positive"] = model1_name
            result["comparison"]["more_negative"] = model2_name
            result["comparison"]["difference_direction"] = f"{model1_name} is more positive than {model2_name}"
        elif score2 > score1:
            result["comparison"]["more_positive"] = model2_name
            result["comparison"]["more_negative"] = model1_name
            result["comparison"]["difference_direction"] = f"{model2_name} is more positive than {model1_name}"
        else:
            result["comparison"]["equal_sentiment"] = True
            result["comparison"]["difference_direction"] = f"{model1_name} and {model2_name} have similar sentiment"

    return result
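
# Minimal usage sketch. Assumes network access to download the checkpoint on
# the first run; the sample strings and model names are illustrative only.
if __name__ == "__main__":
    sample_texts = [
        "The results were excellent and exceeded every expectation.",
        "The rollout was a mess and nothing worked as promised.",
    ]
    comparison = compare_sentiment_roberta(sample_texts, model_names=["LLM A", "LLM B"])
    print(comparison.get("comparison", comparison))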