Spaces:
Sleeping
Sleeping
File size: 8,997 Bytes
7731b47 2d9e425 7731b47 8369b8b a1403d1 7731b47 15b45ea 7731b47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
"""
RoBERTa-based sentiment analysis for comparing LLM responses
"""
import torch
import numpy as np # ended up not using, but left in case I need it later.
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import nltk
from nltk.tokenize import sent_tokenize
# Module-level cache for the tokenizer/model pair. Both start out empty and are
# filled in by load_roberta_model() so the weights are only loaded once.
ROBERTA_TOKENIZER, ROBERTA_MODEL = None, None
def ensure_nltk_resources():
    """
    Make sure the NLTK sentence-tokenizer data is available locally.

    Both the legacy 'punkt' package and the newer 'punkt_tab' package are
    checked: recent NLTK releases load the sentence tokenizer from
    'punkt_tab', while older releases use 'punkt'. Any package that cannot be
    found on the NLTK data path is downloaded quietly.
    """
    for resource in ("punkt", "punkt_tab"):
        try:
            nltk.data.find(f"tokenizers/{resource}")
        except LookupError:
            # Not installed yet: fetch it without printing progress output.
            nltk.download(resource, quiet=True)
def load_roberta_model():
    """
    Load the RoBERTa model and tokenizer for sentiment analysis.

    The pair is cached in the module-level ROBERTA_TOKENIZER / ROBERTA_MODEL
    globals, so only the first successful call loads anything. The tokenizer
    comes from 'roberta-base' and the classifier from 'roberta-large-mnli'.

    Returns:
        tuple: (tokenizer, model) for RoBERTa sentiment analysis, or
        (None, None) if either of them could not be loaded.
    """
    global ROBERTA_TOKENIZER, ROBERTA_MODEL

    # Return cached model if already loaded
    if ROBERTA_TOKENIZER is not None and ROBERTA_MODEL is not None:
        return ROBERTA_TOKENIZER, ROBERTA_MODEL

    print("Loading RoBERTa model and tokenizer...")
    try:
        # Load into locals first so a failure on the second call cannot leave
        # the cache half-populated (tokenizer set, model still None).
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        model = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli')
    except Exception as e:
        print(f"Error loading RoBERTa model: {str(e)}")
        # Return None values if loading fails; the cache stays untouched so a
        # later call will retry.
        return None, None

    # Publish both objects together once both loads succeeded.
    ROBERTA_TOKENIZER, ROBERTA_MODEL = tokenizer, model
    return ROBERTA_TOKENIZER, ROBERTA_MODEL
def _neutral_result(error=None):
    """Build the placeholder result used for empty input and for failures."""
    result = {}
    if error is not None:
        result["error"] = error
    result["label"] = "neutral"
    result["scores"] = {
        "contradiction": 0.33,
        "neutral": 0.34,
        "entailment": 0.33,
    }
    result["sentiment_score"] = 0.0
    # Always present so callers can rely on one result shape.
    result["sentence_scores"] = []
    return result


def _label_for_score(score):
    """Map a sentiment score to "positive", "negative" or "neutral"."""
    if score > 0.5:
        return "positive"
    if score < -0.5:
        return "negative"
    return "neutral"


def _score_text(text, tokenizer, model, device):
    """Run one forward pass and return (class probabilities, sentiment score)."""
    encoded = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    encoded = {k: v.to(device) for k, v in encoded.items()}
    with torch.no_grad():
        outputs = model(**encoded)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    scores = {
        "contradiction": predictions[0, 0].item(),
        "neutral": predictions[0, 1].item(),
        "entailment": predictions[0, 2].item(),
    }
    # contradiction = negative, entailment = positive; scaled to [-2, 2]
    sentiment_score = (scores["entailment"] - scores["contradiction"]) * 2
    return scores, sentiment_score


def analyze_sentiment_roberta(text):
    """
    Analyze sentiment using RoBERTa model.

    The class probabilities at output indices 0, 1 and 2 are reported as
    "contradiction", "neutral" and "entailment". The sentiment score is
    (entailment - contradiction) * 2, and the label is "positive" above 0.5,
    "negative" below -0.5 and "neutral" otherwise. Texts longer than 100
    characters that split into more than one sentence are additionally scored
    sentence by sentence (sentences with fewer than 3 words are skipped).

    Args:
        text (str): Text to analyze

    Returns:
        dict: Sentiment analysis results with "label", "scores",
        "sentiment_score" and "sentence_scores". Empty or whitespace-only
        input yields a fixed neutral result. If the model cannot be loaded or
        inference fails, the same neutral result is returned with an extra
        "error" key holding the message.
    """
    # Handle empty text before touching NLTK or the model
    if not text or not text.strip():
        return _neutral_result()

    ensure_nltk_resources()

    # Load model
    tokenizer, model = load_roberta_model()
    if tokenizer is None or model is None:
        return _neutral_result("Failed to load RoBERTa model")

    try:
        # Set device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)

        # Process the whole text
        scores, sentiment_score = _score_text(text, tokenizer, model, device)

        sentence_scores = []
        # Only split into sentences when the text is substantial; this avoids
        # tokenizing short texts whose sentence results would be discarded.
        if len(text) > 100:
            sentences = sent_tokenize(text)
            if len(sentences) > 1:
                for sentence in sentences:
                    if len(sentence.split()) < 3:
                        continue  # Only analyze meaningful sentences
                    sent_probs, sent_score = _score_text(
                        sentence, tokenizer, model, device
                    )
                    sentence_scores.append({
                        "text": sentence,
                        "score": sent_score,
                        "label": _label_for_score(sent_score),
                        "scores": sent_probs,
                    })

        return {
            "label": _label_for_score(sentiment_score),
            "scores": scores,
            "sentiment_score": sentiment_score,
            "sentence_scores": sentence_scores,
        }
    except Exception as e:
        import traceback
        print(f"Error analyzing sentiment with RoBERTa: {str(e)}")
        print(traceback.format_exc())
        return _neutral_result(str(e))
def compare_sentiment_roberta(texts, model_names=None):
    """
    Compare sentiment between two texts using RoBERTa.

    Each reported text is scored with analyze_sentiment_roberta() and the
    sentiment scores of the first two are compared.

    Args:
        texts: Sequence of texts to analyze; at least two are required.
            None is treated as an empty sequence.
        model_names: Display names, one per text. If missing or shorter than
            two entries, ["Model 1", "Model 2"] is used instead.

    Returns:
        dict: On success, "models" (the names actually reported),
        "sentiment_analysis" (name -> analysis result) and "comparison"
        (absolute score difference, whether it exceeds 0.5, and which model
        is more positive/negative). With fewer than two texts, a dict with
        "error" and "models" only.
    """
    if texts is None:
        texts = []
    print(f"Starting sentiment comparison for {len(texts)} texts")

    # Set default model names if not provided
    if model_names is None or len(model_names) < 2:
        model_names = ["Model 1", "Model 2"]
    reported_names = model_names[:len(texts)]

    # Handle case with fewer than 2 texts
    if len(texts) < 2:
        return {
            "error": "Need at least 2 texts to compare",
            "models": reported_names,
        }

    # Only texts that have a name end up in the result, so zip() stops the
    # (expensive) analysis at the last named text instead of scoring extras.
    sentiment_results = [
        analyze_sentiment_roberta(text)
        for _, text in zip(reported_names, texts)
    ]

    result = {
        "models": reported_names,
        "sentiment_analysis": dict(zip(reported_names, sentiment_results)),
    }

    # At this point there are at least two names and two results.
    model1_name, model2_name = model_names[0], model_names[1]
    # Fall back to 0 if a result is empty or lacks a score
    score1 = (sentiment_results[0] or {}).get("sentiment_score", 0)
    score2 = (sentiment_results[1] or {}).get("sentiment_score", 0)

    # Calculate difference and determine which is more positive/negative
    difference = abs(score1 - score2)
    comparison = {
        "sentiment_difference": difference,
        "significant_difference": difference > 0.5,  # Threshold for significant difference
    }
    if score1 > score2:
        comparison["more_positive"] = model1_name
        comparison["more_negative"] = model2_name
        comparison["difference_direction"] = f"{model1_name} is more positive than {model2_name}"
    elif score2 > score1:
        comparison["more_positive"] = model2_name
        comparison["more_negative"] = model1_name
        comparison["difference_direction"] = f"{model2_name} is more positive than {model1_name}"
    else:
        comparison["equal_sentiment"] = True
        comparison["difference_direction"] = f"{model1_name} and {model2_name} have similar sentiment"
    result["comparison"] = comparison

    return result
|