""" RoBERTa-based sentiment analysis for comparing LLM responses """ import torch import numpy as np # ended up not using, but left in case I need it later. from transformers import RobertaTokenizer, RobertaForSequenceClassification import nltk from nltk.tokenize import sent_tokenize # Global variables to store models once loaded ROBERTA_TOKENIZER = None ROBERTA_MODEL = None def ensure_nltk_resources(): """Make sure necessary NLTK resources are downloaded""" try: nltk.data.find('tokenizers/punkt') except LookupError: nltk.download('punkt', quiet=True) def load_roberta_model(): """ Load the RoBERTa model and tokenizer for sentiment analysis Returns: tuple: (tokenizer, model) for RoBERTa sentiment analysis """ global ROBERTA_TOKENIZER, ROBERTA_MODEL # Return cached model if already loaded if ROBERTA_TOKENIZER is not None and ROBERTA_MODEL is not None: return ROBERTA_TOKENIZER, ROBERTA_MODEL print("Loading RoBERTa model and tokenizer...") try: # Load tokenizer and model for sentiment analysis ROBERTA_TOKENIZER = RobertaTokenizer.from_pretrained('roberta-base') ROBERTA_MODEL = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli') return ROBERTA_TOKENIZER, ROBERTA_MODEL except Exception as e: print(f"Error loading RoBERTa model: {str(e)}") # Return None values if loading fails return None, None def analyze_sentiment_roberta(text): """ Analyze sentiment using RoBERTa model Args: text (str): Text to analyze Returns: dict: Sentiment analysis results with label and scores """ ensure_nltk_resources() # Handle empty text if not text or not text.strip(): return { "label": "neutral", "scores": { "contradiction": 0.33, "neutral": 0.34, "entailment": 0.33 }, "sentiment_score": 0.0, "sentence_scores": [] } # Load model tokenizer, model = load_roberta_model() if tokenizer is None or model is None: return { "error": "Failed to load RoBERTa model", "label": "neutral", "scores": { "contradiction": 0.33, "neutral": 0.34, "entailment": 0.33 }, "sentiment_score": 0.0 } try: # Set device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model.to(device) # Process the whole text encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512) encoded_text = {k: v.to(device) for k, v in encoded_text.items()} with torch.no_grad(): outputs = model(**encoded_text) predictions = torch.nn.functional.softmax(outputs.logits, dim=-1) # Get prediction contradiction_score = predictions[0, 0].item() neutral_score = predictions[0, 1].item() entailment_score = predictions[0, 2].item() # Map to sentiment # contradiction = negative, entailment = positive, with a scale sentiment_score = (entailment_score - contradiction_score) * 2 # Scale from -2 to 2 # Determine sentiment label if sentiment_score > 0.5: label = "positive" elif sentiment_score < -0.5: label = "negative" else: label = "neutral" # Analyze individual sentences if text is long enough sentences = sent_tokenize(text) sentence_scores = [] # Only process sentences if there are more than one and text is substantial if len(sentences) > 1 and len(text) > 100: for sentence in sentences: if len(sentence.split()) >= 3: # Only analyze meaningful sentences encoded_sentence = tokenizer(sentence, return_tensors='pt', truncation=True) encoded_sentence = {k: v.to(device) for k, v in encoded_sentence.items()} with torch.no_grad(): outputs = model(**encoded_sentence) predictions = torch.nn.functional.softmax(outputs.logits, dim=-1) # Calculate sentence sentiment score sent_contradiction = predictions[0, 0].item() sent_neutral = 
                    sent_neutral = predictions[0, 1].item()
                    sent_entailment = predictions[0, 2].item()
                    sent_score = (sent_entailment - sent_contradiction) * 2

                    # Determine the sentiment label for this sentence
                    if sent_score > 0.5:
                        sent_label = "positive"
                    elif sent_score < -0.5:
                        sent_label = "negative"
                    else:
                        sent_label = "neutral"

                    sentence_scores.append({
                        "text": sentence,
                        "score": sent_score,
                        "label": sent_label,
                        "scores": {
                            "contradiction": sent_contradiction,
                            "neutral": sent_neutral,
                            "entailment": sent_entailment
                        }
                    })

        return {
            "label": label,
            "scores": {
                "contradiction": contradiction_score,
                "neutral": neutral_score,
                "entailment": entailment_score
            },
            "sentiment_score": sentiment_score,
            "sentence_scores": sentence_scores
        }
    except Exception as e:
        import traceback
        print(f"Error analyzing sentiment with RoBERTa: {str(e)}")
        print(traceback.format_exc())
        return {
            "error": str(e),
            "label": "neutral",
            "scores": {
                "contradiction": 0.33,
                "neutral": 0.34,
                "entailment": 0.33
            },
            "sentiment_score": 0.0,
            "sentence_scores": []
        }


def compare_sentiment_roberta(texts, model_names=None):
    """
    Compare sentiment between two texts using RoBERTa.

    Args:
        texts (list): Texts to compare (at least two)
        model_names (list, optional): Display names for the models that
            produced the texts; defaults to "Model 1" and "Model 2"

    Returns:
        dict: Per-model sentiment results plus a pairwise comparison
    """
    print(f"Starting sentiment comparison for {len(texts)} texts")

    # Set default model names if not provided
    if model_names is None or len(model_names) < 2:
        model_names = ["Model 1", "Model 2"]

    # Handle the case with fewer than 2 texts
    if len(texts) < 2:
        return {
            "error": "Need at least 2 texts to compare",
            "models": model_names[:len(texts)]
        }

    # Get a sentiment analysis for each text
    sentiment_results = []
    for text in texts:
        sentiment_results.append(analyze_sentiment_roberta(text))

    # Create the result dictionary
    result = {
        "models": model_names[:len(texts)],
        "sentiment_analysis": {}
    }

    # Add the individual model results
    for i, model_name in enumerate(model_names[:len(texts)]):
        result["sentiment_analysis"][model_name] = sentiment_results[i]

    # Compare the sentiment scores
    if len(sentiment_results) >= 2:
        model1_name, model2_name = model_names[0], model_names[1]

        # Guard against missing or failed sentiment results
        score1 = 0
        score2 = 0
        if sentiment_results[0] and "sentiment_score" in sentiment_results[0]:
            score1 = sentiment_results[0]["sentiment_score"]
        if sentiment_results[1] and "sentiment_score" in sentiment_results[1]:
            score2 = sentiment_results[1]["sentiment_score"]

        # Calculate the difference and determine which text is more positive/negative
        difference = abs(score1 - score2)
        result["comparison"] = {
            "sentiment_difference": difference,
            "significant_difference": difference > 0.5,  # Threshold for a significant difference
        }

        if score1 > score2:
            result["comparison"]["more_positive"] = model1_name
            result["comparison"]["more_negative"] = model2_name
            result["comparison"]["difference_direction"] = f"{model1_name} is more positive than {model2_name}"
        elif score2 > score1:
            result["comparison"]["more_positive"] = model2_name
            result["comparison"]["more_negative"] = model1_name
            result["comparison"]["difference_direction"] = f"{model2_name} is more positive than {model1_name}"
        else:
            result["comparison"]["equal_sentiment"] = True
            result["comparison"]["difference_direction"] = f"{model1_name} and {model2_name} have similar sentiment"

    return result
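

# Minimal usage sketch when the module is run as a script. The sample texts
# and model names below are illustrative placeholders, not part of any real
# pipeline; the sketch just shows the shape of the comparison output.
if __name__ == "__main__":
    import json

    sample_texts = [
        "This approach works remarkably well and I am happy with the results.",
        "The method fails in several cases and the documentation is confusing.",
    ]
    comparison = compare_sentiment_roberta(sample_texts, model_names=["LLM A", "LLM B"])

    # Print only the pairwise comparison block for a quick sanity check
    print(json.dumps(comparison["comparison"], indent=2))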