""" RoBERTa-based sentiment analysis for comparing LLM responses """ import torch import numpy as np # ended up not using, but left in case I need it later. from transformers import RobertaTokenizer, RobertaForSequenceClassification import nltk from nltk.tokenize import sent_tokenize # Global variables to store models once loaded ROBERTA_TOKENIZER = None ROBERTA_MODEL = None def ensure_nltk_resources(): """Make sure necessary NLTK resources are downloaded""" try: nltk.data.find('tokenizers/punkt') except LookupError: nltk.download('punkt', quiet=True) def load_roberta_model(): """ Load the RoBERTa model and tokenizer for sentiment analysis Returns: tuple: (tokenizer, model) for RoBERTa sentiment analysis """ global ROBERTA_TOKENIZER, ROBERTA_MODEL # Return cached model if already loaded if ROBERTA_TOKENIZER is not None and ROBERTA_MODEL is not None: return ROBERTA_TOKENIZER, ROBERTA_MODEL print("Loading RoBERTa model and tokenizer...") try: # Load tokenizer and model for sentiment analysis ROBERTA_TOKENIZER = RobertaTokenizer.from_pretrained('roberta-base') ROBERTA_MODEL = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli') return ROBERTA_TOKENIZER, ROBERTA_MODEL except Exception as e: print(f"Error loading RoBERTa model: {str(e)}") # Return None values if loading fails return None, None def analyze_sentiment_roberta(text): """ Analyze sentiment using RoBERTa model Args: text (str): Text to analyze Returns: dict: Sentiment analysis results with label and scores """ ensure_nltk_resources() # Handle empty text if not text or not text.strip(): return { "label": "neutral", "scores": { "contradiction": 0.33, "neutral": 0.34, "entailment": 0.33 }, "sentiment_score": 0.0, "sentence_scores": [] } # Load model tokenizer, model = load_roberta_model() if tokenizer is None or model is None: return { "error": "Failed to load RoBERTa model", "label": "neutral", "scores": { "contradiction": 0.33, "neutral": 0.34, "entailment": 0.33 }, "sentiment_score": 0.0 } try: # Set device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model.to(device) # Process the whole text encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512) encoded_text = {k: v.to(device) for k, v in encoded_text.items()} with torch.no_grad(): outputs = model(**encoded_text) predictions = torch.nn.functional.softmax(outputs.logits, dim=-1) # Get prediction contradiction_score = predictions[0, 0].item() neutral_score = predictions[0, 1].item() entailment_score = predictions[0, 2].item() # Map to sentiment # contradiction = negative, entailment = positive, with a scale sentiment_score = (entailment_score - contradiction_score) * 2 # Scale from -2 to 2 # Determine sentiment label if sentiment_score > 0.5: label = "positive" elif sentiment_score < -0.5: label = "negative" else: label = "neutral" # Analyze individual sentences if text is long enough sentences = sent_tokenize(text) sentence_scores = [] # Only process sentences if there are more than one and text is substantial if len(sentences) > 1 and len(text) > 100: for sentence in sentences: if len(sentence.split()) >= 3: # Only analyze meaningful sentences encoded_sentence = tokenizer(sentence, return_tensors='pt', truncation=True) encoded_sentence = {k: v.to(device) for k, v in encoded_sentence.items()} with torch.no_grad(): outputs = model(**encoded_sentence) predictions = torch.nn.functional.softmax(outputs.logits, dim=-1) # Calculate sentence sentiment score sent_contradiction = predictions[0, 0].item() sent_neutral = 
                    sent_neutral = predictions[0, 1].item()
                    sent_entailment = predictions[0, 2].item()
                    sent_score = (sent_entailment - sent_contradiction) * 2

                    # Determine the sentiment label for this sentence
                    if sent_score > 0.5:
                        sent_label = "positive"
                    elif sent_score < -0.5:
                        sent_label = "negative"
                    else:
                        sent_label = "neutral"

                    sentence_scores.append({
                        "text": sentence,
                        "score": sent_score,
                        "label": sent_label,
                        "scores": {
                            "contradiction": sent_contradiction,
                            "neutral": sent_neutral,
                            "entailment": sent_entailment
                        }
                    })

        return {
            "label": label,
            "scores": {
                "contradiction": contradiction_score,
                "neutral": neutral_score,
                "entailment": entailment_score
            },
            "sentiment_score": sentiment_score,
            "sentence_scores": sentence_scores
        }
    except Exception as e:
        import traceback
        print(f"Error analyzing sentiment with RoBERTa: {str(e)}")
        print(traceback.format_exc())
        return {
            "error": str(e),
            "label": "neutral",
            "scores": {
                "contradiction": 0.33,
                "neutral": 0.34,
                "entailment": 0.33
            },
            "sentiment_score": 0.0,
            "sentence_scores": []
        }


def compare_sentiment_roberta(texts, model_names=None):
    """
    Compare sentiment between two texts using RoBERTa.

    Args:
        texts (list): Texts to compare (at least two)
        model_names (list, optional): Display names for the models that
            produced the texts; defaults to "Model 1" and "Model 2"

    Returns:
        dict: Per-model sentiment results plus a pairwise comparison
    """
    print(f"Starting sentiment comparison for {len(texts)} texts")

    # Set default model names if not provided
    if model_names is None or len(model_names) < 2:
        model_names = ["Model 1", "Model 2"]

    # Handle the case with fewer than 2 texts
    if len(texts) < 2:
        return {
            "error": "Need at least 2 texts to compare",
            "models": model_names[:len(texts)]
        }

    # Get a sentiment analysis for each text
    sentiment_results = []
    for text in texts:
        sentiment_results.append(analyze_sentiment_roberta(text))

    # Create the result dictionary
    result = {
        "models": model_names[:len(texts)],
        "sentiment_analysis": {}
    }

    # Add the individual model results
    for i, model_name in enumerate(model_names[:len(texts)]):
        result["sentiment_analysis"][model_name] = sentiment_results[i]

    # Compare the sentiment scores
    if len(sentiment_results) >= 2:
        model1_name, model2_name = model_names[0], model_names[1]

        # Guard against missing or failed sentiment results
        score1 = 0
        score2 = 0
        if sentiment_results[0] and "sentiment_score" in sentiment_results[0]:
            score1 = sentiment_results[0]["sentiment_score"]
        if sentiment_results[1] and "sentiment_score" in sentiment_results[1]:
            score2 = sentiment_results[1]["sentiment_score"]

        # Calculate the difference and determine which text is more positive/negative
        difference = abs(score1 - score2)
        result["comparison"] = {
            "sentiment_difference": difference,
            "significant_difference": difference > 0.5,  # Threshold for a significant difference
        }

        if score1 > score2:
            result["comparison"]["more_positive"] = model1_name
            result["comparison"]["more_negative"] = model2_name
            result["comparison"]["difference_direction"] = f"{model1_name} is more positive than {model2_name}"
        elif score2 > score1:
            result["comparison"]["more_positive"] = model2_name
            result["comparison"]["more_negative"] = model1_name
            result["comparison"]["difference_direction"] = f"{model2_name} is more positive than {model1_name}"
        else:
            result["comparison"]["equal_sentiment"] = True
            result["comparison"]["difference_direction"] = f"{model1_name} and {model2_name} have similar sentiment"

    return result
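

# Minimal usage sketch when the module is run as a script. The sample texts
# and model names below are illustrative placeholders, not part of any real
# pipeline; the sketch just shows the shape of the comparison output.
if __name__ == "__main__":
    import json

    sample_texts = [
        "This approach works remarkably well and I am happy with the results.",
        "The method fails in several cases and the documentation is confusing.",
    ]
    comparison = compare_sentiment_roberta(sample_texts, model_names=["LLM A", "LLM B"])

    # Print only the pairwise comparison block for a quick sanity check
    print(json.dumps(comparison["comparison"], indent=2))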