Spaces:
Sleeping
Sleeping
File size: 4,612 Bytes
c435293 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import statistics
import re
def download_nltk_resources():
    """Download the VADER lexicon required by SentimentIntensityAnalyzer.

    Best-effort: any failure (e.g. no network access, read-only data
    directory) is deliberately ignored so that importing this module never
    crashes; classify_sentiment() has its own fallback if the lexicon is
    still missing.
    """
    try:
        nltk.download('vader_lexicon', quiet=True)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; a failed download is tolerated by design.
        pass
# Module-import side effect: fetch the VADER lexicon up front so that the
# first call to classify_sentiment() does not hit a missing-resource error.
download_nltk_resources()
def classify_formality(text):
    """
    Classify text formality based on simple regex heuristics.

    Counts occurrences of formal markers (discourse connectives, legalese
    modals, honorifics) versus informal markers (slang, contractions,
    repeated punctuation/ellipses), normalizes each count to a per-100-words
    rate, and compares the two rates with a 1.5x dominance threshold.

    Args:
        text (str): Text to analyze.

    Returns:
        str: "Formal", "Informal", or "Neutral". Empty text and texts where
        neither style clearly dominates are reported as "Neutral".
    """
    formal_indicators = [
        r'\b(therefore|thus|consequently|furthermore|moreover|however)\b',
        r'\b(in accordance with|with respect to|regarding|concerning)\b',
        r'\b(shall|must|may|will be required to)\b',
        r'\b(it is|there are|there is)\b',
        r'\b(Mr\.|Ms\.|Dr\.|Prof\.)\b'
    ]
    informal_indicators = [
        r'\b(like|yeah|cool|awesome|gonna|wanna|gotta)\b',
        r'(\!{2,}|\?{2,})',
        r'\b(lol|haha|wow|omg|btw)\b',
        r'\b(don\'t|can\'t|won\'t|shouldn\'t)\b',
        r'(\.{3,})'
    ]

    # Total matches across all patterns, case-insensitive (generator
    # expressions instead of throwaway list comprehensions).
    formal_score = sum(
        len(re.findall(pattern, text, re.IGNORECASE))
        for pattern in formal_indicators
    )
    informal_score = sum(
        len(re.findall(pattern, text, re.IGNORECASE))
        for pattern in informal_indicators
    )

    # Normalize to matches per 100 words so scores are length-independent.
    # Both scores are scaled by the same factor, so the comparison below is
    # unaffected; with zero words both scores are already 0.
    words = len(text.split())
    if words > 0:
        formal_score = formal_score / (words / 100)
        informal_score = informal_score / (words / 100)

    # One style must beat the other by 50% to be reported as dominant.
    if formal_score > informal_score * 1.5:
        return "Formal"
    if informal_score > formal_score * 1.5:
        return "Informal"
    return "Neutral"
def classify_sentiment(text):
    """
    Classify text sentiment using NLTK's VADER.

    Uses the standard VADER compound-score thresholds: >= 0.05 is positive,
    <= -0.05 is negative, anything in between is neutral.

    Args:
        text (str): Text to analyze.

    Returns:
        str: "Positive", "Neutral", or "Negative". Falls back to "Neutral"
        if the analyzer cannot be constructed (e.g. missing lexicon).
    """
    try:
        # Kept inside the try: construction raises if the vader_lexicon
        # resource was never downloaded.
        sia = SentimentIntensityAnalyzer()
        compound = sia.polarity_scores(text)['compound']
    except Exception:
        # Narrowed from a bare `except:`; degrade gracefully rather than
        # crash the comparison pipeline.
        return "Neutral"

    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"
def classify_complexity(text):
    """
    Classify text complexity from average sentence and word length.

    Long sentences (> 20 words on average) or long words (> 6 characters on
    average) mark the text as complex; short sentences (< 12 words) or short
    words (< 4 characters) mark it as simple; otherwise it is average.

    Args:
        text (str): Text to analyze.

    Returns:
        str: "Simple", "Average", or "Complex". Text with no detectable
        sentences is reported as "Average".
    """
    sentence_list = nltk.sent_tokenize(text)
    if not sentence_list:
        return "Average"

    # Mean number of whitespace-separated words per sentence.
    mean_sentence_len = statistics.mean(len(s.split()) for s in sentence_list)

    # Mean length of alphanumeric tokens only (punctuation is skipped).
    token_lengths = [
        len(token)
        for sentence in sentence_list
        for token in nltk.word_tokenize(sentence)
        if token.isalnum()
    ]
    mean_word_len = statistics.mean(token_lengths) if token_lengths else 0

    if mean_sentence_len > 20 or mean_word_len > 6:
        return "Complex"
    if mean_sentence_len < 12 or mean_word_len < 4:
        return "Simple"
    return "Average"
def compare_classifications(text1, text2):
    """
    Compare writing-style classifications between two texts.

    Runs the formality, sentiment, and complexity classifiers over both
    texts and reports only the dimensions on which they differ.

    Args:
        text1 (str): First text (labelled "Model 1" in the output).
        text2 (str): Second text (labelled "Model 2" in the output).

    Returns:
        dict: Keys among "Formality", "Sentiment", "Complexity", each mapped
        to a human-readable difference description; if the texts agree on
        every dimension, a single "Summary" entry is returned instead.
    """
    results = {}

    first, second = classify_formality(text1), classify_formality(text2)
    if first != second:
        results["Formality"] = f"Model 1 is {first.lower()}, while Model 2 is {second.lower()}"

    first, second = classify_sentiment(text1), classify_sentiment(text2)
    if first != second:
        results["Sentiment"] = f"Model 1 has a {first.lower()} tone, while Model 2 has a {second.lower()} tone"

    first, second = classify_complexity(text1), classify_complexity(text2)
    if first != second:
        results["Complexity"] = f"Model 1 uses {first.lower()} language, while Model 2 uses {second.lower()} language"

    if not results:
        results["Summary"] = "Both responses have similar writing characteristics"
    return results