import re
import statistics

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer


def download_nltk_resources():
    """Download the NLTK resources this module depends on.

    ``vader_lexicon`` is required by :func:`classify_sentiment`; ``punkt`` is
    required by :func:`classify_complexity` (``nltk.sent_tokenize`` /
    ``nltk.word_tokenize``). Failures are swallowed deliberately so offline
    environments can still import the module and use any cached resources.
    """
    for resource in ('vader_lexicon', 'punkt'):
        try:
            nltk.download(resource, quiet=True)
        except Exception:
            pass  # best effort: fall back to whatever is already cached


# Ensure NLTK resources are available at import time.
download_nltk_resources()


# Formality indicator patterns, compiled once at module load instead of being
# re-scanned by re.findall(pattern, ...) on every classify_formality() call.
_FORMAL_PATTERNS = [
    re.compile(p, re.IGNORECASE)
    for p in (
        r'\b(therefore|thus|consequently|furthermore|moreover|however)\b',
        r'\b(in accordance with|with respect to|regarding|concerning)\b',
        r'\b(shall|must|may|will be required to)\b',
        r'\b(it is|there are|there is)\b',
        r'\b(Mr\.|Ms\.|Dr\.|Prof\.)\b',
    )
]
_INFORMAL_PATTERNS = [
    re.compile(p, re.IGNORECASE)
    for p in (
        r'\b(like|yeah|cool|awesome|gonna|wanna|gotta)\b',
        r'(\!{2,}|\?{2,})',
        r'\b(lol|haha|wow|omg|btw)\b',
        r'\b(don\'t|can\'t|won\'t|shouldn\'t)\b',
        r'(\.{3,})',
    )
]

# Shared VADER analyzer, created lazily: construction loads the lexicon from
# disk, so it should happen once rather than on every classify_sentiment() call.
_ANALYZER = None


def _get_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _ANALYZER
    if _ANALYZER is None:
        _ANALYZER = SentimentIntensityAnalyzer()
    return _ANALYZER


def classify_formality(text):
    """Classify text formality based on simple heuristics.

    Args:
        text (str): Text to analyze

    Returns:
        str: Formality level ("Formal", "Neutral", or "Informal")
    """
    formal_score = sum(len(p.findall(text)) for p in _FORMAL_PATTERNS)
    informal_score = sum(len(p.findall(text)) for p in _INFORMAL_PATTERNS)

    # Normalize both scores to hits per 100 words so short and long texts
    # are comparable.
    words = len(text.split())
    if words > 0:
        formal_score = formal_score / (words / 100)
        informal_score = informal_score / (words / 100)

    # Require one side to clearly dominate (1.5x) before committing to a label.
    if formal_score > informal_score * 1.5:
        return "Formal"
    elif informal_score > formal_score * 1.5:
        return "Informal"
    else:
        return "Neutral"


def classify_sentiment(text):
    """Classify text sentiment using NLTK's VADER.

    Args:
        text (str): Text to analyze

    Returns:
        str: Sentiment ("Positive", "Neutral", or "Negative"). "Neutral" is
        also returned when the analyzer cannot run (e.g. missing lexicon),
        preserving the original best-effort behavior.
    """
    try:
        compound = _get_analyzer().polarity_scores(text)['compound']
    except Exception:
        # Without the vader_lexicon resource we cannot score; report the
        # neutral default rather than crashing callers.
        return "Neutral"

    # Standard VADER thresholds: within +/-0.05 of zero counts as neutral.
    if compound >= 0.05:
        return "Positive"
    elif compound <= -0.05:
        return "Negative"
    else:
        return "Neutral"


def classify_complexity(text):
    """Classify text complexity based on sentence length and word length.

    Args:
        text (str): Text to analyze

    Returns:
        str: Complexity level ("Simple", "Average", or "Complex")
    """
    sentences = nltk.sent_tokenize(text)
    if not sentences:
        return "Average"

    # Average words per sentence (whitespace split, matching the original).
    sentence_lengths = [len(s.split()) for s in sentences]
    avg_sentence_length = statistics.mean(sentence_lengths) if sentence_lengths else 0

    # Only alphanumeric tokens count as words (punctuation tokens excluded).
    words = [
        word
        for sentence in sentences
        for word in nltk.word_tokenize(sentence)
        if word.isalnum()
    ]
    avg_word_length = statistics.mean([len(word) for word in words]) if words else 0

    # Thresholds: long sentences or long words -> Complex; short both -> Simple.
    if avg_sentence_length > 20 or avg_word_length > 6:
        return "Complex"
    elif avg_sentence_length < 12 or avg_word_length < 4:
        return "Simple"
    else:
        return "Average"


def compare_classifications(text1, text2):
    """Compare writing-style classifications between two texts.

    Args:
        text1 (str): First text
        text2 (str): Second text

    Returns:
        dict: Human-readable differences keyed by "Formality", "Sentiment",
        and/or "Complexity"; a single "Summary" entry when the texts agree
        on all three dimensions.
    """
    formality1 = classify_formality(text1)
    formality2 = classify_formality(text2)
    sentiment1 = classify_sentiment(text1)
    sentiment2 = classify_sentiment(text2)
    complexity1 = classify_complexity(text1)
    complexity2 = classify_complexity(text2)

    results = {}
    if formality1 != formality2:
        results["Formality"] = f"Model 1 is {formality1.lower()}, while Model 2 is {formality2.lower()}"
    if sentiment1 != sentiment2:
        results["Sentiment"] = f"Model 1 has a {sentiment1.lower()} tone, while Model 2 has a {sentiment2.lower()} tone"
    if complexity1 != complexity2:
        results["Complexity"] = f"Model 1 uses {complexity1.lower()} language, while Model 2 uses {complexity2.lower()} language"
    if not results:
        results["Summary"] = "Both responses have similar writing characteristics"
    return results