# NOTE: the original capture began with a "Spaces / Sleeping" banner — a
# Hugging Face Spaces page artifact, not source code.
import re
import statistics

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
def download_nltk_resources():
    """Download the NLTK data this module depends on, best-effort.

    ``vader_lexicon`` is required by SentimentIntensityAnalyzer
    (classify_sentiment); ``punkt``/``punkt_tab`` are required by
    nltk.sent_tokenize / nltk.word_tokenize (classify_complexity).
    The original only fetched ``vader_lexicon``, so sentence
    tokenization failed with LookupError on a fresh install.

    Download failures (e.g. offline environments) are deliberately
    ignored so the module can still be imported; the classifiers
    degrade gracefully in that case.
    """
    # punkt_tab is the name used by newer NLTK releases; requesting an
    # unknown resource is harmless (nltk.download returns False).
    for resource in ('vader_lexicon', 'punkt', 'punkt_tab'):
        try:
            nltk.download(resource, quiet=True)
        except Exception:
            # Best effort only — never block module import on a download.
            pass


# Ensure NLTK resources are available at import time.
download_nltk_resources()
def classify_formality(text):
    """Heuristically label *text* as "Formal", "Neutral", or "Informal".

    Counts case-insensitive regex hits for formal vs. informal markers,
    normalizes each count to a per-100-words rate, and returns a label
    only when one rate exceeds the other by more than 50%.

    Args:
        text (str): Text to analyze.

    Returns:
        str: "Formal", "Neutral", or "Informal".
    """
    formal_patterns = (
        r'\b(therefore|thus|consequently|furthermore|moreover|however)\b',
        r'\b(in accordance with|with respect to|regarding|concerning)\b',
        r'\b(shall|must|may|will be required to)\b',
        r'\b(it is|there are|there is)\b',
        r'\b(Mr\.|Ms\.|Dr\.|Prof\.)\b',
    )
    informal_patterns = (
        r'\b(like|yeah|cool|awesome|gonna|wanna|gotta)\b',
        r'(\!{2,}|\?{2,})',
        r'\b(lol|haha|wow|omg|btw)\b',
        r'\b(don\'t|can\'t|won\'t|shouldn\'t)\b',
        r'(\.{3,})',
    )

    def _hits(patterns):
        # Total case-insensitive matches across all indicator patterns.
        return sum(len(re.findall(p, text, re.IGNORECASE)) for p in patterns)

    formal_rate = _hits(formal_patterns)
    informal_rate = _hits(informal_patterns)

    word_count = len(text.split())
    if word_count > 0:
        # Express both counts as hits per 100 words so long and short
        # texts are compared on the same footing.
        scale = word_count / 100
        formal_rate /= scale
        informal_rate /= scale

    if formal_rate > informal_rate * 1.5:
        return "Formal"
    if informal_rate > formal_rate * 1.5:
        return "Informal"
    return "Neutral"
def classify_sentiment(text):
    """Classify text sentiment using NLTK's VADER.

    Args:
        text (str): Text to analyze.

    Returns:
        str: "Positive", "Neutral", or "Negative". Falls back to
        "Neutral" when the VADER lexicon is unavailable.
    """
    try:
        # Build the analyzer once and cache it on the function: each
        # SentimentIntensityAnalyzer() call reloads the full VADER
        # lexicon from disk, which is wasteful to repeat per call.
        analyzer = getattr(classify_sentiment, '_analyzer', None)
        if analyzer is None:
            analyzer = SentimentIntensityAnalyzer()
            classify_sentiment._analyzer = analyzer
        compound = analyzer.polarity_scores(text)['compound']
        # Standard VADER thresholds: |compound| < 0.05 is the neutral band.
        if compound >= 0.05:
            return "Positive"
        if compound <= -0.05:
            return "Negative"
        return "Neutral"
    except Exception:
        # Missing lexicon data etc.: degrade to a neutral label rather
        # than crash. (The original bare `except:` also swallowed
        # KeyboardInterrupt/SystemExit; Exception does not.)
        return "Neutral"
def classify_complexity(text):
    """Classify text complexity from sentence length and word length.

    Args:
        text (str): Text to analyze.

    Returns:
        str: "Simple", "Average", or "Complex" ("Average" for empty input).
    """
    try:
        sentences = nltk.sent_tokenize(text)
    except LookupError:
        # The 'punkt' tokenizer data may be absent (downloads are
        # best-effort); fall back to a crude punctuation-based split
        # instead of raising.
        sentences = [s for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
    if not sentences:
        return "Average"

    # Average number of whitespace-separated words per sentence.
    # `sentences` is non-empty here, so mean() is safe.
    sentence_lengths = [len(s.split()) for s in sentences]
    avg_sentence_length = statistics.mean(sentence_lengths)

    # Average length of alphanumeric tokens only (skips punctuation).
    try:
        words = [word for sentence in sentences
                 for word in nltk.word_tokenize(sentence) if word.isalnum()]
    except LookupError:
        words = [word for sentence in sentences
                 for word in re.findall(r'\w+', sentence)]
    avg_word_length = statistics.mean([len(word) for word in words]) if words else 0

    # Heuristic thresholds: >20 words/sentence or >6 chars/word reads
    # as complex; <12 words/sentence or <4 chars/word reads as simple.
    if avg_sentence_length > 20 or avg_word_length > 6:
        return "Complex"
    if avg_sentence_length < 12 or avg_word_length < 4:
        return "Simple"
    return "Average"
def compare_classifications(text1, text2):
    """Summarize how two texts differ in formality, sentiment, and complexity.

    Args:
        text1 (str): First text (reported as "Model 1").
        text2 (str): Second text (reported as "Model 2").

    Returns:
        dict: Human-readable difference descriptions keyed by dimension;
        a single "Summary" entry when no dimension differs.
    """
    # Classify both texts along each dimension, paired with the message
    # template used when the two labels disagree.
    comparisons = {
        "Formality": (
            classify_formality(text1), classify_formality(text2),
            "Model 1 is {0}, while Model 2 is {1}",
        ),
        "Sentiment": (
            classify_sentiment(text1), classify_sentiment(text2),
            "Model 1 has a {0} tone, while Model 2 has a {1} tone",
        ),
        "Complexity": (
            classify_complexity(text1), classify_complexity(text2),
            "Model 1 uses {0} language, while Model 2 uses {1} language",
        ),
    }

    results = {}
    for dimension, (first, second, template) in comparisons.items():
        if first != second:
            results[dimension] = template.format(first.lower(), second.lower())

    if not results:
        results["Summary"] = "Both responses have similar writing characteristics"
    return results