First_agent_template / tools /text_analysis.py
strickvl's picture
Add text analysis tool and update project dependencies
4e257c7 unverified
raw
history blame
3.27 kB
from typing import Dict, List, Union
import logging
from textblob import TextBlob
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from smolagents import tool
# Set up logging
logger = logging.getLogger(__name__)


def _download_nltk_data(resources):
    """Best-effort download of the given NLTK resources, logging every failure.

    nltk.download() normally signals a failed download by returning False
    rather than raising, so the return value is checked in addition to
    catching exceptions. Each resource is attempted independently so that one
    failure does not prevent the remaining downloads.

    Args:
        resources: Iterable of NLTK resource identifiers to download.
    """
    for resource in resources:
        try:
            downloaded = nltk.download(resource, quiet=True)
        except Exception as e:
            logger.error(f"Failed to download NLTK data: {e}")
            continue
        if not downloaded:
            logger.error("Failed to download NLTK data: %s", resource)


# Download required NLTK data
_download_nltk_data(('punkt', 'stopwords', 'averaged_perceptron_tagger'))
@tool
def analyze_text(text: str) -> Dict[str, Union[str, float, List[str]]]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
            - sentiment: Dict with polarity and subjectivity scores (rounded to 2 decimals) and a sentiment_label of "positive", "negative" or "neutral"
            - key_phrases: List of up to 5 distinct noun phrases, in order of first appearance
            - readability: Dict with score, avg_sentence_length and avg_word_length
            - summary: Dict with sentence_count, word_count (tokens returned by word_tokenize) and unique_words (distinct alphanumeric non-stop-words)
        If the analysis raises, the same keys are returned with zeroed or empty values plus an "error" key describing the failure.
    """
    try:
        # Create TextBlob object
        blob = TextBlob(text)

        # Sentiment analysis
        polarity = blob.sentiment.polarity
        subjectivity = blob.sentiment.subjectivity
        if polarity > 0:
            sentiment_label = "positive"
        elif polarity < 0:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(subjectivity, 2),
            "sentiment_label": sentiment_label,
        }

        # Extract key phrases (noun phrases). dict.fromkeys de-duplicates while
        # keeping first-seen order, so the five phrases kept are the same on
        # every run (iteration order of a set of strings is not stable).
        key_phrases = list(
            dict.fromkeys(str(phrase) for phrase in blob.noun_phrases)
        )[:5]

        # Basic text statistics
        sentences = sent_tokenize(text)
        words = word_tokenize(text)
        # Load the stop-word list once instead of once per token.
        english_stopwords = set(stopwords.words('english'))
        words_no_stop = [
            word.lower()
            for word in words
            if word.isalnum() and word.lower() not in english_stopwords
        ]

        # Calculate readability (basic metric based on sentence and word length).
        # Empty text, or text made only of stop words / punctuation, would
        # otherwise divide by zero; report 0.0 for whatever cannot be computed.
        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
        if words_no_stop:
            avg_word_length = (
                sum(len(word) for word in words_no_stop) / len(words_no_stop)
            )
        else:
            avg_word_length = 0.0

        if sentences and words_no_stop:
            # Same coefficients as the Flesch-Kincaid grade-level formula, but
            # applied to characters per content word rather than syllables per
            # word, so the score is a rough indicator, not a true grade level.
            readability_score = round(
                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1
            )
        else:
            readability_score = 0.0

        # Prepare response
        analysis_result = {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1),
            },
            "summary": {
                "sentence_count": len(sentences),
                "word_count": len(words),
                "unique_words": len(set(words_no_stop)),
            },
        }
        return analysis_result
    except Exception as e:
        # Tool boundary: never raise into the agent; log with traceback and
        # hand back a result of the usual shape flagged with an "error" key.
        logger.exception("Error in text analysis: %s", e)
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
        }