Spaces:
Runtime error
Runtime error
Add text analysis tool and update project dependencies
Browse files
- Introduced new text_analysis.py tool with comprehensive text analysis capabilities
- Updated requirements.txt with new dependencies for text analysis and project maintenance
- Integrated analyze_text tool into the CodeAgent in app.py
- app.py +2 -1
- requirements.txt +24 -5
- tools/text_analysis.py +86 -0
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import requests
|
|
| 4 |
import pytz
|
| 5 |
import yaml
|
| 6 |
from tools.final_answer import FinalAnswerTool
|
|
|
|
| 7 |
|
| 8 |
from Gradio_UI import GradioUI
|
| 9 |
|
|
@@ -55,7 +56,7 @@ with open("prompts.yaml", "r") as stream:
|
|
| 55 |
|
| 56 |
agent = CodeAgent(
|
| 57 |
model=model,
|
| 58 |
-
tools=[final_answer],
|
| 59 |
max_steps=6,
|
| 60 |
verbosity_level=1,
|
| 61 |
grammar=None,
|
|
|
|
| 4 |
import pytz
|
| 5 |
import yaml
|
| 6 |
from tools.final_answer import FinalAnswerTool
|
| 7 |
+
from tools.text_analysis import analyze_text
|
| 8 |
|
| 9 |
from Gradio_UI import GradioUI
|
| 10 |
|
|
|
|
| 56 |
|
| 57 |
agent = CodeAgent(
|
| 58 |
model=model,
|
| 59 |
+
tools=[final_answer, analyze_text], # Added analyze_text tool
|
| 60 |
max_steps=6,
|
| 61 |
verbosity_level=1,
|
| 62 |
grammar=None,
|
requirements.txt
CHANGED
|
@@ -1,5 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
smolagents
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core dependencies
|
| 2 |
+
smolagents>=0.1.0
|
| 3 |
+
gradio>=4.0.0
|
| 4 |
+
pyyaml>=6.0.1
|
| 5 |
+
pytz>=2024.1
|
| 6 |
+
requests>=2.31.0
|
| 7 |
+
|
| 8 |
+
# For text analysis and NLP
|
| 9 |
+
textblob>=0.17.1
|
| 10 |
+
nltk>=3.8.1
|
| 11 |
+
|
| 12 |
+
# For web scraping and URL processing
|
| 13 |
+
beautifulsoup4>=4.12.3
|
| 14 |
+
validators>=0.22.0
|
| 15 |
+
|
| 16 |
+
# For data visualization
|
| 17 |
+
matplotlib>=3.8.2
|
| 18 |
+
numpy>=1.26.4
|
| 19 |
+
|
| 20 |
+
# Development dependencies
|
| 21 |
+
pytest>=8.0.0
|
| 22 |
+
black>=24.1.1
|
| 23 |
+
isort>=5.13.2
|
| 24 |
+
mypy>=1.8.0
|
tools/text_analysis.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List, Union
|
| 2 |
+
import logging
|
| 3 |
+
from textblob import TextBlob
|
| 4 |
+
import nltk
|
| 5 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 6 |
+
from nltk.corpus import stopwords
|
| 7 |
+
from smolagents import tool
|
| 8 |
+
|
| 9 |
+
# Set up logging
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
# Download required NLTK data
|
| 13 |
+
# Fetch the NLTK resources this module relies on, once, at import time:
#   punkt / punkt_tab          - tokenizer models behind sent_tokenize and
#                                word_tokenize (recent NLTK releases load
#                                "punkt_tab" instead of the pickled "punkt")
#   stopwords                  - word list used to filter content words
#   averaged_perceptron_tagger - part-of-speech tagger data
#   brown                      - corpus TextBlob's default noun-phrase
#                                extractor is trained on
# Best effort: each resource is attempted independently so one failure does
# not skip the others, and nothing is raised, so importing the module still
# succeeds. nltk.download signals most failures by returning False rather
# than raising, hence the explicit check on its return value.
for _resource in (
    'punkt',
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger',
    'brown',
):
    try:
        if not nltk.download(_resource, quiet=True):
            logger.error(f"Failed to download NLTK data: {_resource}")
    except Exception as e:
        logger.error(f"Failed to download NLTK data: {e}")
|
| 19 |
+
|
| 20 |
+
@tool
def analyze_text(text: str) -> Dict[str, Union[str, float, List[str]]]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

    Empty text, or text made up only of stop words and punctuation, produces
    zeroed readability figures rather than an error. Any unexpected failure is
    logged and reported through an "error" entry next to zeroed placeholder
    values, so this function never raises.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
        - sentiment: Dict with polarity and subjectivity scores (rounded to 2 decimals) and a sentiment_label of "positive", "negative" or "neutral"
        - key_phrases: List of up to 5 distinct noun phrases, in order of first appearance
        - readability: Dict with score, avg_sentence_length and avg_word_length
        - summary: Dict with sentence_count, word_count and unique_words
    """
    try:
        # Create TextBlob object
        blob = TextBlob(text)

        # Sentiment analysis; the label follows the sign of the raw polarity.
        polarity = blob.sentiment.polarity
        subjectivity = blob.sentiment.subjectivity
        if polarity > 0:
            sentiment_label = "positive"
        elif polarity < 0:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(subjectivity, 2),
            "sentiment_label": sentiment_label,
        }

        # Extract key phrases (noun phrases). dict.fromkeys de-duplicates while
        # keeping first-occurrence order, so the five phrases returned are the
        # same on every run (slicing a set would pick an arbitrary five).
        distinct_phrases = dict.fromkeys(str(phrase) for phrase in blob.noun_phrases)
        key_phrases = list(distinct_phrases)[:5]

        # Basic text statistics
        sentences = sent_tokenize(text)
        words = word_tokenize(text)
        # Build the stop-word set once; calling stopwords.words() per token
        # would re-read the list and scan it linearly for every word.
        english_stopwords = set(stopwords.words('english'))
        words_no_stop = [
            lowered
            for lowered in (word.lower() for word in words)
            if lowered not in english_stopwords and lowered.isalnum()
        ]

        # Calculate readability (basic metric based on sentence and word length).
        # The coefficients are those of the Flesch-Kincaid grade formula, but
        # applied to average characters per content word instead of syllables
        # per word, so the score is only a rough indicator.
        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
        avg_word_length = (
            sum(len(word) for word in words_no_stop) / len(words_no_stop)
            if words_no_stop
            else 0.0
        )
        if sentences and words_no_stop:
            readability_score = round(
                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1
            )
        else:
            # Nothing measurable (empty text or no content words): report a
            # neutral zero instead of dividing by zero.
            readability_score = 0.0

        # Prepare response
        analysis_result = {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1),
            },
            "summary": {
                "sentence_count": len(sentences),
                "word_count": len(words),
                "unique_words": len(set(words_no_stop)),
            },
        }

        return analysis_result

    except Exception as e:
        # Tool boundary: never propagate to the agent; log with traceback and
        # return a result of the same shape carrying the error message.
        logger.exception(f"Error in text analysis: {e}")
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
        }
|