diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -7,43 +7,18 @@ from plotly.subplots import make_subplots
import numpy as np
from wordcloud import WordCloud
from collections import Counter, defaultdict
+import copy
import re
import json
import csv
import io
import tempfile
from datetime import datetime
import logging
-from functools import lru_cache
+from functools import lru_cache, wraps
from dataclasses import dataclass
-from typing import List, Dict, Optional, Tuple
+from typing import List, Dict, Optional, Tuple, Any, Callable
+from contextlib import contextmanager
import nltk
from nltk.corpus import stopwords
import langdetect
import pandas as pd
-
-# Try to import SHAP and LIME, fall back to basic analysis if not available
-try:
- import shap
- SHAP_AVAILABLE = True
-except ImportError:
- SHAP_AVAILABLE = False
- logger.warning("SHAP not available, using basic analysis")
-
-try:
- from lime.lime_text import LimeTextExplainer
- LIME_AVAILABLE = True
-except ImportError:
- LIME_AVAILABLE = False
- logger.warning("LIME not available, using basic analysis")
+import gc
# Configuration
@dataclass
class Config:
+    """Central tunables for history, batching, tokenization and caching."""
+    # Oldest history entries are dropped once the list grows past this size.
MAX_HISTORY_SIZE: int = 500
+    # Batch inputs beyond this many texts are truncated.
BATCH_SIZE_LIMIT: int = 30
+    # Passed to the tokenizer as max_length (truncation enabled).
MAX_TEXT_LENGTH: int = 512
- CACHE_SIZE: int = 64
+    # maxsize for the lru_cache on TextProcessor.clean_text.
+    CACHE_SIZE: int = 128
+    BATCH_PROCESSING_SIZE: int = 8
+    # Shortest word kept by the cleaning/keyword filters; 3 matches the
+    # previous hard-coded `len(w) > 2` check.
+    # NOTE(review): confirm this field is not already declared elsewhere.
+    MIN_WORD_LENGTH: int = 3
# Supported languages and models
SUPPORTED_LANGUAGES = {
@@ -60,9 +35,10 @@ class Config:
'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
'zh': "uer/roberta-base-finetuned-dianping-chinese"
+
}
- # Color themes
+ # Color themes for Plotly
THEMES = {
'default': {'pos': '#4CAF50', 'neg': '#F44336', 'neu': '#FF9800'},
'ocean': {'pos': '#0077BE', 'neg': '#FF6B35', 'neu': '#00BCD4'},
@@ -84,16 +60,59 @@ try:
except:
STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
+# Decorators and Context Managers
+def handle_errors(default_return=None):
+    """Build a decorator that logs any exception and returns a fallback.
+
+    Args:
+        default_return: Value handed back when the wrapped callable raises.
+
+    Returns:
+        A decorator; the wrapped callable keeps its name and docstring.
+    """
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except Exception:
+                # Boundary handler: keep the traceback in the log, then degrade.
+                logging.getLogger(__name__).exception("%s failed", func.__name__)
+                # Copy so a caller mutating the fallback (e.g. adding keys to
+                # a dict) cannot corrupt what later failures receive.
+                return copy.deepcopy(default_return)
+        return wrapper
+    return decorator
+
+@contextmanager
+def memory_cleanup():
+ """Run the managed block, then force a garbage-collection pass.
+
+ The collection sits in ``finally``, so it runs whether the block
+ completes normally or raises.
+ """
+
+ try:
+ yield
+ finally:
+
+ gc.collect()
+
+class ThemeContext:
+    """Resolve a theme name to its colour palette.
+
+    ``theme`` holds the requested name; ``colors`` holds the matching
+    ``config.THEMES`` entry, or the ``'default'`` palette when the name is
+    not a known theme.
+    """
+
+    def __init__(self, theme: str = 'default'):
+        self.theme = theme
+        self.colors = config.THEMES.get(theme, config.THEMES['default'])
+
+# Enhanced Model Manager with Multi-language Support
class ModelManager:
- """Manages multiple language models"""
+ """Multi-language model manager with lazy loading"""
+ _instance = None
+
+
+
+
+    def __new__(cls):
+        """Return the one shared instance, creating it on first use."""
+        if cls._instance is not None:
+            return cls._instance
+        # First call: build the singleton and mark it as not yet initialized.
+        cls._instance = super().__new__(cls)
+        cls._instance._initialized = False
+        return cls._instance
+
def __init__(self):
- self.models = {}
- self.tokenizers = {}
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- self._load_default_model()
+ if not self._initialized:
+ self.models = {}
+ self.tokenizers = {}
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ self._load_default_models()
+ self._initialized = True
- def _load_default_model(self):
- """Load the default models"""
+ def _load_default_models(self):
+ """Load default models"""
+
+
+
+
+
+
+
+
+
+
try:
# Load multilingual model as default
model_name = config.MODELS['multilingual']
@@ -117,17 +136,13 @@ class ModelManager:
"""Get model for specific language"""
if language == 'zh':
return self.models['zh'], self.tokenizers['zh']
- elif language in ['en', 'auto'] or language not in config.SUPPORTED_LANGUAGES:
- return self.models['default'], self.tokenizers['default']
- return self.models['default'], self.tokenizers['default'] # Use multilingual for other languages
+ return self.models['default'], self.tokenizers['default']
@staticmethod
def detect_language(text: str) -> str:
- """Detect text language properly"""
+ """Detect text language"""
try:
- # Use langdetect for all languages
detected = langdetect.detect(text)
- # Map some common langdetect codes to our supported languages
language_mapping = {
'zh-cn': 'zh',
'zh-tw': 'zh'
@@ -137,32 +152,80 @@ class ModelManager:
except:
return 'en'
-model_manager = ModelManager()
+# Simplified Text Processing
+class TextProcessor:
+    """Text cleaning, keyword extraction and batch-input parsing helpers."""
+
+    @staticmethod
+    @lru_cache(maxsize=config.CACHE_SIZE)
+    def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
+        """Normalize text for keyword work.
+
+        Text containing CJK ideographs is only stripped of surrounding
+        whitespace. Anything else is lower-cased, optionally loses digits
+        and punctuation, and then drops stop words and words shorter than
+        ``config.MIN_WORD_LENGTH``.
+        """
+        stripped = text.strip()
+
+        # Don't clean Chinese text aggressively
+        if re.search(r'[\u4e00-\u9fff]', stripped):
+            return stripped
+
+        normalized = stripped.lower()
+        if remove_numbers:
+            normalized = re.sub(r'\d+', '', normalized)
+        if remove_punctuation:
+            normalized = re.sub(r'[^\w\s]', '', normalized)
+
+        return ' '.join(
+            token for token in normalized.split()
+            if len(token) >= config.MIN_WORD_LENGTH and token not in STOP_WORDS
+        )
+
+    @staticmethod
+    def extract_keywords(text: str, top_k: int = 5) -> List[str]:
+        """Return the ``top_k`` most frequent units of ``text``.
+
+        Units are single characters for text containing CJK ideographs and
+        cleaned words for everything else.
+        """
+        if re.search(r'[\u4e00-\u9fff]', text):
+            # Joining the ideograph runs yields a string, counted per character
+            units = ''.join(re.findall(r'[\u4e00-\u9fff]+', text))
+        else:
+            units = TextProcessor.clean_text(text).split()
+
+        ranked = Counter(units).most_common(top_k)
+        return [unit for unit, _count in ranked]
+
+    @staticmethod
+    def parse_batch_input(text: str) -> List[str]:
+        """Split textarea input into stripped, non-empty lines."""
+        stripped_lines = (line.strip() for line in text.strip().split('\n'))
+        return [line for line in stripped_lines if line]
+# Enhanced History Manager
class HistoryManager:
- """Enhanced history manager with more features"""
+ """Enhanced history management with filtering"""
def __init__(self):
self._history = []
- def add_entry(self, entry: Dict):
+ def add(self, entry: Dict):
+ """Add entry with timestamp"""
+ entry['timestamp'] = datetime.now().isoformat()
self._history.append(entry)
if len(self._history) > config.MAX_HISTORY_SIZE:
self._history = self._history[-config.MAX_HISTORY_SIZE:]
- def add_batch_entries(self, entries: List[Dict]):
- """Add multiple entries at once"""
+ def add_batch(self, entries: List[Dict]):
+ """Add multiple entries"""
for entry in entries:
- self.add_entry(entry)
+ self.add(entry)
- def get_history(self) -> List[Dict]:
+ def get_all(self) -> List[Dict]:
return self._history.copy()
- def get_recent_history(self, n: int = 10) -> List[Dict]:
- """Get n most recent entries"""
+ def get_recent(self, n: int = 10) -> List[Dict]:
return self._history[-n:] if self._history else []
- def filter_history(self, sentiment: str = None, language: str = None,
- min_confidence: float = None) -> List[Dict]:
+ def filter_by(self, sentiment: str = None, language: str = None,
+ min_confidence: float = None) -> List[Dict]:
"""Filter history by criteria"""
filtered = self._history
@@ -178,9 +241,12 @@ class HistoryManager:
def clear(self) -> int:
+        """Empty the history and return how many entries were removed."""
count = len(self._history)
self._history.clear()
return count
+
+ def size(self) -> int:
+ return len(self._history)
def get_stats(self) -> Dict:
+ """Get comprehensive statistics"""
if not self._history:
return {}
@@ -197,72 +263,109 @@ class HistoryManager:
'max_confidence': np.max(confidences),
'min_confidence': np.min(confidences),
'languages_detected': len(set(languages)),
- 'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
- 'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history])
+ 'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en'
}
-history_manager = HistoryManager()
-
-class TextProcessor:
- """Enhanced text processing"""
+# Core Sentiment Analysis Engine
+class SentimentEngine:
+ """Multi-language sentiment analysis engine"""
- @staticmethod
- @lru_cache(maxsize=config.CACHE_SIZE)
- def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
- """Clean text with options"""
- text = text.lower().strip()
-
- if remove_numbers:
- text = re.sub(r'\d+', '', text)
-
- if remove_punctuation:
- text = re.sub(r'[^\w\s]', '', text)
-
- words = text.split()
- cleaned_words = [w for w in words if w not in STOP_WORDS and len(w) > 2]
- return ' '.join(cleaned_words)
+ def __init__(self):
+ self.model_manager = ModelManager()
- @staticmethod
- def extract_keywords(text: str, top_k: int = 5) -> List[str]:
- """Extract key words from text"""
- # For Chinese text, extract characters
- if re.search(r'[\u4e00-\u9fff]', text):
- words = re.findall(r'[\u4e00-\u9fff]+', text)
- all_chars = ''.join(words)
- char_freq = Counter(all_chars)
- return [char for char, _ in char_freq.most_common(top_k)]
- else:
- # For other languages, use word-based extraction
- cleaned = TextProcessor.clean_text(text)
- words = cleaned.split()
- word_freq = Counter(words)
- return [word for word, _ in word_freq.most_common(top_k)]
+ def extract_attention_keywords(self, text: str, language: str = 'auto', top_k: int = 10) -> List[Tuple[str, float]]:
+ """Extract keywords using attention weights"""
+ try:
+ if language == 'auto':
+ language = self.model_manager.detect_language(text)
+
+ model, tokenizer = self.model_manager.get_model(language)
+
+ inputs = tokenizer(
+ text, return_tensors="pt", padding=True,
+ truncation=True, max_length=config.MAX_TEXT_LENGTH
+ ).to(self.model_manager.device)
+
+
+ with torch.no_grad():
+ outputs = model(**inputs, output_attentions=True)
+
+
+ if hasattr(outputs, 'attentions') and outputs.attentions:
+ # Use attention weights
+ attention = outputs.attentions[-1]
+ avg_attention = attention.mean(dim=1)[0, 0, :]
+
+ tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
+ attention_scores = avg_attention.cpu().numpy()
+
+ # Process tokens and scores
+ word_scores = {}
+ current_word = ""
+ current_score = 0.0
+
+ for token, score in zip(tokens, attention_scores):
+ if token in ['[CLS]', '[SEP]', '[PAD]', '', '']:
+ continue
+
+ if token.startswith('##') or token.startswith('▁'):
+ current_word += token.replace('##', '').replace('▁', '')
+ current_score = max(current_score, score)
+ else:
+ if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
+ word_scores[current_word.lower()] = current_score
+ current_word = token
+ current_score = score
+
+
+
+
+
+
+
+ if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
+ word_scores[current_word.lower()] = current_score
+
+ # Filter and sort
+ filtered_words = {
+ word: score for word, score in word_scores.items()
+ if word not in STOP_WORDS and len(word) >= config.MIN_WORD_LENGTH
+ }
+
+ sorted_words = sorted(filtered_words.items(), key=lambda x: x[1], reverse=True)
+ return sorted_words[:top_k]
+
+
- @staticmethod
- def parse_batch_input(text: str) -> List[str]:
- """Parse batch input from textarea"""
- lines = text.strip().split('\n')
- return [line.strip() for line in lines if line.strip()]
-class SentimentAnalyzer:
- """Enhanced sentiment analysis"""
+
+
+
+
+
+ except Exception as e:
+ logger.error(f"Attention keyword extraction failed: {e}")
+
+ # Fallback to simple keyword extraction
+ keywords = TextProcessor.extract_keywords(text, top_k)
+ return [(word, 0.1) for word in keywords]
- @staticmethod
- def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
- """Analyze single text with language support"""
+ @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'keywords': []})
+ def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
+ """Analyze single text with enhanced features"""
if not text.strip():
raise ValueError("Empty text provided")
- # Detect language if auto
+ # Detect language
if language == 'auto':
- detected_lang = model_manager.detect_language(text)
+ detected_lang = self.model_manager.detect_language(text)
else:
detected_lang = language
# Get appropriate model
- model, tokenizer = model_manager.get_model(detected_lang)
+ model, tokenizer = self.model_manager.get_model(detected_lang)
- # Preprocessing options - don't clean Chinese text
+ # Preprocessing
options = preprocessing_options or {}
processed_text = text
if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
@@ -272,324 +375,112 @@ class SentimentAnalyzer:
options.get('remove_numbers', False)
)
- try:
- # Tokenize and analyze
- inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(model_manager.device)
-
- with torch.no_grad():
- outputs = model(**inputs)
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
-
- # Handle different model outputs
- if len(probs) == 3: # negative, neutral, positive
- sentiment_idx = np.argmax(probs)
- sentiment_labels = ['Negative', 'Neutral', 'Positive']
- sentiment = sentiment_labels[sentiment_idx]
- confidence = float(probs[sentiment_idx])
-
- result = {
- 'sentiment': sentiment,
- 'confidence': confidence,
- 'neg_prob': float(probs[0]),
- 'neu_prob': float(probs[1]),
- 'pos_prob': float(probs[2]),
- 'has_neutral': True
- }
- else: # negative, positive
- pred = np.argmax(probs)
- sentiment = "Positive" if pred == 1 else "Negative"
- confidence = float(probs[pred])
-
- result = {
- 'sentiment': sentiment,
- 'confidence': confidence,
- 'neg_prob': float(probs[0]),
- 'pos_prob': float(probs[1]),
- 'neu_prob': 0.0,
- 'has_neutral': False
- }
-
- # Add metadata
- result.update({
- 'language': detected_lang,
- 'keywords': TextProcessor.extract_keywords(text),
- 'word_count': len(text.split()),
- 'char_count': len(text)
- })
-
- return result
-
- except Exception as e:
- logger.error(f"Analysis failed: {e}")
- raise
-
- @staticmethod
- def analyze_batch(texts: List[str], language: str = 'auto',
- preprocessing_options: Dict = None) -> List[Dict]:
- """Analyze multiple texts"""
- results = []
- for i, text in enumerate(texts):
- try:
- result = SentimentAnalyzer.analyze_text(text, language, preprocessing_options)
- result['batch_index'] = i
- results.append(result)
- except Exception as e:
- # Add error result
- results.append({
- 'sentiment': 'Error',
- 'confidence': 0.0,
- 'error': str(e),
- 'batch_index': i,
- 'text': text
- })
- return results
+ # Tokenize and analyze
+ inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
+ truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device)
-class ExplainabilityAnalyzer:
- """SHAP and LIME explainability analysis with fallbacks"""
-
- @staticmethod
- def create_prediction_function(model, tokenizer, device):
- """Create prediction function for LIME"""
- def predict_proba(texts):
- if isinstance(texts, str):
- texts = [texts]
-
- results = []
- for text in texts:
- try:
- inputs = tokenizer(text, return_tensors="pt", padding=True,
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
- with torch.no_grad():
- outputs = model(**inputs)
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
- results.append(probs)
- except Exception as e:
- # Return neutral probabilities on error
- if len(results) > 0:
- results.append(results[0]) # Use previous result
- else:
- results.append(np.array([0.33, 0.33, 0.34])) # Neutral fallback
-
- return np.array(results)
- return predict_proba
-
- @staticmethod
- def analyze_with_lime(text: str, model, tokenizer, device, num_features: int = 10) -> Dict:
- """Analyze text with LIME"""
- if not LIME_AVAILABLE:
- return {'method': 'LIME', 'error': 'LIME library not available'}
- try:
- # Create prediction function
- predict_fn = ExplainabilityAnalyzer.create_prediction_function(model, tokenizer, device)
-
- # Test prediction function first
- test_probs = predict_fn([text])
- if len(test_probs) == 0:
- return {'method': 'LIME', 'error': 'Prediction function failed'}
-
- # Determine class names based on model output
- num_classes = len(test_probs[0])
- if num_classes == 3:
- class_names = ['Negative', 'Neutral', 'Positive']
- else:
- class_names = ['Negative', 'Positive']
-
- # Initialize LIME explainer
- explainer = LimeTextExplainer(
- class_names=class_names,
- feature_selection='auto',
- split_expression=r'\W+',
- bow=False
- )
-
- # Generate explanation
- explanation = explainer.explain_instance(
- text,
- predict_fn,
- num_features=min(num_features, len(text.split())),
- num_samples=50 # Reduced for faster processing
- )
-
- # Extract feature importance
- feature_importance = explanation.as_list()
+ with torch.no_grad():
+ outputs = model(**inputs)
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
+
+ # Handle different model outputs
+ if len(probs) == 3: # negative, neutral, positive
+ sentiment_idx = np.argmax(probs)
+ sentiment_labels = ['Negative', 'Neutral', 'Positive']
+ sentiment = sentiment_labels[sentiment_idx]
+ confidence = float(probs[sentiment_idx])
- return {
- 'method': 'LIME',
- 'feature_importance': feature_importance,
- 'class_names': class_names
+ result = {
+ 'sentiment': sentiment,
+ 'confidence': confidence,
+ 'neg_prob': float(probs[0]),
+ 'neu_prob': float(probs[1]),
+ 'pos_prob': float(probs[2]),
+ 'has_neutral': True
}
+ else: # negative, positive
+ pred = np.argmax(probs)
+ sentiment = "Positive" if pred == 1 else "Negative"
+ confidence = float(probs[pred])
- except Exception as e:
- logger.error(f"LIME analysis failed: {e}")
- return {'method': 'LIME', 'error': str(e)}
-
- @staticmethod
- def analyze_with_attention(text: str, model, tokenizer, device) -> Dict:
- """Analyze text with attention weights - simplified version"""
- try:
- # Tokenize input
- inputs = tokenizer(text, return_tensors="pt", padding=True,
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
-
- # Get tokens for display
- tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
-
- # Simple attention simulation based on input importance
- # This is a fallback when model doesn't support attention output
- try:
- with torch.no_grad():
- outputs = model(**inputs, output_attentions=True)
- if hasattr(outputs, 'attentions') and outputs.attentions is not None:
- attentions = outputs.attentions
- # Average attention across layers and heads
- avg_attention = torch.mean(torch.stack(attentions), dim=(0, 1, 2)).cpu().numpy()
- else:
- raise AttributeError("No attention outputs")
- except:
- # Fallback: simulate attention based on token position and type
- avg_attention = np.random.uniform(0.1, 1.0, len(tokens))
- # Give higher attention to non-special tokens
- for i, token in enumerate(tokens):
- if token in ['[CLS]', '[SEP]', '', '', '']:
- avg_attention[i] *= 0.3
-
- # Create attention weights for each token
- attention_weights = []
- for i, token in enumerate(tokens):
- if i < len(avg_attention):
- # Clean token for display
- clean_token = token.replace('Ġ', '').replace('##', '')
- if clean_token.strip():
- attention_weights.append((clean_token, float(avg_attention[i])))
-
- return {
- 'method': 'Attention',
- 'tokens': [t[0] for t in attention_weights],
- 'attention_weights': attention_weights
+ result = {
+ 'sentiment': sentiment,
+ 'confidence': confidence,
+ 'neg_prob': float(probs[0]),
+ 'pos_prob': float(probs[1]),
+ 'neu_prob': 0.0,
+ 'has_neutral': False
}
-
- except Exception as e:
- logger.error(f"Attention analysis failed: {e}")
- return {'method': 'Attention', 'error': str(e)}
-
-class AdvancedVisualizer:
- """Visualizations for explainability analysis"""
-
- @staticmethod
- def create_lime_plot(lime_result: Dict, theme: str = 'default') -> go.Figure:
- """Create LIME feature importance plot"""
- if 'error' in lime_result:
- fig = go.Figure()
- fig.add_annotation(text=f"LIME Error: {lime_result['error']}",
- x=0.5, y=0.5, showarrow=False)
- return fig
-
- features, scores = zip(*lime_result['feature_importance'])
- colors = ['red' if score < 0 else 'green' for score in scores]
- fig = go.Figure(data=[
- go.Bar(
- y=features,
- x=scores,
- orientation='h',
- marker_color=colors,
- text=[f'{score:.3f}' for score in scores],
- textposition='auto'
- )
- ])
+ # Extract keywords
+ keywords = self.extract_attention_keywords(text, detected_lang)
- fig.update_layout(
- title="LIME Feature Importance",
- xaxis_title="Importance Score",
- yaxis_title="Features",
- height=400,
- showlegend=False
- )
+ # Add metadata
+ result.update({
+ 'language': detected_lang,
+ 'keywords': keywords,
+ 'word_count': len(text.split()),
+ 'char_count': len(text)
+ })
- return fig
+ return result
- @staticmethod
- def create_attention_plot(attention_result: Dict, theme: str = 'default') -> go.Figure:
- """Create attention weights visualization"""
- if 'error' in attention_result:
- fig = go.Figure()
- fig.add_annotation(
- text=f"Attention Error: {attention_result['error']}",
- x=0.5, y=0.5,
- xref="paper", yref="paper",
- showarrow=False,
- font=dict(size=14)
- )
- fig.update_layout(height=400, title="Attention Analysis Error")
- return fig
-
- if not attention_result.get('attention_weights'):
- fig = go.Figure()
- fig.add_annotation(
- text="No attention weights available",
- x=0.5, y=0.5,
- xref="paper", yref="paper",
- showarrow=False
- )
- fig.update_layout(height=400, title="No Attention Data")
- return fig
-
- tokens, weights = zip(*attention_result['attention_weights'])
-
- # Normalize weights for better visualization
- weights = np.array(weights)
- if weights.max() > weights.min():
- normalized_weights = (weights - weights.min()) / (weights.max() - weights.min())
- else:
- normalized_weights = weights
-
- # Limit display to top 15 tokens for readability
- if len(tokens) > 15:
- # Get top 15 by attention weight
- top_indices = np.argsort(weights)[-15:]
- tokens = [tokens[i] for i in top_indices]
- normalized_weights = normalized_weights[top_indices]
-
- fig = go.Figure(data=[
- go.Bar(
- x=list(range(len(tokens))),
- y=normalized_weights,
- text=tokens,
- textposition='outside',
- marker_color=normalized_weights,
- colorscale='Viridis',
- hovertemplate='%{text}
Weight: %{y:.3f}'
- )
- ])
+    @handle_errors(default_return=[])
+    def analyze_batch(self, texts: List[str], language: str = 'auto',
+                      preprocessing_options: Dict = None, progress_callback=None) -> List[Dict]:
+        """Analyze texts in chunks of ``config.BATCH_PROCESSING_SIZE``.
+
+        Args:
+            texts: Inputs to analyze; anything past ``config.BATCH_SIZE_LIMIT``
+                is dropped.
+            language: Language code forwarded to ``analyze_single``.
+            preprocessing_options: Options forwarded to ``analyze_single``.
+            progress_callback: Optional callable, invoked once per chunk with
+                the fraction of texts covered so far (0 < fraction <= 1).
+
+        Returns:
+            One dict per text, in input order. A text whose analysis raises
+            yields an entry with ``sentiment`` set to ``'Error'``.
+        """
+        if len(texts) > config.BATCH_SIZE_LIMIT:
+            texts = texts[:config.BATCH_SIZE_LIMIT]
- fig.update_layout(
- title="Attention Weights (Top Tokens)",
- xaxis_title="Token Position",
- yaxis_title="Attention Weight (Normalized)",
- height=400,
- showlegend=False,
- xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens)
- )
+        results = []
+        chunk_size = config.BATCH_PROCESSING_SIZE
+
+        for i in range(0, len(texts), chunk_size):
+            batch = texts[i:i + chunk_size]
+
+            if progress_callback:
+                progress_callback((i + len(batch)) / len(texts))
+
+            for text in batch:
+                preview = text[:100] + '...' if len(text) > 100 else text
+                try:
+                    # Copy before adding metadata: analyze_single may hand back
+                    # a shared fallback dict when it fails internally.
+                    result = dict(self.analyze_single(text, language, preprocessing_options))
+                    result['batch_index'] = len(results)
+                    result['text'] = preview
+                    result['full_text'] = text
+                    results.append(result)
+                except Exception as e:
+                    results.append({
+                        'sentiment': 'Error',
+                        'confidence': 0.0,
+                        'error': str(e),
+                        'batch_index': len(results),
+                        'text': preview,
+                        'full_text': text
+                    })
+
- return fig
- """Enhanced visualizations with Plotly"""
+        return results
+
+# Advanced Plotly Visualization System
+class PlotlyVisualizer:
+ """Enhanced Plotly visualizations"""
@staticmethod
- def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
- """Create an animated sentiment gauge"""
- colors = config.THEMES[theme]
+ @handle_errors(default_return=None)
+ def create_sentiment_gauge(result: Dict, theme: ThemeContext) -> go.Figure:
+ """Create animated sentiment gauge"""
+ colors = theme.colors
- if result['has_neutral']:
+ if result.get('has_neutral', False):
# Three-way gauge
fig = go.Figure(go.Indicator(
- mode = "gauge+number+delta",
- value = result['pos_prob'] * 100,
- domain = {'x': [0, 1], 'y': [0, 1]},
- title = {'text': f"Sentiment: {result['sentiment']}"},
- delta = {'reference': 50},
- gauge = {
+ mode="gauge+number+delta",
+ value=result['pos_prob'] * 100,
+ domain={'x': [0, 1], 'y': [0, 1]},
+ title={'text': f"Sentiment: {result['sentiment']}"},
+ delta={'reference': 50},
+ gauge={
'axis': {'range': [None, 100]},
'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
'steps': [
@@ -607,11 +498,11 @@ class AdvancedVisualizer:
else:
# Two-way gauge
fig = go.Figure(go.Indicator(
- mode = "gauge+number",
- value = result['confidence'] * 100,
- domain = {'x': [0, 1], 'y': [0, 1]},
- title = {'text': f"Confidence: {result['sentiment']}"},
- gauge = {
+ mode="gauge+number",
+ value=result['confidence'] * 100,
+ domain={'x': [0, 1], 'y': [0, 1]},
+ title={'text': f"Confidence: {result['sentiment']}"},
+ gauge={
'axis': {'range': [None, 100]},
'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
'steps': [
@@ -625,11 +516,12 @@ class AdvancedVisualizer:
return fig
@staticmethod
- def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
+ @handle_errors(default_return=None)
+ def create_probability_bars(result: Dict, theme: ThemeContext) -> go.Figure:
"""Create probability bar chart"""
- colors = config.THEMES[theme]
+ colors = theme.colors
- if result['has_neutral']:
+ if result.get('has_neutral', False):
labels = ['Negative', 'Neutral', 'Positive']
values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
bar_colors = [colors['neg'], colors['neu'], colors['pos']]
@@ -639,10 +531,10 @@ class AdvancedVisualizer:
bar_colors = [colors['neg'], colors['pos']]
fig = go.Figure(data=[
- go.Bar(x=labels, y=values, marker_color=bar_colors, text=[f'{v:.3f}' for v in values])
+ go.Bar(x=labels, y=values, marker_color=bar_colors,
+ text=[f'{v:.3f}' for v in values], textposition='outside')
])
- fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.update_layout(
title="Sentiment Probabilities",
yaxis_title="Probability",
@@ -651,14 +543,71 @@ class AdvancedVisualizer:
)
return fig
+
+    @staticmethod
+    @handle_errors(default_return=None)
+    def create_keyword_chart(keywords: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> go.Figure:
+        """Create a horizontal bar chart of keyword scores.
+
+        Args:
+            keywords: ``(word, score)`` pairs, drawn in the order given.
+            sentiment: Sentiment label; selects the bar colour and is shown
+                in the title.
+            theme: Supplies the ``pos`` / ``neu`` / ``neg`` colours.
+
+        Returns:
+            The bar chart, or a placeholder figure with a "No keywords
+            extracted" annotation when ``keywords`` is empty.
+        """
+        if not keywords:
+            fig = go.Figure()
+            fig.add_annotation(text="No keywords extracted",
+                               xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
+            fig.update_layout(height=400, title="Keywords")
+            return fig
+
+        words = [word for word, _score in keywords]
+        scores = [score for _word, score in keywords]
+
+        # Neutral gets its own theme colour; any other label that is not
+        # Positive keeps the negative colour, as before.
+        palette = theme.colors
+        color = {'Positive': palette['pos'], 'Neutral': palette['neu']}.get(sentiment, palette['neg'])
+
+        fig = go.Figure(data=[
+            go.Bar(
+                y=words,
+                x=scores,
+                orientation='h',
+                marker_color=color,
+                text=[f'{score:.3f}' for score in scores],
+                textposition='auto'
+            )
+        ])
+
+        fig.update_layout(
+            title=f"Top Keywords ({sentiment})",
+            xaxis_title="Attention Weight",
+            yaxis_title="Keywords",
+            height=400,
+            showlegend=False
+        )
+
+        return fig
+
@staticmethod
- def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
+ @handle_errors(default_return=None)
+ def create_batch_summary(results: List[Dict], theme: ThemeContext) -> go.Figure:
"""Create batch analysis summary"""
- colors = config.THEMES[theme]
+ colors = theme.colors
# Count sentiments
- sentiments = [r['sentiment'] for r in results if 'sentiment' in r]
+ sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
sentiment_counts = Counter(sentiments)
# Create pie chart
@@ -677,7 +626,16 @@ class AdvancedVisualizer:
return fig
+
+
+
+
+
+
+
+
@staticmethod
+ @handle_errors(default_return=None)
def create_confidence_distribution(results: List[Dict]) -> go.Figure:
"""Create confidence distribution plot"""
confidences = [r['confidence'] for r in results if 'confidence' in r and r['sentiment'] != 'Error']
@@ -702,7 +660,8 @@ class AdvancedVisualizer:
return fig
@staticmethod
- def create_history_dashboard(history: List[Dict]) -> go.Figure:
+ @handle_errors(default_return=None)
+ def create_history_dashboard(history: List[Dict], theme: ThemeContext) -> go.Figure:
"""Create comprehensive history dashboard"""
if len(history) < 2:
return go.Figure()
@@ -718,13 +677,15 @@ class AdvancedVisualizer:
# Extract data
indices = list(range(len(history)))
- pos_probs = [item['pos_prob'] for item in history]
+ pos_probs = [item.get('pos_prob', 0) for item in history]
confidences = [item['confidence'] for item in history]
sentiments = [item['sentiment'] for item in history]
languages = [item.get('language', 'en') for item in history]
# Sentiment timeline
- colors = ['#4CAF50' if s == 'Positive' else '#F44336' for s in sentiments]
+ colors_map = {'Positive': theme.colors['pos'], 'Negative': theme.colors['neg'], 'Neutral': theme.colors['neu']}
+ colors = [colors_map.get(s, '#999999') for s in sentiments]
+
fig.add_trace(
go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
marker=dict(color=colors, size=8),
@@ -748,33 +709,113 @@ class AdvancedVisualizer:
# Sentiment summary
sent_counts = Counter(sentiments)
+ sent_colors = [colors_map.get(k, '#999999') for k in sent_counts.keys()]
fig.add_trace(
go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
- marker_color=['#4CAF50' if k == 'Positive' else '#F44336' for k in sent_counts.keys()]),
+ marker_color=sent_colors),
row=2, col=2
)
fig.update_layout(height=800, showlegend=False)
return fig
-# Main application functions
-def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
+# Universal Data Handler
+class DataHandler:
+ """Enhanced data operations"""
+
+    @staticmethod
+    @handle_errors(default_return=(None, "Export failed"))
+    def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]:
+        """Write entries to a temporary CSV or JSON file.
+
+        Args:
+            data: Entries to export.
+            format_type: ``'csv'`` or ``'json'``.
+
+        Returns:
+            ``(path, message)``. ``path`` is ``None`` when there is nothing
+            to export or the format is not supported.
+        """
+        if not data:
+            return None, "No data to export"
+        if format_type not in ('csv', 'json'):
+            return None, f"Unsupported format: {format_type}"
+
+        def _json_default(value):
+            # Keyword scores can be NumPy scalars, which json cannot encode.
+            if isinstance(value, np.generic):
+                return value.item()
+            raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
+
+        # delete=False keeps the file on disk for the caller; newline='' lets
+        # the csv module control line endings.
+        with tempfile.NamedTemporaryFile(mode='w', suffix=f'.{format_type}', delete=False,
+                                         encoding='utf-8', newline='') as temp_file:
+            if format_type == 'csv':
+                writer = csv.writer(temp_file)
+                writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Language',
+                                 'Pos_Prob', 'Neg_Prob', 'Neu_Prob', 'Keywords', 'Word_Count'])
+                for entry in data:
+                    keywords_str = "|".join(f"{word}:{score:.3f}" for word, score in entry.get('keywords', []))
+                    writer.writerow([
+                        entry.get('timestamp', ''),
+                        entry.get('text', ''),
+                        entry.get('sentiment', ''),
+                        f"{entry.get('confidence', 0):.4f}",
+                        entry.get('language', 'en'),
+                        f"{entry.get('pos_prob', 0):.4f}",
+                        f"{entry.get('neg_prob', 0):.4f}",
+                        f"{entry.get('neu_prob', 0):.4f}",
+                        keywords_str,
+                        entry.get('word_count', 0)
+                    ])
+            else:
+                json.dump(data, temp_file, indent=2, ensure_ascii=False, default=_json_default)
+        return temp_file.name, f"Exported {len(data)} entries"
+
+
+    @staticmethod
+    @handle_errors(default_return="")
+    def process_file(file) -> str:
+        """Turn an uploaded file into newline-separated texts.
+
+        Args:
+            file: Object exposing ``read()`` (returning bytes) and ``name``;
+                a falsy value yields an empty string.
+
+        Returns:
+            For ``.csv`` files, the first column of every row after the
+            header, one text per line. For any other file, the decoded
+            content unchanged.
+        """
+        if not file:
+            return ""
+
+        # utf-8-sig also accepts plain UTF-8 and drops a leading BOM if present.
+        content = file.read().decode('utf-8-sig')
+
+        if not file.name.endswith('.csv'):
+            return content
+
+        try:
+            reader = csv.reader(io.StringIO(content))
+            next(reader, None)  # Skip header
+            candidates = [row[0].strip().strip('"') for row in reader if row and row[0].strip()]
+        except csv.Error:
+            # Malformed CSV: fall back to one text per raw line after the header.
+            candidates = [line.strip().strip('"') for line in content.strip().split('\n')[1:]]
+
+        return '\n'.join(text for text in candidates if text)
+
+# Main Application Class
+class SentimentApp:
+ """Main multilingual sentiment analysis application"""
+
+ def __init__(self):
+ self.engine = SentimentEngine()
+ self.history = HistoryManager()
+ self.data_handler = DataHandler()
+
+ # Multi-language examples
+ self.examples = [
+ ["This movie was absolutely fantastic! The acting was superb and the plot kept me engaged throughout."],
+ ["The film was disappointing with poor character development and a confusing storyline."],
+ ["这部电影真的很棒!演技精湛,情节引人入胜。"], # Chinese
+ ["Esta película fue increíble, me encantó la cinematografía."], # Spanish
+ ["Ce film était magnifique, j'ai adoré la réalisation."], # French
+ ]
+
+
+ @handle_errors(default_return=("Please enter text", None, None, None))
+ def analyze_single(self, text: str, language: str, theme: str, clean_text: bool,
remove_punct: bool, remove_nums: bool):
- """Enhanced single text analysis"""
- try:
+ """Single text analysis with enhanced visualizations"""
if not text.strip():
- return "Please enter text", None, None
-
- # Map display names back to language codes
- language_map = {
- 'Auto Detect': 'auto',
- 'English': 'en',
- 'Chinese': 'zh',
- 'Spanish': 'es',
- 'French': 'fr',
- 'German': 'de',
- 'Swedish': 'sv'
- }
+ return "Please enter text", None, None, None
+
+ # Map display names to language codes
+ language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
language_code = language_map.get(language, 'auto')
preprocessing_options = {
@@ -783,46 +824,48 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
'remove_numbers': remove_nums
}
- result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)
-
- # Add to history
- history_entry = {
- 'text': text[:100] + '...' if len(text) > 100 else text,
- 'full_text': text,
- 'sentiment': result['sentiment'],
- 'confidence': result['confidence'],
- 'pos_prob': result['pos_prob'],
- 'neg_prob': result['neg_prob'],
- 'neu_prob': result.get('neu_prob', 0),
- 'language': result['language'],
- 'timestamp': datetime.now().isoformat(),
- 'analysis_type': 'single'
- }
- history_manager.add_entry(history_entry)
-
- # Create visualizations
- gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
- bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)
-
- # Create info text
- info_text = f"""
+ with memory_cleanup():
+ result = self.engine.analyze_single(text, language_code, preprocessing_options)
+
+ # Add to history
+ history_entry = {
+ 'text': text[:100] + '...' if len(text) > 100 else text,
+ 'full_text': text,
+ 'sentiment': result['sentiment'],
+ 'confidence': result['confidence'],
+ 'pos_prob': result.get('pos_prob', 0),
+ 'neg_prob': result.get('neg_prob', 0),
+ 'neu_prob': result.get('neu_prob', 0),
+ 'language': result['language'],
+ 'keywords': result['keywords'],
+ 'word_count': result['word_count'],
+ 'analysis_type': 'single'
+ }
+ self.history.add(history_entry)
+
+ # Create visualizations
+ theme_ctx = ThemeContext(theme)
+ gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme_ctx)
+ bars_fig = PlotlyVisualizer.create_probability_bars(result, theme_ctx)
+ keyword_fig = PlotlyVisualizer.create_keyword_chart(result['keywords'], result['sentiment'], theme_ctx)
+
+ # Create comprehensive result text
+ keywords_str = ", ".join([f"{word}({score:.3f})" for word, score in result['keywords'][:5]])
+
+ info_text = f"""
**Analysis Results:**
- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
- **Language:** {result['language'].upper()}
-- **Keywords:** {', '.join(result['keywords'])}
-- **Stats:** {result['word_count']} words, {result['char_count']} characters
- """
-
- return info_text, gauge_fig, bars_fig
-
- except Exception as e:
- logger.error(f"Analysis failed: {e}")
- return f"Error: {str(e)}", None, None
-
-def analyze_batch_texts(batch_text: str, language: str, theme: str,
- clean_text: bool, remove_punct: bool, remove_nums: bool):
- """Batch text analysis"""
- try:
+- **Keywords:** {keywords_str}
+- **Statistics:** {result['word_count']} words, {result['char_count']} characters
+ """
+
+ return info_text, gauge_fig, bars_fig, keyword_fig
+
+ @handle_errors(default_return=("Please enter texts", None, None, None))
+ def analyze_batch(self, batch_text: str, language: str, theme: str,
+ clean_text: bool, remove_punct: bool, remove_nums: bool):
+ """Enhanced batch analysis"""
if not batch_text.strip():
return "Please enter texts (one per line)", None, None, None
@@ -835,16 +878,8 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
if not texts:
return "No valid texts found", None, None, None
- # Map display names back to language codes
- language_map = {
- 'Auto Detect': 'auto',
- 'English': 'en',
- 'Chinese': 'zh',
- 'Spanish': 'es',
- 'French': 'fr',
- 'German': 'de',
- 'Swedish': 'sv'
- }
+ # Map display names to language codes
+ language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
language_code = language_map.get(language, 'auto')
preprocessing_options = {
@@ -853,616 +888,669 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
'remove_numbers': remove_nums
}
- # Analyze all texts
- results = SentimentAnalyzer.analyze_batch(texts, language_code, preprocessing_options)
-
- # Add to history
- batch_entries = []
- for i, (text, result) in enumerate(zip(texts, results)):
- if 'error' not in result:
- entry = {
- 'text': text[:100] + '...' if len(text) > 100 else text,
- 'full_text': text,
- 'sentiment': result['sentiment'],
- 'confidence': result['confidence'],
- 'pos_prob': result['pos_prob'],
- 'neg_prob': result['neg_prob'],
- 'neu_prob': result.get('neu_prob', 0),
- 'language': result['language'],
- 'timestamp': datetime.now().isoformat(),
- 'analysis_type': 'batch',
- 'batch_index': i
- }
- batch_entries.append(entry)
-
- history_manager.add_batch_entries(batch_entries)
-
- # Create visualizations
- summary_fig = PlotlyVisualizer.create_batch_summary(results, theme)
- confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
-
- # Create results table
- df_data = []
- for i, (text, result) in enumerate(zip(texts, results)):
- if 'error' in result:
- df_data.append({
- 'Index': i+1,
- 'Text': text[:50] + '...' if len(text) > 50 else text,
- 'Sentiment': 'Error',
- 'Confidence': 0.0,
- 'Language': 'Unknown',
- 'Error': result['error']
- })
- else:
- df_data.append({
- 'Index': i+1,
- 'Text': text[:50] + '...' if len(text) > 50 else text,
- 'Sentiment': result['sentiment'],
- 'Confidence': f"{result['confidence']:.3f}",
- 'Language': result['language'].upper(),
- 'Keywords': ', '.join(result['keywords'][:3])
- })
-
- df = pd.DataFrame(df_data)
-
- # Summary info
- successful_results = [r for r in results if 'error' not in r]
- error_count = len(results) - len(successful_results)
-
- if successful_results:
- sentiment_counts = Counter([r['sentiment'] for r in successful_results])
- avg_confidence = np.mean([r['confidence'] for r in successful_results])
+ with memory_cleanup():
+ results = self.engine.analyze_batch(texts, language_code, preprocessing_options)
+
+ # Add to history
+ batch_entries = []
+ for result in results:
+ if 'error' not in result:
+ entry = {
+ 'text': result['text'],
+ 'full_text': result['full_text'],
+ 'sentiment': result['sentiment'],
+ 'confidence': result['confidence'],
+ 'pos_prob': result.get('pos_prob', 0),
+ 'neg_prob': result.get('neg_prob', 0),
+ 'neu_prob': result.get('neu_prob', 0),
+ 'language': result['language'],
+ 'keywords': result['keywords'],
+ 'word_count': result['word_count'],
+ 'analysis_type': 'batch',
+ 'batch_index': result['batch_index']
+ }
+ batch_entries.append(entry)
+
+ self.history.add_batch(batch_entries)
+
+ # Create visualizations
+ theme_ctx = ThemeContext(theme)
+ summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx)
+ confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
+
+ # Create results DataFrame
+ df_data = []
+ for result in results:
+ if 'error' in result:
+ df_data.append({
+ 'Index': result['batch_index'] + 1,
+ 'Text': result['text'],
+ 'Sentiment': 'Error',
+ 'Confidence': 0.0,
+ 'Language': 'Unknown',
+ 'Error': result['error']
+ })
+ else:
+ keywords_str = ', '.join([word for word, _ in result['keywords'][:3]])
+ df_data.append({
+ 'Index': result['batch_index'] + 1,
+ 'Text': result['text'],
+ 'Sentiment': result['sentiment'],
+ 'Confidence': f"{result['confidence']:.3f}",
+ 'Language': result['language'].upper(),
+ 'Keywords': keywords_str
+ })
- summary_text = f"""
+ df = pd.DataFrame(df_data)
+
+ # Create summary text
+ successful_results = [r for r in results if 'error' not in r]
+ error_count = len(results) - len(successful_results)
+
+ if successful_results:
+ sentiment_counts = Counter([r['sentiment'] for r in successful_results])
+ avg_confidence = np.mean([r['confidence'] for r in successful_results])
+ languages = Counter([r['language'] for r in successful_results])
+
+ summary_text = f"""
**Batch Analysis Summary:**
- **Total Texts:** {len(texts)}
- **Successful:** {len(successful_results)}
- **Errors:** {error_count}
- **Average Confidence:** {avg_confidence:.3f}
- **Sentiments:** {dict(sentiment_counts)}
- """
- else:
- summary_text = f"All {len(texts)} texts failed to analyze."
-
- return summary_text, df, summary_fig, confidence_fig
+- **Languages Detected:** {dict(languages)}
+ """
+ else:
+ summary_text = f"All {len(texts)} texts failed to analyze."
+
+ return summary_text, df, summary_fig, confidence_fig
+
+ @handle_errors(default_return=(None, "No history available"))
+ def plot_history(self, theme: str = 'default'):
+ """Plot comprehensive history analysis"""
+ history = self.history.get_all()
+ if len(history) < 2:
+ return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
- except Exception as e:
- logger.error(f"Batch analysis failed: {e}")
- return f"Error: {str(e)}", None, None, None
-def analyze_advanced_text(text: str, language: str, theme: str, use_lime: bool,
- use_attention: bool, lime_features: int):
- """Advanced analysis with SHAP and LIME explainability"""
- try:
- if not text.strip():
- return "Please enter text", None, None, None, None
-
- # Map display names back to language codes
- language_map = {
- 'Auto Detect': 'auto',
- 'English': 'en',
- 'Chinese': 'zh',
- 'Spanish': 'es',
- 'French': 'fr',
- 'German': 'de',
- 'Swedish': 'sv'
- }
- language_code = language_map.get(language, 'auto')
-
- # Basic sentiment analysis first
- result = SentimentAnalyzer.analyze_text(text, language_code)
-
- # Create basic visualizations first
- gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
- bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)
-
- # Initialize explainability results
- lime_result = None
- attention_result = None
- lime_plot = None
- attention_plot = None
+ theme_ctx = ThemeContext(theme)
+
- # Get model for explainability analysis
- try:
- model, tokenizer = model_manager.get_model(language_code)
+ with memory_cleanup():
+ fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
+ stats = self.history.get_stats()
- # LIME Analysis
- if use_lime:
- lime_result = ExplainabilityAnalyzer.analyze_with_lime(
- text, model, tokenizer, model_manager.device, lime_features
- )
- lime_plot = AdvancedVisualizer.create_lime_plot(lime_result, theme)
- else:
- # Create empty plot
- lime_plot = go.Figure()
- lime_plot.add_annotation(text="LIME analysis disabled", x=0.5, y=0.5,
- xref="paper", yref="paper", showarrow=False)
- lime_plot.update_layout(height=400, title="LIME Analysis (Disabled)")
+ stats_text = f"""
+**History Statistics:**
+- **Total Analyses:** {stats.get('total_analyses', 0)}
+- **Positive:** {stats.get('positive_count', 0)}
+- **Negative:** {stats.get('negative_count', 0)}
+- **Neutral:** {stats.get('neutral_count', 0)}
+- **Average Confidence:** {stats.get('avg_confidence', 0):.3f}
+- **Languages:** {stats.get('languages_detected', 0)}
+- **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()}
+ """
- # Attention Analysis
- if use_attention:
- attention_result = ExplainabilityAnalyzer.analyze_with_attention(
- text, model, tokenizer, model_manager.device
- )
- attention_plot = AdvancedVisualizer.create_attention_plot(attention_result, theme)
- else:
- # Create empty plot
- attention_plot = go.Figure()
- attention_plot.add_annotation(text="Attention analysis disabled", x=0.5, y=0.5,
- xref="paper", yref="paper", showarrow=False)
- attention_plot.update_layout(height=400, title="Attention Analysis (Disabled)")
+ return fig, stats_text
+
+ @handle_errors(default_return=("No data available",))
+ def get_history_status(self):
+ """Get current history status"""
+ stats = self.history.get_stats()
+ if not stats:
+ return "No analyses performed yet"
+
+ return f"""
+**Current Status:**
+- **Total Analyses:** {stats['total_analyses']}
+- **Recent Sentiment Distribution:**
+ * Positive: {stats['positive_count']}
+ * Negative: {stats['negative_count']}
+ * Neutral: {stats['neutral_count']}
+- **Average Confidence:** {stats['avg_confidence']:.3f}
+- **Languages Detected:** {stats['languages_detected']}
+ """
+
+# Gradio Interface
+def create_interface():
+ """Create comprehensive Gradio interface"""
+ app = SentimentApp()
+
+ with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
+ gr.Markdown("# 🌍 Advanced Multilingual Sentiment Analyzer")
+ gr.Markdown("AI-powered sentiment analysis with support for multiple languages, advanced visualizations, and explainable AI features")
+
+ with gr.Tab("Single Analysis"):
+ with gr.Row():
+ with gr.Column():
+ text_input = gr.Textbox(
+ label="Enter Text for Analysis",
+ placeholder="Enter your text in any supported language...",
+ lines=5
+ )
+
+ with gr.Row():
+ language_selector = gr.Dropdown(
+ choices=list(config.SUPPORTED_LANGUAGES.values()),
+ value="Auto Detect",
+ label="Language"
+ )
+ theme_selector = gr.Dropdown(
+ choices=list(config.THEMES.keys()),
+ value="default",
+ label="Theme"
+ )
+
+ with gr.Row():
+ clean_text_cb = gr.Checkbox(label="Clean Text", value=False)
+ remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
+ remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
+
+ analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
+
+ gr.Examples(
+ examples=app.examples,
+ inputs=text_input,
+ cache_examples=False
+ )
- except Exception as e:
- logger.error(f"Explainability analysis failed: {e}")
- # Create error plots
- lime_plot = go.Figure()
- lime_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5,
- xref="paper", yref="paper", showarrow=False)
- lime_plot.update_layout(height=400, title="Analysis Error")
+ with gr.Column():
+ result_output = gr.Textbox(label="Analysis Results", lines=8)
- attention_plot = go.Figure()
- attention_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5,
- xref="paper", yref="paper", showarrow=False)
- attention_plot.update_layout(height=400, title="Analysis Error")
-
- # Add to history
- history_entry = {
- 'text': text[:100] + '...' if len(text) > 100 else text,
- 'full_text': text,
- 'sentiment': result['sentiment'],
- 'confidence': result['confidence'],
- 'pos_prob': result['pos_prob'],
- 'neg_prob': result['neg_prob'],
- 'neu_prob': result.get('neu_prob', 0),
- 'language': result['language'],
- 'timestamp': datetime.now().isoformat(),
- 'analysis_type': 'advanced',
- 'explainability_used': use_lime or use_attention
- }
- history_manager.add_entry(history_entry)
+ with gr.Row():
+ gauge_plot = gr.Plot(label="Sentiment Gauge")
+ probability_plot = gr.Plot(label="Probability Distribution")
+
+ with gr.Row():
+ keyword_plot = gr.Plot(label="Key Contributing Words")
+
+ with gr.Tab("Batch Analysis"):
+ with gr.Row():
+ with gr.Column():
+ file_upload = gr.File(
+ label="Upload File (CSV/TXT)",
+ file_types=[".csv", ".txt"]
+ )
+ batch_input = gr.Textbox(
+ label="Batch Input (one text per line)",
+ placeholder="Enter multiple texts, one per line...",
+ lines=10
+ )
+
+ with gr.Row():
+ batch_language = gr.Dropdown(
+ choices=list(config.SUPPORTED_LANGUAGES.values()),
+ value="Auto Detect",
+ label="Language"
+ )
+ batch_theme = gr.Dropdown(
+ choices=list(config.THEMES.keys()),
+ value="default",
+ label="Theme"
+ )
+
+ with gr.Row():
+ batch_clean_cb = gr.Checkbox(label="Clean Text", value=False)
+ batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
+ batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
+
+ with gr.Row():
+ load_file_btn = gr.Button("Load File")
+ analyze_batch_btn = gr.Button("Analyze Batch", variant="primary")
+
+ with gr.Column():
+ batch_summary = gr.Textbox(label="Batch Summary", lines=8)
+ batch_results_df = gr.Dataframe(
+ label="Detailed Results",
+ headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Keywords"],
+ datatype=["number", "str", "str", "str", "str", "str"]
+ )
+
+ with gr.Row():
+ batch_plot = gr.Plot(label="Batch Analysis Summary")
+ confidence_dist_plot = gr.Plot(label="Confidence Distribution")
+
+ with gr.Tab("History & Analytics"):
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ refresh_history_btn = gr.Button("Refresh History")
+ clear_history_btn = gr.Button("Clear History", variant="stop")
+ status_btn = gr.Button("Get Status")
+
+ history_theme = gr.Dropdown(
+ choices=list(config.THEMES.keys()),
+ value="default",
+ label="Dashboard Theme"
+ )
+
+ with gr.Row():
+ export_csv_btn = gr.Button("Export CSV")
+ export_json_btn = gr.Button("Export JSON")
+
+ with gr.Column():
+ history_status = gr.Textbox(label="History Status", lines=8)
+
+ history_dashboard = gr.Plot(label="History Analytics Dashboard")
+
+ with gr.Row():
+ csv_download = gr.File(label="CSV Download", visible=True)
+ json_download = gr.File(label="JSON Download", visible=True)
+
+ # Event Handlers
+ analyze_btn.click(
+ app.analyze_single,
+ inputs=[text_input, language_selector, theme_selector,
+ clean_text_cb, remove_punct_cb, remove_nums_cb],
+ outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
+ )
- # Create detailed info text
- info_text = f"""
-**Advanced Analysis Results:**
-- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
-- **Language:** {result['language'].upper()}
-- **Text Statistics:**
- - Words: {result['word_count']}
- - Characters: {result['char_count']}
- - Average word length: {result['char_count']/max(result['word_count'], 1):.1f}
-- **Keywords:** {', '.join(result['keywords'])}
-
-**Explainability Analysis:**
- """
+ load_file_btn.click(
+ app.data_handler.process_file,
+ inputs=file_upload,
+ outputs=batch_input
+ )
- if use_lime:
- if lime_result and 'error' not in lime_result:
- info_text += f"\n- **LIME:** ✅ Analyzed top {lime_features} features"
- else:
- error_msg = lime_result.get('error', 'Unknown error') if lime_result else 'Not available'
- info_text += f"\n- **LIME:** ❌ {error_msg}"
- else:
- info_text += f"\n- **LIME:** ⏸️ Disabled"
+ analyze_batch_btn.click(
+ app.analyze_batch,
+ inputs=[batch_input, batch_language, batch_theme,
+ batch_clean_cb, batch_punct_cb, batch_nums_cb],
+ outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
+ )
- if use_attention:
- if attention_result and 'error' not in attention_result:
- info_text += f"\n- **Attention:** ✅ Token-level attention weights computed"
- else:
- error_msg = attention_result.get('error', 'Unknown error') if attention_result else 'Not available'
- info_text += f"\n- **Attention:** ❌ {error_msg}"
- else:
- info_text += f"\n- **Attention:** ⏸️ Disabled"
+ refresh_history_btn.click(
+ app.plot_history,
+ inputs=history_theme,
+ outputs=[history_dashboard, history_status]
+ )
- return info_text, gauge_fig, bars_fig, lime_plot, attention_plot
+ clear_history_btn.click(
+ lambda: f"Cleared {app.history.clear()} entries",
+ outputs=history_status
+ )
- except Exception as e:
- logger.error(f"Advanced analysis failed: {e}")
- # Return basic empty plots on complete failure
- empty_fig = go.Figure()
- empty_fig.add_annotation(text=f"Analysis failed: {str(e)}", x=0.5, y=0.5,
- xref="paper", yref="paper", showarrow=False)
- empty_fig.update_layout(height=400)
-
- return f"Error: {str(e)}", empty_fig, empty_fig, empty_fig, empty_fig
-
-def get_history_stats():
- """Get enhanced history statistics"""
- stats = history_manager.get_stats()
- if not stats:
- return "No analysis history available"
-
- return f"""
-**Comprehensive History Statistics:**
-
-**Analysis Counts:**
-- Total Analyses: {stats['total_analyses']}
-- Positive: {stats['positive_count']}
-- Negative: {stats['negative_count']}
-- Neutral: {stats['neutral_count']}
-
-**Confidence Metrics:**
-- Average Confidence: {stats['avg_confidence']:.3f}
-- Highest Confidence: {stats['max_confidence']:.3f}
-- Lowest Confidence: {stats['min_confidence']:.3f}
-
-**Language Statistics:**
-- Languages Detected: {stats['languages_detected']}
-- Most Common Language: {stats['most_common_language'].upper()}
-
-**Text Statistics:**
-- Average Text Length: {stats['avg_text_length']:.1f} characters
- """
-
-def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
- """Display filtered history"""
- # Convert filters
- sentiment = sentiment_filter if sentiment_filter != "All" else None
- language = language_filter.lower() if language_filter != "All" else None
-
- filtered_history = history_manager.filter_history(
- sentiment=sentiment,
- language=language,
- min_confidence=min_confidence if min_confidence > 0 else None
- )
-
- if not filtered_history:
- return "No entries match the filter criteria", None
-
- # Create DataFrame for display
- df_data = []
- for entry in filtered_history[-20:]: # Show last 20 entries
- df_data.append({
- 'Timestamp': entry['timestamp'][:16], # YYYY-MM-DD HH:MM
- 'Text': entry['text'],
- 'Sentiment': entry['sentiment'],
- 'Confidence': f"{entry['confidence']:.3f}",
- 'Language': entry['language'].upper(),
- 'Type': entry.get('analysis_type', 'single')
- })
-
- df = pd.DataFrame(df_data)
-
- summary = f"""
-**Filtered Results:**
-- Found {len(filtered_history)} entries matching criteria
-- Showing most recent {min(20, len(filtered_history))} entries
- """
+ status_btn.click(
+ app.get_history_status,
+ outputs=history_status
+ )
+
+ export_csv_btn.click(
+ lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
+ outputs=[csv_download, history_status]
+ )
+
+ export_json_btn.click(
+ lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
+ outputs=[json_download, history_status]
+ )
- return summary, df
+ return demo
-def plot_history_dashboard():
- """Create history dashboard"""
- history = history_manager.get_history()
- if len(history) < 2:
- return None, "Need at least 2 analyses for dashboard"
-
- fig = PlotlyVisualizer.create_history_dashboard(history)
- return fig, f"Dashboard showing {len(history)} analyses"
-
-def export_history_csv():
- """Export history to CSV"""
- history = history_manager.get_history()
- if not history:
- return None, "No history to export"
+# Application Entry Point
+if __name__ == "__main__":
+ logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
try:
- df = pd.DataFrame(history)
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w')
- df.to_csv(temp_file.name, index=False)
- return temp_file.name, f"Exported {len(history)} entries to CSV"
+ demo = create_interface()
+ demo.launch(
+ share=True,
+ server_name="0.0.0.0",
+ server_port=7860,
+ show_error=True
+ )
except Exception as e:
- return None, f"Export failed: {str(e)}"
-
-def export_history_excel():
- """Export history to Excel"""
- history = history_manager.get_history()
- if not history:
- return None, "No history to export"
+ logger.error(f"Failed to launch application: {e}")
+ raise
- try:
- df = pd.DataFrame(history)
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
- df.to_excel(temp_file.name, index=False)
- return temp_file.name, f"Exported {len(history)} entries to Excel"
- except Exception as e:
- return None, f"Export failed: {str(e)}"
-
-def clear_all_history():
- """Clear analysis history"""
- count = history_manager.clear()
- return f"Cleared {count} entries from history"
-
-def get_recent_analyses():
- """Get recent analysis summary"""
- recent = history_manager.get_recent_history(10)
- if not recent:
- return "No recent analyses available"
+ @handle_errors(default_return=("Please enter texts", None, None, None))
+ def analyze_batch(self, batch_text: str, language: str, theme: str,
+ clean_text: bool, remove_punct: bool, remove_nums: bool):
+ """Enhanced batch analysis"""
+ if not batch_text.strip():
+ return "Please enter texts (one per line)", None, None, None
+
+ # Parse batch input
+ texts = TextProcessor.parse_batch_input(batch_text)
+
+ if len(texts) > config.BATCH_SIZE_LIMIT:
+ return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
+
+ if not texts:
+ return "No valid texts found", None, None, None
+
+
+ # Map display names to language codes
+ language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
+ language_code = language_map.get(language, 'auto')
+
+ preprocessing_options = {
+ 'clean_text': clean_text,
+ 'remove_punctuation': remove_punct,
+ 'remove_numbers': remove_nums
+ }
+
+ with memory_cleanup():
+ results = self.engine.analyze_batch(texts, language_code, preprocessing_options)
+
+ # Add to history
+ batch_entries = []
+ for result in results:
+ if 'error' not in result:
+ entry = {
+ 'text': result['text'],
+ 'full_text': result['full_text'],
+ 'sentiment': result['sentiment'],
+ 'confidence': result['confidence'],
+ 'pos_prob': result.get('pos_prob', 0),
+ 'neg_prob': result.get('neg_prob', 0),
+ 'neu_prob': result.get('neu_prob', 0),
+ 'language': result['language'],
+ 'keywords': result['keywords'],
+ 'word_count': result['word_count'],
+ 'analysis_type': 'batch',
+ 'batch_index': result['batch_index']
+ }
+ batch_entries.append(entry)
+
+ self.history.add_batch(batch_entries)
+
+ # Create visualizations
+ theme_ctx = ThemeContext(theme)
+ summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx)
+ confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
+
+ # Create results DataFrame
+ df_data = []
+ for result in results:
+ if 'error' in result:
+ df_data.append({
+ 'Index': result['batch_index'] + 1,
+ 'Text': result['text'],
+ 'Sentiment': 'Error',
+ 'Confidence': 0.0,
+ 'Language': 'Unknown',
+ 'Error': result['error']
+ })
+ else:
+ keywords_str = ', '.join([word for word, _ in result['keywords'][:3]])
+ df_data.append({
+ 'Index': result['batch_index'] + 1,
+ 'Text': result['text'],
+ 'Sentiment': result['sentiment'],
+ 'Confidence': f"{result['confidence']:.3f}",
+ 'Language': result['language'].upper(),
+ 'Keywords': keywords_str
+ })
+
+ df = pd.DataFrame(df_data)
+
+ # Create summary text
+ successful_results = [r for r in results if 'error' not in r]
+ error_count = len(results) - len(successful_results)
+
+ if successful_results:
+ sentiment_counts = Counter([r['sentiment'] for r in successful_results])
+ avg_confidence = np.mean([r['confidence'] for r in successful_results])
+ languages = Counter([r['language'] for r in successful_results])
+
+ summary_text = f"""
+**Batch Analysis Summary:**
+- **Total Texts:** {len(texts)}
+- **Successful:** {len(successful_results)}
+- **Errors:** {error_count}
+- **Average Confidence:** {avg_confidence:.3f}
+- **Sentiments:** {dict(sentiment_counts)}
+- **Languages Detected:** {dict(languages)}
+ """
+ else:
+ summary_text = f"All {len(texts)} texts failed to analyze."
+
+ return summary_text, df, summary_fig, confidence_fig
- summary_text = "**Recent Analyses (Last 10):**\n\n"
- for i, entry in enumerate(recent, 1):
- summary_text += f"{i}. **{entry['sentiment']}** ({entry['confidence']:.3f}) - {entry['text']}\n"
+ @handle_errors(default_return=(None, "No history available"))
+ def plot_history(self, theme: str = 'default'):
+ """Plot comprehensive history analysis"""
+ history = self.history.get_all()
+ if len(history) < 2:
+ return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
+
+ theme_ctx = ThemeContext(theme)
+
+ with memory_cleanup():
+ fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
+ stats = self.history.get_stats()
+
+ stats_text = f"""
+**History Statistics:**
+- **Total Analyses:** {stats.get('total_analyses', 0)}
+- **Positive:** {stats.get('positive_count', 0)}
+- **Negative:** {stats.get('negative_count', 0)}
+- **Neutral:** {stats.get('neutral_count', 0)}
+- **Average Confidence:** {stats.get('avg_confidence', 0):.3f}
+- **Languages:** {stats.get('languages_detected', 0)}
+- **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()}
+ """
+
+ return fig, stats_text
- return summary_text
-
-# Sample data
-SAMPLE_TEXTS = [
- # Auto Detect
- ["The film had its moments, but overall it felt a bit too long and lacked emotional depth."],
-
- # English
- ["I was completely blown away by the movie — the performances were raw and powerful, and the story stayed with me long after the credits rolled."],
-
- # Chinese
- ["这部电影节奏拖沓,剧情老套,完全没有让我产生任何共鸣,是一次失望的观影体验。"],
-
- # Spanish
- ["Una obra maestra del cine contemporáneo, con actuaciones sobresalientes, un guion bien escrito y una dirección impecable."],
-
- # French
- ["Je m'attendais à beaucoup mieux. Le scénario était confus, les dialogues ennuyeux, et je me suis presque endormi au milieu du film."],
-
- # German
- ["Der Film war ein emotionales Erlebnis mit großartigen Bildern, einem mitreißenden Soundtrack und einer Geschichte, die zum Nachdenken anregt."],
-
- # Swedish
- ["Filmen var en besvikelse – tråkig handling, överdrivet skådespeleri och ett slut som inte gav något avslut alls."]
-]
-
-BATCH_SAMPLE = """I love this product! It works perfectly.
-The service was terrible and slow.
-Not sure if I like it or not.
-Amazing quality and fast delivery!
-Could be better, but it's okay."""
+ @handle_errors(default_return=("No data available",))
+ def get_history_status(self):
+ """Get current history status"""
+ stats = self.history.get_stats()
+ if not stats:
+ return "No analyses performed yet"
+
+ return f"""
+**Current Status:**
+- **Total Analyses:** {stats['total_analyses']}
+- **Recent Sentiment Distribution:**
+ * Positive: {stats['positive_count']}
+ * Negative: {stats['negative_count']}
+ * Neutral: {stats['neutral_count']}
+- **Average Confidence:** {stats['avg_confidence']:.3f}
+- **Languages Detected:** {stats['languages_detected']}
+ """
# Gradio Interface
-with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment Analyzer") as demo:
- gr.Markdown("# 🎭 Advanced Multilingual Sentiment Analyzer")
- gr.Markdown("Comprehensive sentiment analysis with batch processing, advanced analytics, and multilingual support")
+def create_interface():
+ """Create comprehensive Gradio interface"""
+ app = SentimentApp()
- with gr.Tab("📝 Single Analysis"):
- with gr.Row():
- with gr.Column(scale=2):
- text_input = gr.Textbox(
- label="Text to Analyze",
- placeholder="Enter your text here... (supports multiple languages)",
- lines=4
- )
-
- with gr.Row():
- language_select = gr.Dropdown(
- choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
- value='Auto Detect',
- label="Language"
+ with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
+ gr.Markdown("# 🌍 Advanced Multilingual Sentiment Analyzer")
+ gr.Markdown("AI-powered sentiment analysis with support for multiple languages, advanced visualizations, and explainable AI features")
+
+ with gr.Tab("Single Analysis"):
+ with gr.Row():
+ with gr.Column():
+ text_input = gr.Textbox(
+ label="Enter Text for Analysis",
+ placeholder="Enter your text in any supported language...",
+ lines=5
)
- theme_select = gr.Dropdown(
- choices=list(config.THEMES.keys()),
- value='default',
- label="Theme"
+
+ with gr.Row():
+ language_selector = gr.Dropdown(
+ choices=list(config.SUPPORTED_LANGUAGES.values()),
+ value="Auto Detect",
+ label="Language"
+ )
+ theme_selector = gr.Dropdown(
+ choices=list(config.THEMES.keys()),
+ value="default",
+ label="Theme"
+ )
+
+ with gr.Row():
+ clean_text_cb = gr.Checkbox(label="Clean Text", value=False)
+ remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
+ remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
+
+ analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
+
+ gr.Examples(
+ examples=app.examples,
+ inputs=text_input,
+ cache_examples=False
)
- with gr.Row():
- clean_text = gr.Checkbox(label="Clean Text", value=False)
- remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
- remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
-
- analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")
-
- gr.Examples(
- examples=SAMPLE_TEXTS,
- inputs=text_input,
- label="Sample Texts (Multiple Languages)"
- )
+ with gr.Column():
+ result_output = gr.Textbox(label="Analysis Results", lines=8)
- with gr.Column(scale=1):
- result_info = gr.Markdown("Enter text and click Analyze")
-
- with gr.Row():
- gauge_plot = gr.Plot(label="Sentiment Gauge")
- bars_plot = gr.Plot(label="Probability Distribution")
-
- with gr.Tab("🔬 Advanced Analysis"):
- with gr.Row():
- with gr.Column(scale=2):
- advanced_input = gr.Textbox(
- label="Text for Advanced Analysis",
- placeholder="Enter text for explainability analysis...",
- lines=4
- )
-
- with gr.Row():
- advanced_language = gr.Dropdown(
- choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
- value='Auto Detect',
- label="Language"
+ with gr.Row():
+ gauge_plot = gr.Plot(label="Sentiment Gauge")
+ probability_plot = gr.Plot(label="Probability Distribution")
+
+ with gr.Row():
+
+ keyword_plot = gr.Plot(label="Key Contributing Words")
+
+ with gr.Tab("Batch Analysis"):
+ with gr.Row():
+ with gr.Column():
+ file_upload = gr.File(
+ label="Upload File (CSV/TXT)",
+ file_types=[".csv", ".txt"]
)
- advanced_theme = gr.Dropdown(
- choices=list(config.THEMES.keys()),
- value='default',
- label="Theme"
+ batch_input = gr.Textbox(
+ label="Batch Input (one text per line)",
+ placeholder="Enter multiple texts, one per line...",
+ lines=10
)
+
+ with gr.Row():
+ batch_language = gr.Dropdown(
+ choices=list(config.SUPPORTED_LANGUAGES.values()),
+ value="Auto Detect",
+ label="Language"
+ )
+ batch_theme = gr.Dropdown(
+ choices=list(config.THEMES.keys()),
+ value="default",
+ label="Theme"
+ )
+
+ with gr.Row():
+ batch_clean_cb = gr.Checkbox(label="Clean Text", value=False)
+ batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
+ batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
+
+ with gr.Row():
+ load_file_btn = gr.Button("Load File")
+ analyze_batch_btn = gr.Button("Analyze Batch", variant="primary")
- gr.Markdown("### 🔍 Explainability Options")
- with gr.Row():
- use_lime = gr.Checkbox(label="Use LIME Analysis", value=True)
- use_attention = gr.Checkbox(label="Use Attention Weights", value=True)
-
- lime_features = gr.Slider(
- minimum=5,
- maximum=20,
- value=10,
- step=1,
- label="LIME Features Count"
- )
-
- advanced_analyze_btn = gr.Button("🔬 Advanced Analyze", variant="primary", size="lg")
-
- with gr.Column(scale=1):
- advanced_result_info = gr.Markdown("Configure explainability settings and click Advanced Analyze")
-
- with gr.Row():
- advanced_gauge_plot = gr.Plot(label="Sentiment Gauge")
- advanced_bars_plot = gr.Plot(label="Probability Distribution")
-
- with gr.Row():
- lime_plot = gr.Plot(label="LIME Feature Importance")
- attention_plot = gr.Plot(label="Attention Weights")
-
- with gr.Tab("📊 Batch Analysis"):
- with gr.Row():
- with gr.Column(scale=2):
- batch_input = gr.Textbox(
- label="Batch Text Input (One text per line)",
- placeholder="Enter multiple texts, one per line...",
- lines=8
- )
-
- with gr.Row():
- batch_language = gr.Dropdown(
- choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
- value='Auto Detect',
- label="Language"
+ with gr.Column():
+ batch_summary = gr.Textbox(label="Batch Summary", lines=8)
+ batch_results_df = gr.Dataframe(
+ label="Detailed Results",
+ headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Keywords"],
+ datatype=["number", "str", "str", "str", "str", "str"]
)
- batch_theme = gr.Dropdown(
+
+ with gr.Row():
+ batch_plot = gr.Plot(label="Batch Analysis Summary")
+ confidence_dist_plot = gr.Plot(label="Confidence Distribution")
+
+ with gr.Tab("History & Analytics"):
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ refresh_history_btn = gr.Button("Refresh History")
+ clear_history_btn = gr.Button("Clear History", variant="stop")
+ status_btn = gr.Button("Get Status")
+
+ history_theme = gr.Dropdown(
choices=list(config.THEMES.keys()),
- value='default',
- label="Theme"
+ value="default",
+ label="Dashboard Theme"
)
+
+ with gr.Row():
+ export_csv_btn = gr.Button("Export CSV")
+ export_json_btn = gr.Button("Export JSON")
- with gr.Row():
- batch_clean = gr.Checkbox(label="Clean Text", value=False)
- batch_remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
- batch_remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
-
- batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
-
- gr.Examples(
- examples=[[BATCH_SAMPLE]],
- inputs=batch_input,
- label="Sample Batch Input"
- )
+ with gr.Column():
+ history_status = gr.Textbox(label="History Status", lines=8)
- with gr.Column(scale=1):
- batch_summary = gr.Markdown("Enter texts and click Analyze Batch")
+ history_dashboard = gr.Plot(label="History Analytics Dashboard")
+
+ with gr.Row():
+ csv_download = gr.File(label="CSV Download", visible=True)
+ json_download = gr.File(label="JSON Download", visible=True)
+
+
+
+
+
- with gr.Row():
- batch_results_table = gr.DataFrame(
- label="Detailed Results",
- interactive=False
- )
+ # Event Handlers
+ analyze_btn.click(
+ app.analyze_single,
+ inputs=[text_input, language_selector, theme_selector,
+ clean_text_cb, remove_punct_cb, remove_nums_cb],
+ outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
+ )
- with gr.Row():
- batch_summary_plot = gr.Plot(label="Sentiment Summary")
- batch_confidence_plot = gr.Plot(label="Confidence Distribution")
-
- with gr.Tab("📈 History & Analytics"):
- with gr.Row():
- with gr.Column():
- gr.Markdown("### 📊 Statistics")
- stats_btn = gr.Button("📈 Get Statistics")
- recent_btn = gr.Button("🕒 Recent Analyses")
- stats_output = gr.Markdown("Click 'Get Statistics' to view analysis history")
-
- with gr.Column():
- gr.Markdown("### 🔍 Filter History")
- with gr.Row():
- sentiment_filter = gr.Dropdown(
- choices=["All", "Positive", "Negative", "Neutral"],
- value="All",
- label="Filter by Sentiment"
- )
- language_filter = gr.Dropdown(
- choices=["All", "English", "Chinese", "Spanish", "French", "German", "Swedish"],
- value="All",
- label="Filter by Language"
- )
-
- confidence_filter = gr.Slider(
- minimum=0.0,
- maximum=1.0,
- value=0.0,
- step=0.1,
- label="Minimum Confidence"
- )
-
- filter_btn = gr.Button("🔍 Filter History")
+ load_file_btn.click(
+ app.data_handler.process_file,
+ inputs=file_upload,
+ outputs=batch_input
+ )
- with gr.Row():
- dashboard_btn = gr.Button("📊 View Dashboard")
- clear_btn = gr.Button("🗑️ Clear History", variant="stop")
+ analyze_batch_btn.click(
+ app.analyze_batch,
+ inputs=[batch_input, batch_language, batch_theme,
+ batch_clean_cb, batch_punct_cb, batch_nums_cb],
+ outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
+ )
- with gr.Row():
- export_csv_btn = gr.Button("📄 Export CSV")
- export_excel_btn = gr.Button("📊 Export Excel")
+ refresh_history_btn.click(
+ app.plot_history,
+ inputs=history_theme,
+ outputs=[history_dashboard, history_status]
+ )
- dashboard_plot = gr.Plot(label="Analytics Dashboard")
+ clear_history_btn.click(
+ lambda: f"Cleared {app.history.clear()} entries",
+ outputs=history_status
+ )
- with gr.Row():
- filtered_results = gr.Markdown("Use filters to view specific entries")
- filtered_table = gr.DataFrame(label="Filtered History", interactive=False)
+ status_btn.click(
+ app.get_history_status,
+ outputs=history_status
+ )
- csv_file = gr.File(label="Download CSV Report")
- excel_file = gr.File(label="Download Excel Report")
- history_status = gr.Textbox(label="Status", interactive=False)
-
- # Event handlers
-
- # Single Analysis
- analyze_btn.click(
- analyze_single_text,
- inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
- outputs=[result_info, gauge_plot, bars_plot]
- )
-
- # Batch Analysis
- batch_analyze_btn.click(
- analyze_batch_texts,
- inputs=[batch_input, batch_language, batch_theme, batch_clean, batch_remove_punct, batch_remove_nums],
- outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
- )
-
- # Advanced Analysis
- advanced_analyze_btn.click(
- analyze_advanced_text,
- inputs=[advanced_input, advanced_language, advanced_theme, use_lime, use_attention, lime_features],
- outputs=[advanced_result_info, advanced_gauge_plot, advanced_bars_plot, lime_plot, attention_plot]
- )
-
- # History & Analytics
- stats_btn.click(
- get_history_stats,
- outputs=stats_output
- )
-
- recent_btn.click(
- get_recent_analyses,
- outputs=stats_output
- )
-
- filter_btn.click(
- filter_history_display,
- inputs=[sentiment_filter, language_filter, confidence_filter],
- outputs=[filtered_results, filtered_table]
- )
-
- dashboard_btn.click(
- plot_history_dashboard,
- outputs=[dashboard_plot, history_status]
- )
-
- export_csv_btn.click(
- export_history_csv,
- outputs=[csv_file, history_status]
- )
-
- export_excel_btn.click(
- export_history_excel,
- outputs=[excel_file, history_status]
- )
+ export_csv_btn.click(
+ lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
+ outputs=[csv_download, history_status]
+ )
+
+ export_json_btn.click(
+ lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
+ outputs=[json_download, history_status]
+ )
- clear_btn.click(
- clear_all_history,
- outputs=history_status
- )
+ return demo
+# Application entry point: configure logging, build the interface, and launch it.
if __name__ == "__main__":
- demo.launch(share=True)
\ No newline at end of file
+ logging.basicConfig(  # NOTE(review): an earlier hunk of this patch removes `import logging`; confirm it is still imported
+ level=logging.INFO,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'  # timestamp - logger name - level - message
+ )
+ # Build the interface and start serving; any failure is logged and then re-raised.
+ try:
+ demo = create_interface()
+ demo.launch(
+ share=True,  # unchanged from the replaced `demo.launch(share=True)` call
+ server_name="0.0.0.0",  # listen on all network interfaces
+ server_port=7860,
+ show_error=True
+ )
+ except Exception as e:
+ logger.error(f"Failed to launch application: {e}")  # NOTE(review): `logger` is not defined in this block; confirm a module-level logger exists
+ raise  # re-raise after logging so the failure is not swallowed
\ No newline at end of file