diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -7,43 +7,18 @@ from plotly.subplots import make_subplots import numpy as np from wordcloud import WordCloud from collections import Counter, defaultdict -import re -import json -import csv -import io -import tempfile -from datetime import datetime -import logging -from functools import lru_cache from dataclasses import dataclass -from typing import List, Dict, Optional, Tuple +from typing import List, Dict, Optional, Tuple, Any, Callable +from contextlib import contextmanager import nltk from nltk.corpus import stopwords import langdetect import pandas as pd - -# Try to import SHAP and LIME, fall back to basic analysis if not available -try: - import shap - SHAP_AVAILABLE = True -except ImportError: - SHAP_AVAILABLE = False - logger.warning("SHAP not available, using basic analysis") - -try: - from lime.lime_text import LimeTextExplainer - LIME_AVAILABLE = True -except ImportError: - LIME_AVAILABLE = False - logger.warning("LIME not available, using basic analysis") +import gc # Configuration -@dataclass -class Config: - MAX_HISTORY_SIZE: int = 500 - BATCH_SIZE_LIMIT: int = 30 - MAX_TEXT_LENGTH: int = 512 - CACHE_SIZE: int = 64 + CACHE_SIZE: int = 128 + BATCH_PROCESSING_SIZE: int = 8 # Supported languages and models SUPPORTED_LANGUAGES = { @@ -60,9 +35,10 @@ class Config: 'en': "cardiffnlp/twitter-roberta-base-sentiment-latest", 'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment", 'zh': "uer/roberta-base-finetuned-dianping-chinese" + } - # Color themes + # Color themes for Plotly THEMES = { 'default': {'pos': '#4CAF50', 'neg': '#F44336', 'neu': '#FF9800'}, 'ocean': {'pos': '#0077BE', 'neg': '#FF6B35', 'neu': '#00BCD4'}, @@ -84,16 +60,59 @@ try: except: STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'} +# Decorators and Context Managers +def handle_errors(default_return=None): + """Centralized error handling decorator""" + return decorator + +@contextmanager +def memory_cleanup(): + """Context manager for memory cleanup""" + + try: + yield + finally: + + gc.collect() + +class ThemeContext: + self.theme = theme + self.colors = config.THEMES.get(theme, config.THEMES['default']) + +# Enhanced Model Manager with Multi-language Support class ModelManager: - """Manages multiple language models""" + """Multi-language model manager with lazy loading""" + _instance = None + + + + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._initialized = False + return cls._instance + def __init__(self): - self.models = {} - self.tokenizers = {} - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self._load_default_model() + if not self._initialized: + self.models = {} + self.tokenizers = {} + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self._load_default_models() + self._initialized = True - def _load_default_model(self): - """Load the default models""" + def _load_default_models(self): + """Load default models""" + + + + + + + + + + try: # Load multilingual model as default model_name = config.MODELS['multilingual'] @@ -117,17 +136,13 @@ class ModelManager: """Get model for specific language""" if language == 'zh': return self.models['zh'], self.tokenizers['zh'] - elif language in ['en', 'auto'] or language not in config.SUPPORTED_LANGUAGES: - return self.models['default'], self.tokenizers['default'] - return self.models['default'], self.tokenizers['default'] # Use multilingual for other languages + return self.models['default'], self.tokenizers['default'] @staticmethod def detect_language(text: str) -> str: - """Detect text language properly""" + """Detect text language""" try: - # Use langdetect for all languages detected = langdetect.detect(text) - # Map some common langdetect codes to our supported languages language_mapping = { 'zh-cn': 'zh', 'zh-tw': 'zh' @@ -137,32 +152,80 @@ class ModelManager: except: return 'en' -model_manager = ModelManager() +# Simplified Text Processing +class TextProcessor: + """Optimized text processing with multi-language support""" + + @staticmethod + @lru_cache(maxsize=config.CACHE_SIZE) + def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str: + """Clean text with language awareness""" + text = text.strip() + + # Don't clean Chinese text aggressively + if re.search(r'[\u4e00-\u9fff]', text): + return text + + text = text.lower() + + if remove_numbers: + text = re.sub(r'\d+', '', text) + + if remove_punctuation: + text = re.sub(r'[^\w\s]', '', text) + + words = text.split() + cleaned_words = [w for w in words if w not in STOP_WORDS and len(w) >= config.MIN_WORD_LENGTH] + return ' '.join(cleaned_words) + + @staticmethod + def extract_keywords(text: str, top_k: int = 5) -> List[str]: + """Extract keywords with language support""" + if re.search(r'[\u4e00-\u9fff]', text): + # Chinese text processing + words = re.findall(r'[\u4e00-\u9fff]+', text) + all_chars = ''.join(words) + char_freq = Counter(all_chars) + return [char for char, _ in char_freq.most_common(top_k)] + else: + # Other languages + cleaned = TextProcessor.clean_text(text) + words = cleaned.split() + word_freq = Counter(words) + return [word for word, _ in word_freq.most_common(top_k)] + + @staticmethod + def parse_batch_input(text: str) -> List[str]: + """Parse batch input from textarea""" + lines = text.strip().split('\n') + return [line.strip() for line in lines if line.strip()] +# Enhanced History Manager class HistoryManager: - """Enhanced history manager with more features""" + """Enhanced history management with filtering""" def __init__(self): self._history = [] - def add_entry(self, entry: Dict): + def add(self, entry: Dict): + """Add entry with timestamp""" + entry['timestamp'] = datetime.now().isoformat() self._history.append(entry) if len(self._history) > config.MAX_HISTORY_SIZE: self._history = self._history[-config.MAX_HISTORY_SIZE:] - def add_batch_entries(self, entries: List[Dict]): - """Add multiple entries at once""" + def add_batch(self, entries: List[Dict]): + """Add multiple entries""" for entry in entries: - self.add_entry(entry) + self.add(entry) - def get_history(self) -> List[Dict]: + def get_all(self) -> List[Dict]: return self._history.copy() - def get_recent_history(self, n: int = 10) -> List[Dict]: - """Get n most recent entries""" + def get_recent(self, n: int = 10) -> List[Dict]: return self._history[-n:] if self._history else [] - def filter_history(self, sentiment: str = None, language: str = None, - min_confidence: float = None) -> List[Dict]: + def filter_by(self, sentiment: str = None, language: str = None, + min_confidence: float = None) -> List[Dict]: """Filter history by criteria""" filtered = self._history @@ -178,9 +241,12 @@ class HistoryManager: def clear(self) -> int: count = len(self._history) self._history.clear() - return count + + def size(self) -> int: + return len(self._history) def get_stats(self) -> Dict: + """Get comprehensive statistics""" if not self._history: return {} @@ -197,72 +263,109 @@ class HistoryManager: 'max_confidence': np.max(confidences), 'min_confidence': np.min(confidences), 'languages_detected': len(set(languages)), - 'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en', - 'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history]) + 'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en' } -history_manager = HistoryManager() - -class TextProcessor: - """Enhanced text processing""" +# Core Sentiment Analysis Engine +class SentimentEngine: + """Multi-language sentiment analysis engine""" - @staticmethod - @lru_cache(maxsize=config.CACHE_SIZE) - def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str: - """Clean text with options""" - text = text.lower().strip() - - if remove_numbers: - text = re.sub(r'\d+', '', text) - - if remove_punctuation: - text = re.sub(r'[^\w\s]', '', text) - - words = text.split() - cleaned_words = [w for w in words if w not in STOP_WORDS and len(w) > 2] - return ' '.join(cleaned_words) + def __init__(self): + self.model_manager = ModelManager() - @staticmethod - def extract_keywords(text: str, top_k: int = 5) -> List[str]: - """Extract key words from text""" - # For Chinese text, extract characters - if re.search(r'[\u4e00-\u9fff]', text): - words = re.findall(r'[\u4e00-\u9fff]+', text) - all_chars = ''.join(words) - char_freq = Counter(all_chars) - return [char for char, _ in char_freq.most_common(top_k)] - else: - # For other languages, use word-based extraction - cleaned = TextProcessor.clean_text(text) - words = cleaned.split() - word_freq = Counter(words) - return [word for word, _ in word_freq.most_common(top_k)] + def extract_attention_keywords(self, text: str, language: str = 'auto', top_k: int = 10) -> List[Tuple[str, float]]: + """Extract keywords using attention weights""" + try: + if language == 'auto': + language = self.model_manager.detect_language(text) + + model, tokenizer = self.model_manager.get_model(language) + + inputs = tokenizer( + text, return_tensors="pt", padding=True, + truncation=True, max_length=config.MAX_TEXT_LENGTH + ).to(self.model_manager.device) + + + with torch.no_grad(): + outputs = model(**inputs, output_attentions=True) + + + if hasattr(outputs, 'attentions') and outputs.attentions: + # Use attention weights + attention = outputs.attentions[-1] + avg_attention = attention.mean(dim=1)[0, 0, :] + + tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0]) + attention_scores = avg_attention.cpu().numpy() + + # Process tokens and scores + word_scores = {} + current_word = "" + current_score = 0.0 + + for token, score in zip(tokens, attention_scores): + if token in ['[CLS]', '[SEP]', '[PAD]', '', '']: + continue + + if token.startswith('##') or token.startswith('▁'): + current_word += token.replace('##', '').replace('▁', '') + current_score = max(current_score, score) + else: + if current_word and len(current_word) >= config.MIN_WORD_LENGTH: + word_scores[current_word.lower()] = current_score + current_word = token + current_score = score + + + + + + + + if current_word and len(current_word) >= config.MIN_WORD_LENGTH: + word_scores[current_word.lower()] = current_score + + # Filter and sort + filtered_words = { + word: score for word, score in word_scores.items() + if word not in STOP_WORDS and len(word) >= config.MIN_WORD_LENGTH + } + + sorted_words = sorted(filtered_words.items(), key=lambda x: x[1], reverse=True) + return sorted_words[:top_k] + + - @staticmethod - def parse_batch_input(text: str) -> List[str]: - """Parse batch input from textarea""" - lines = text.strip().split('\n') - return [line.strip() for line in lines if line.strip()] -class SentimentAnalyzer: - """Enhanced sentiment analysis""" + + + + + + except Exception as e: + logger.error(f"Attention keyword extraction failed: {e}") + + # Fallback to simple keyword extraction + keywords = TextProcessor.extract_keywords(text, top_k) + return [(word, 0.1) for word in keywords] - @staticmethod - def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict: - """Analyze single text with language support""" + @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'keywords': []}) + def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict: + """Analyze single text with enhanced features""" if not text.strip(): raise ValueError("Empty text provided") - # Detect language if auto + # Detect language if language == 'auto': - detected_lang = model_manager.detect_language(text) + detected_lang = self.model_manager.detect_language(text) else: detected_lang = language # Get appropriate model - model, tokenizer = model_manager.get_model(detected_lang) + model, tokenizer = self.model_manager.get_model(detected_lang) - # Preprocessing options - don't clean Chinese text + # Preprocessing options = preprocessing_options or {} processed_text = text if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text): @@ -272,324 +375,112 @@ class SentimentAnalyzer: options.get('remove_numbers', False) ) - try: - # Tokenize and analyze - inputs = tokenizer(processed_text, return_tensors="pt", padding=True, - truncation=True, max_length=config.MAX_TEXT_LENGTH).to(model_manager.device) - - with torch.no_grad(): - outputs = model(**inputs) - probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0] - - # Handle different model outputs - if len(probs) == 3: # negative, neutral, positive - sentiment_idx = np.argmax(probs) - sentiment_labels = ['Negative', 'Neutral', 'Positive'] - sentiment = sentiment_labels[sentiment_idx] - confidence = float(probs[sentiment_idx]) - - result = { - 'sentiment': sentiment, - 'confidence': confidence, - 'neg_prob': float(probs[0]), - 'neu_prob': float(probs[1]), - 'pos_prob': float(probs[2]), - 'has_neutral': True - } - else: # negative, positive - pred = np.argmax(probs) - sentiment = "Positive" if pred == 1 else "Negative" - confidence = float(probs[pred]) - - result = { - 'sentiment': sentiment, - 'confidence': confidence, - 'neg_prob': float(probs[0]), - 'pos_prob': float(probs[1]), - 'neu_prob': 0.0, - 'has_neutral': False - } - - # Add metadata - result.update({ - 'language': detected_lang, - 'keywords': TextProcessor.extract_keywords(text), - 'word_count': len(text.split()), - 'char_count': len(text) - }) - - return result - - except Exception as e: - logger.error(f"Analysis failed: {e}") - raise - - @staticmethod - def analyze_batch(texts: List[str], language: str = 'auto', - preprocessing_options: Dict = None) -> List[Dict]: - """Analyze multiple texts""" - results = [] - for i, text in enumerate(texts): - try: - result = SentimentAnalyzer.analyze_text(text, language, preprocessing_options) - result['batch_index'] = i - results.append(result) - except Exception as e: - # Add error result - results.append({ - 'sentiment': 'Error', - 'confidence': 0.0, - 'error': str(e), - 'batch_index': i, - 'text': text - }) - return results + # Tokenize and analyze + inputs = tokenizer(processed_text, return_tensors="pt", padding=True, + truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device) -class ExplainabilityAnalyzer: - """SHAP and LIME explainability analysis with fallbacks""" - - @staticmethod - def create_prediction_function(model, tokenizer, device): - """Create prediction function for LIME""" - def predict_proba(texts): - if isinstance(texts, str): - texts = [texts] - - results = [] - for text in texts: - try: - inputs = tokenizer(text, return_tensors="pt", padding=True, - truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device) - with torch.no_grad(): - outputs = model(**inputs) - probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0] - results.append(probs) - except Exception as e: - # Return neutral probabilities on error - if len(results) > 0: - results.append(results[0]) # Use previous result - else: - results.append(np.array([0.33, 0.33, 0.34])) # Neutral fallback - - return np.array(results) - return predict_proba - - @staticmethod - def analyze_with_lime(text: str, model, tokenizer, device, num_features: int = 10) -> Dict: - """Analyze text with LIME""" - if not LIME_AVAILABLE: - return {'method': 'LIME', 'error': 'LIME library not available'} - try: - # Create prediction function - predict_fn = ExplainabilityAnalyzer.create_prediction_function(model, tokenizer, device) - - # Test prediction function first - test_probs = predict_fn([text]) - if len(test_probs) == 0: - return {'method': 'LIME', 'error': 'Prediction function failed'} - - # Determine class names based on model output - num_classes = len(test_probs[0]) - if num_classes == 3: - class_names = ['Negative', 'Neutral', 'Positive'] - else: - class_names = ['Negative', 'Positive'] - - # Initialize LIME explainer - explainer = LimeTextExplainer( - class_names=class_names, - feature_selection='auto', - split_expression=r'\W+', - bow=False - ) - - # Generate explanation - explanation = explainer.explain_instance( - text, - predict_fn, - num_features=min(num_features, len(text.split())), - num_samples=50 # Reduced for faster processing - ) - - # Extract feature importance - feature_importance = explanation.as_list() + with torch.no_grad(): + outputs = model(**inputs) + probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0] + + # Handle different model outputs + if len(probs) == 3: # negative, neutral, positive + sentiment_idx = np.argmax(probs) + sentiment_labels = ['Negative', 'Neutral', 'Positive'] + sentiment = sentiment_labels[sentiment_idx] + confidence = float(probs[sentiment_idx]) - return { - 'method': 'LIME', - 'feature_importance': feature_importance, - 'class_names': class_names + result = { + 'sentiment': sentiment, + 'confidence': confidence, + 'neg_prob': float(probs[0]), + 'neu_prob': float(probs[1]), + 'pos_prob': float(probs[2]), + 'has_neutral': True } + else: # negative, positive + pred = np.argmax(probs) + sentiment = "Positive" if pred == 1 else "Negative" + confidence = float(probs[pred]) - except Exception as e: - logger.error(f"LIME analysis failed: {e}") - return {'method': 'LIME', 'error': str(e)} - - @staticmethod - def analyze_with_attention(text: str, model, tokenizer, device) -> Dict: - """Analyze text with attention weights - simplified version""" - try: - # Tokenize input - inputs = tokenizer(text, return_tensors="pt", padding=True, - truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device) - - # Get tokens for display - tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0]) - - # Simple attention simulation based on input importance - # This is a fallback when model doesn't support attention output - try: - with torch.no_grad(): - outputs = model(**inputs, output_attentions=True) - if hasattr(outputs, 'attentions') and outputs.attentions is not None: - attentions = outputs.attentions - # Average attention across layers and heads - avg_attention = torch.mean(torch.stack(attentions), dim=(0, 1, 2)).cpu().numpy() - else: - raise AttributeError("No attention outputs") - except: - # Fallback: simulate attention based on token position and type - avg_attention = np.random.uniform(0.1, 1.0, len(tokens)) - # Give higher attention to non-special tokens - for i, token in enumerate(tokens): - if token in ['[CLS]', '[SEP]', '', '', '']: - avg_attention[i] *= 0.3 - - # Create attention weights for each token - attention_weights = [] - for i, token in enumerate(tokens): - if i < len(avg_attention): - # Clean token for display - clean_token = token.replace('Ġ', '').replace('##', '') - if clean_token.strip(): - attention_weights.append((clean_token, float(avg_attention[i]))) - - return { - 'method': 'Attention', - 'tokens': [t[0] for t in attention_weights], - 'attention_weights': attention_weights + result = { + 'sentiment': sentiment, + 'confidence': confidence, + 'neg_prob': float(probs[0]), + 'pos_prob': float(probs[1]), + 'neu_prob': 0.0, + 'has_neutral': False } - - except Exception as e: - logger.error(f"Attention analysis failed: {e}") - return {'method': 'Attention', 'error': str(e)} - -class AdvancedVisualizer: - """Visualizations for explainability analysis""" - - @staticmethod - def create_lime_plot(lime_result: Dict, theme: str = 'default') -> go.Figure: - """Create LIME feature importance plot""" - if 'error' in lime_result: - fig = go.Figure() - fig.add_annotation(text=f"LIME Error: {lime_result['error']}", - x=0.5, y=0.5, showarrow=False) - return fig - - features, scores = zip(*lime_result['feature_importance']) - colors = ['red' if score < 0 else 'green' for score in scores] - fig = go.Figure(data=[ - go.Bar( - y=features, - x=scores, - orientation='h', - marker_color=colors, - text=[f'{score:.3f}' for score in scores], - textposition='auto' - ) - ]) + # Extract keywords + keywords = self.extract_attention_keywords(text, detected_lang) - fig.update_layout( - title="LIME Feature Importance", - xaxis_title="Importance Score", - yaxis_title="Features", - height=400, - showlegend=False - ) + # Add metadata + result.update({ + 'language': detected_lang, + 'keywords': keywords, + 'word_count': len(text.split()), + 'char_count': len(text) + }) - return fig + return result - @staticmethod - def create_attention_plot(attention_result: Dict, theme: str = 'default') -> go.Figure: - """Create attention weights visualization""" - if 'error' in attention_result: - fig = go.Figure() - fig.add_annotation( - text=f"Attention Error: {attention_result['error']}", - x=0.5, y=0.5, - xref="paper", yref="paper", - showarrow=False, - font=dict(size=14) - ) - fig.update_layout(height=400, title="Attention Analysis Error") - return fig - - if not attention_result.get('attention_weights'): - fig = go.Figure() - fig.add_annotation( - text="No attention weights available", - x=0.5, y=0.5, - xref="paper", yref="paper", - showarrow=False - ) - fig.update_layout(height=400, title="No Attention Data") - return fig - - tokens, weights = zip(*attention_result['attention_weights']) - - # Normalize weights for better visualization - weights = np.array(weights) - if weights.max() > weights.min(): - normalized_weights = (weights - weights.min()) / (weights.max() - weights.min()) - else: - normalized_weights = weights - - # Limit display to top 15 tokens for readability - if len(tokens) > 15: - # Get top 15 by attention weight - top_indices = np.argsort(weights)[-15:] - tokens = [tokens[i] for i in top_indices] - normalized_weights = normalized_weights[top_indices] - - fig = go.Figure(data=[ - go.Bar( - x=list(range(len(tokens))), - y=normalized_weights, - text=tokens, - textposition='outside', - marker_color=normalized_weights, - colorscale='Viridis', - hovertemplate='%{text}
Weight: %{y:.3f}' - ) - ]) + @handle_errors(default_return=[]) + def analyze_batch(self, texts: List[str], language: str = 'auto', + preprocessing_options: Dict = None, progress_callback=None) -> List[Dict]: + """Optimized batch processing""" + if len(texts) > config.BATCH_SIZE_LIMIT: + texts = texts[:config.BATCH_SIZE_LIMIT] - fig.update_layout( - title="Attention Weights (Top Tokens)", - xaxis_title="Token Position", - yaxis_title="Attention Weight (Normalized)", - height=400, - showlegend=False, - xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens) - ) + if progress_callback: + progress_callback((i + len(batch)) / len(texts)) + + for text in batch: + try: + result = self.analyze_single(text, language, preprocessing_options) + result['batch_index'] = len(results) + result['text'] = text[:100] + '...' if len(text) > 100 else text + result['full_text'] = text + results.append(result) + except Exception as e: + results.append({ + 'sentiment': 'Error', + 'confidence': 0.0, + 'error': str(e), + 'batch_index': len(results), + 'text': text[:100] + '...' if len(text) > 100 else text, + 'full_text': text + }) + + + + + + + - return fig - """Enhanced visualizations with Plotly""" + return results + +# Advanced Plotly Visualization System +class PlotlyVisualizer: + """Enhanced Plotly visualizations""" @staticmethod - def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure: - """Create an animated sentiment gauge""" - colors = config.THEMES[theme] + @handle_errors(default_return=None) + def create_sentiment_gauge(result: Dict, theme: ThemeContext) -> go.Figure: + """Create animated sentiment gauge""" + colors = theme.colors - if result['has_neutral']: + if result.get('has_neutral', False): # Three-way gauge fig = go.Figure(go.Indicator( - mode = "gauge+number+delta", - value = result['pos_prob'] * 100, - domain = {'x': [0, 1], 'y': [0, 1]}, - title = {'text': f"Sentiment: {result['sentiment']}"}, - delta = {'reference': 50}, - gauge = { + mode="gauge+number+delta", + value=result['pos_prob'] * 100, + domain={'x': [0, 1], 'y': [0, 1]}, + title={'text': f"Sentiment: {result['sentiment']}"}, + delta={'reference': 50}, + gauge={ 'axis': {'range': [None, 100]}, 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']}, 'steps': [ @@ -607,11 +498,11 @@ class AdvancedVisualizer: else: # Two-way gauge fig = go.Figure(go.Indicator( - mode = "gauge+number", - value = result['confidence'] * 100, - domain = {'x': [0, 1], 'y': [0, 1]}, - title = {'text': f"Confidence: {result['sentiment']}"}, - gauge = { + mode="gauge+number", + value=result['confidence'] * 100, + domain={'x': [0, 1], 'y': [0, 1]}, + title={'text': f"Confidence: {result['sentiment']}"}, + gauge={ 'axis': {'range': [None, 100]}, 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']}, 'steps': [ @@ -625,11 +516,12 @@ class AdvancedVisualizer: return fig @staticmethod - def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure: + @handle_errors(default_return=None) + def create_probability_bars(result: Dict, theme: ThemeContext) -> go.Figure: """Create probability bar chart""" - colors = config.THEMES[theme] + colors = theme.colors - if result['has_neutral']: + if result.get('has_neutral', False): labels = ['Negative', 'Neutral', 'Positive'] values = [result['neg_prob'], result['neu_prob'], result['pos_prob']] bar_colors = [colors['neg'], colors['neu'], colors['pos']] @@ -639,10 +531,10 @@ class AdvancedVisualizer: bar_colors = [colors['neg'], colors['pos']] fig = go.Figure(data=[ - go.Bar(x=labels, y=values, marker_color=bar_colors, text=[f'{v:.3f}' for v in values]) + go.Bar(x=labels, y=values, marker_color=bar_colors, + text=[f'{v:.3f}' for v in values], textposition='outside') ]) - fig.update_traces(texttemplate='%{text}', textposition='outside') fig.update_layout( title="Sentiment Probabilities", yaxis_title="Probability", @@ -651,14 +543,71 @@ class AdvancedVisualizer: ) return fig + + @staticmethod + @handle_errors(default_return=None) + def create_keyword_chart(keywords: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> go.Figure: + """Create keyword importance chart""" + if not keywords: + fig = go.Figure() + fig.add_annotation(text="No keywords extracted", + xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False) + fig.update_layout(height=400, title="Keywords") + + + + + + + + + + + + + + + + + + + return fig + + words = [word for word, score in keywords] + scores = [score for word, score in keywords] + + color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg'] + + fig = go.Figure(data=[ + go.Bar( + y=words, + x=scores, + orientation='h', + marker_color=color, + text=[f'{score:.3f}' for score in scores], + textposition='auto' + ) + ]) + + fig.update_layout( + title=f"Top Keywords ({sentiment})", + xaxis_title="Attention Weight", + yaxis_title="Keywords", + height=400, + showlegend=False + ) + + return fig + @staticmethod - def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure: + @handle_errors(default_return=None) + def create_batch_summary(results: List[Dict], theme: ThemeContext) -> go.Figure: """Create batch analysis summary""" - colors = config.THEMES[theme] + colors = theme.colors # Count sentiments - sentiments = [r['sentiment'] for r in results if 'sentiment' in r] + sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error'] sentiment_counts = Counter(sentiments) # Create pie chart @@ -677,7 +626,16 @@ class AdvancedVisualizer: return fig + + + + + + + + @staticmethod + @handle_errors(default_return=None) def create_confidence_distribution(results: List[Dict]) -> go.Figure: """Create confidence distribution plot""" confidences = [r['confidence'] for r in results if 'confidence' in r and r['sentiment'] != 'Error'] @@ -702,7 +660,8 @@ class AdvancedVisualizer: return fig @staticmethod - def create_history_dashboard(history: List[Dict]) -> go.Figure: + @handle_errors(default_return=None) + def create_history_dashboard(history: List[Dict], theme: ThemeContext) -> go.Figure: """Create comprehensive history dashboard""" if len(history) < 2: return go.Figure() @@ -718,13 +677,15 @@ class AdvancedVisualizer: # Extract data indices = list(range(len(history))) - pos_probs = [item['pos_prob'] for item in history] + pos_probs = [item.get('pos_prob', 0) for item in history] confidences = [item['confidence'] for item in history] sentiments = [item['sentiment'] for item in history] languages = [item.get('language', 'en') for item in history] # Sentiment timeline - colors = ['#4CAF50' if s == 'Positive' else '#F44336' for s in sentiments] + colors_map = {'Positive': theme.colors['pos'], 'Negative': theme.colors['neg'], 'Neutral': theme.colors['neu']} + colors = [colors_map.get(s, '#999999') for s in sentiments] + fig.add_trace( go.Scatter(x=indices, y=pos_probs, mode='lines+markers', marker=dict(color=colors, size=8), @@ -748,33 +709,113 @@ class AdvancedVisualizer: # Sentiment summary sent_counts = Counter(sentiments) + sent_colors = [colors_map.get(k, '#999999') for k in sent_counts.keys()] fig.add_trace( go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()), - marker_color=['#4CAF50' if k == 'Positive' else '#F44336' for k in sent_counts.keys()]), + marker_color=sent_colors), row=2, col=2 ) fig.update_layout(height=800, showlegend=False) return fig -# Main application functions -def analyze_single_text(text: str, language: str, theme: str, clean_text: bool, +# Universal Data Handler +class DataHandler: + """Enhanced data operations""" + + @staticmethod + @handle_errors(default_return=(None, "Export failed")) + def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]: + """Export data with comprehensive information""" + if not data: + return None, "No data to export" + + + if format_type == 'csv': + writer = csv.writer(temp_file) + writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Language', + 'Pos_Prob', 'Neg_Prob', 'Neu_Prob', 'Keywords', 'Word_Count']) + for entry in data: + keywords_str = "|".join([f"{word}:{score:.3f}" for word, score in entry.get('keywords', [])]) + writer.writerow([ + entry.get('timestamp', ''), + entry.get('text', ''), + entry.get('sentiment', ''), + f"{entry.get('confidence', 0):.4f}", + entry.get('language', 'en'), + f"{entry.get('pos_prob', 0):.4f}", + f"{entry.get('neg_prob', 0):.4f}", + f"{entry.get('neu_prob', 0):.4f}", + keywords_str, + entry.get('word_count', 0) + ]) + elif format_type == 'json': + json.dump(data, temp_file, indent=2, ensure_ascii=False) + temp_file.close() + return temp_file.name, f"Exported {len(data)} entries" + + + @staticmethod + @handle_errors(default_return="") + def process_file(file) -> str: + """Process uploaded files""" + if not file: + return "" + + content = file.read().decode('utf-8') + + if file.name.endswith('.csv'): + + csv_file = io.StringIO(content) + reader = csv.reader(csv_file) + try: + next(reader) # Skip header + texts = [] + for row in reader: + if row and row[0].strip(): + text = row[0].strip().strip('"') + if text: + texts.append(text) + return '\n'.join(texts) + except: + lines = content.strip().split('\n')[1:] + texts = [] + for line in lines: + if line.strip(): + if text: + texts.append(text) + return '\n'.join(texts) + + return content + +# Main Application Class +class SentimentApp: + """Main multilingual sentiment analysis application""" + + def __init__(self): + self.engine = SentimentEngine() + self.history = HistoryManager() + self.data_handler = DataHandler() + + # Multi-language examples + self.examples = [ + ["This movie was absolutely fantastic! The acting was superb and the plot kept me engaged throughout."], + ["The film was disappointing with poor character development and a confusing storyline."], + ["这部电影真的很棒!演技精湛,情节引人入胜。"], # Chinese + ["Esta película fue increíble, me encantó la cinematografía."], # Spanish + ["Ce film était magnifique, j'ai adoré la réalisation."], # French + ] + + + @handle_errors(default_return=("Please enter text", None, None, None)) + def analyze_single(self, text: str, language: str, theme: str, clean_text: bool, remove_punct: bool, remove_nums: bool): - """Enhanced single text analysis""" - try: + """Single text analysis with enhanced visualizations""" if not text.strip(): - return "Please enter text", None, None - - # Map display names back to language codes - language_map = { - 'Auto Detect': 'auto', - 'English': 'en', - 'Chinese': 'zh', - 'Spanish': 'es', - 'French': 'fr', - 'German': 'de', - 'Swedish': 'sv' - } + return "Please enter text", None, None, None + + # Map display names to language codes + language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()} language_code = language_map.get(language, 'auto') preprocessing_options = { @@ -783,46 +824,48 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool, 'remove_numbers': remove_nums } - result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options) - - # Add to history - history_entry = { - 'text': text[:100] + '...' if len(text) > 100 else text, - 'full_text': text, - 'sentiment': result['sentiment'], - 'confidence': result['confidence'], - 'pos_prob': result['pos_prob'], - 'neg_prob': result['neg_prob'], - 'neu_prob': result.get('neu_prob', 0), - 'language': result['language'], - 'timestamp': datetime.now().isoformat(), - 'analysis_type': 'single' - } - history_manager.add_entry(history_entry) - - # Create visualizations - gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme) - bars_fig = PlotlyVisualizer.create_probability_bars(result, theme) - - # Create info text - info_text = f""" + with memory_cleanup(): + result = self.engine.analyze_single(text, language_code, preprocessing_options) + + # Add to history + history_entry = { + 'text': text[:100] + '...' if len(text) > 100 else text, + 'full_text': text, + 'sentiment': result['sentiment'], + 'confidence': result['confidence'], + 'pos_prob': result.get('pos_prob', 0), + 'neg_prob': result.get('neg_prob', 0), + 'neu_prob': result.get('neu_prob', 0), + 'language': result['language'], + 'keywords': result['keywords'], + 'word_count': result['word_count'], + 'analysis_type': 'single' + } + self.history.add(history_entry) + + # Create visualizations + theme_ctx = ThemeContext(theme) + gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme_ctx) + bars_fig = PlotlyVisualizer.create_probability_bars(result, theme_ctx) + keyword_fig = PlotlyVisualizer.create_keyword_chart(result['keywords'], result['sentiment'], theme_ctx) + + # Create comprehensive result text + keywords_str = ", ".join([f"{word}({score:.3f})" for word, score in result['keywords'][:5]]) + + info_text = f""" **Analysis Results:** - **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence) - **Language:** {result['language'].upper()} -- **Keywords:** {', '.join(result['keywords'])} -- **Stats:** {result['word_count']} words, {result['char_count']} characters - """ - - return info_text, gauge_fig, bars_fig - - except Exception as e: - logger.error(f"Analysis failed: {e}") - return f"Error: {str(e)}", None, None - -def analyze_batch_texts(batch_text: str, language: str, theme: str, - clean_text: bool, remove_punct: bool, remove_nums: bool): - """Batch text analysis""" - try: +- **Keywords:** {keywords_str} +- **Statistics:** {result['word_count']} words, {result['char_count']} characters + """ + + return info_text, gauge_fig, bars_fig, keyword_fig + + @handle_errors(default_return=("Please enter texts", None, None, None)) + def analyze_batch(self, batch_text: str, language: str, theme: str, + clean_text: bool, remove_punct: bool, remove_nums: bool): + """Enhanced batch analysis""" if not batch_text.strip(): return "Please enter texts (one per line)", None, None, None @@ -835,16 +878,8 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str, if not texts: return "No valid texts found", None, None, None - # Map display names back to language codes - language_map = { - 'Auto Detect': 'auto', - 'English': 'en', - 'Chinese': 'zh', - 'Spanish': 'es', - 'French': 'fr', - 'German': 'de', - 'Swedish': 'sv' - } + # Map display names to language codes + language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()} language_code = language_map.get(language, 'auto') preprocessing_options = { @@ -853,616 +888,669 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str, 'remove_numbers': remove_nums } - # Analyze all texts - results = SentimentAnalyzer.analyze_batch(texts, language_code, preprocessing_options) - - # Add to history - batch_entries = [] - for i, (text, result) in enumerate(zip(texts, results)): - if 'error' not in result: - entry = { - 'text': text[:100] + '...' if len(text) > 100 else text, - 'full_text': text, - 'sentiment': result['sentiment'], - 'confidence': result['confidence'], - 'pos_prob': result['pos_prob'], - 'neg_prob': result['neg_prob'], - 'neu_prob': result.get('neu_prob', 0), - 'language': result['language'], - 'timestamp': datetime.now().isoformat(), - 'analysis_type': 'batch', - 'batch_index': i - } - batch_entries.append(entry) - - history_manager.add_batch_entries(batch_entries) - - # Create visualizations - summary_fig = PlotlyVisualizer.create_batch_summary(results, theme) - confidence_fig = PlotlyVisualizer.create_confidence_distribution(results) - - # Create results table - df_data = [] - for i, (text, result) in enumerate(zip(texts, results)): - if 'error' in result: - df_data.append({ - 'Index': i+1, - 'Text': text[:50] + '...' if len(text) > 50 else text, - 'Sentiment': 'Error', - 'Confidence': 0.0, - 'Language': 'Unknown', - 'Error': result['error'] - }) - else: - df_data.append({ - 'Index': i+1, - 'Text': text[:50] + '...' if len(text) > 50 else text, - 'Sentiment': result['sentiment'], - 'Confidence': f"{result['confidence']:.3f}", - 'Language': result['language'].upper(), - 'Keywords': ', '.join(result['keywords'][:3]) - }) - - df = pd.DataFrame(df_data) - - # Summary info - successful_results = [r for r in results if 'error' not in r] - error_count = len(results) - len(successful_results) - - if successful_results: - sentiment_counts = Counter([r['sentiment'] for r in successful_results]) - avg_confidence = np.mean([r['confidence'] for r in successful_results]) + with memory_cleanup(): + results = self.engine.analyze_batch(texts, language_code, preprocessing_options) + + # Add to history + batch_entries = [] + for result in results: + if 'error' not in result: + entry = { + 'text': result['text'], + 'full_text': result['full_text'], + 'sentiment': result['sentiment'], + 'confidence': result['confidence'], + 'pos_prob': result.get('pos_prob', 0), + 'neg_prob': result.get('neg_prob', 0), + 'neu_prob': result.get('neu_prob', 0), + 'language': result['language'], + 'keywords': result['keywords'], + 'word_count': result['word_count'], + 'analysis_type': 'batch', + 'batch_index': result['batch_index'] + } + batch_entries.append(entry) + + self.history.add_batch(batch_entries) + + # Create visualizations + theme_ctx = ThemeContext(theme) + summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx) + confidence_fig = PlotlyVisualizer.create_confidence_distribution(results) + + # Create results DataFrame + df_data = [] + for result in results: + if 'error' in result: + df_data.append({ + 'Index': result['batch_index'] + 1, + 'Text': result['text'], + 'Sentiment': 'Error', + 'Confidence': 0.0, + 'Language': 'Unknown', + 'Error': result['error'] + }) + else: + keywords_str = ', '.join([word for word, _ in result['keywords'][:3]]) + df_data.append({ + 'Index': result['batch_index'] + 1, + 'Text': result['text'], + 'Sentiment': result['sentiment'], + 'Confidence': f"{result['confidence']:.3f}", + 'Language': result['language'].upper(), + 'Keywords': keywords_str + }) - summary_text = f""" + df = pd.DataFrame(df_data) + + # Create summary text + successful_results = [r for r in results if 'error' not in r] + error_count = len(results) - len(successful_results) + + if successful_results: + sentiment_counts = Counter([r['sentiment'] for r in successful_results]) + avg_confidence = np.mean([r['confidence'] for r in successful_results]) + languages = Counter([r['language'] for r in successful_results]) + + summary_text = f""" **Batch Analysis Summary:** - **Total Texts:** {len(texts)} - **Successful:** {len(successful_results)} - **Errors:** {error_count} - **Average Confidence:** {avg_confidence:.3f} - **Sentiments:** {dict(sentiment_counts)} - """ - else: - summary_text = f"All {len(texts)} texts failed to analyze." - - return summary_text, df, summary_fig, confidence_fig +- **Languages Detected:** {dict(languages)} + """ + else: + summary_text = f"All {len(texts)} texts failed to analyze." + + return summary_text, df, summary_fig, confidence_fig + + @handle_errors(default_return=(None, "No history available")) + def plot_history(self, theme: str = 'default'): + """Plot comprehensive history analysis""" + history = self.history.get_all() + if len(history) < 2: + return None, f"Need at least 2 analyses for trends. Current: {len(history)}" - except Exception as e: - logger.error(f"Batch analysis failed: {e}") - return f"Error: {str(e)}", None, None, None -def analyze_advanced_text(text: str, language: str, theme: str, use_lime: bool, - use_attention: bool, lime_features: int): - """Advanced analysis with SHAP and LIME explainability""" - try: - if not text.strip(): - return "Please enter text", None, None, None, None - - # Map display names back to language codes - language_map = { - 'Auto Detect': 'auto', - 'English': 'en', - 'Chinese': 'zh', - 'Spanish': 'es', - 'French': 'fr', - 'German': 'de', - 'Swedish': 'sv' - } - language_code = language_map.get(language, 'auto') - - # Basic sentiment analysis first - result = SentimentAnalyzer.analyze_text(text, language_code) - - # Create basic visualizations first - gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme) - bars_fig = PlotlyVisualizer.create_probability_bars(result, theme) - - # Initialize explainability results - lime_result = None - attention_result = None - lime_plot = None - attention_plot = None + theme_ctx = ThemeContext(theme) + - # Get model for explainability analysis - try: - model, tokenizer = model_manager.get_model(language_code) + with memory_cleanup(): + fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx) + stats = self.history.get_stats() - # LIME Analysis - if use_lime: - lime_result = ExplainabilityAnalyzer.analyze_with_lime( - text, model, tokenizer, model_manager.device, lime_features - ) - lime_plot = AdvancedVisualizer.create_lime_plot(lime_result, theme) - else: - # Create empty plot - lime_plot = go.Figure() - lime_plot.add_annotation(text="LIME analysis disabled", x=0.5, y=0.5, - xref="paper", yref="paper", showarrow=False) - lime_plot.update_layout(height=400, title="LIME Analysis (Disabled)") + stats_text = f""" +**History Statistics:** +- **Total Analyses:** {stats.get('total_analyses', 0)} +- **Positive:** {stats.get('positive_count', 0)} +- **Negative:** {stats.get('negative_count', 0)} +- **Neutral:** {stats.get('neutral_count', 0)} +- **Average Confidence:** {stats.get('avg_confidence', 0):.3f} +- **Languages:** {stats.get('languages_detected', 0)} +- **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()} + """ - # Attention Analysis - if use_attention: - attention_result = ExplainabilityAnalyzer.analyze_with_attention( - text, model, tokenizer, model_manager.device - ) - attention_plot = AdvancedVisualizer.create_attention_plot(attention_result, theme) - else: - # Create empty plot - attention_plot = go.Figure() - attention_plot.add_annotation(text="Attention analysis disabled", x=0.5, y=0.5, - xref="paper", yref="paper", showarrow=False) - attention_plot.update_layout(height=400, title="Attention Analysis (Disabled)") + return fig, stats_text + + @handle_errors(default_return=("No data available",)) + def get_history_status(self): + """Get current history status""" + stats = self.history.get_stats() + if not stats: + return "No analyses performed yet" + + return f""" +**Current Status:** +- **Total Analyses:** {stats['total_analyses']} +- **Recent Sentiment Distribution:** + * Positive: {stats['positive_count']} + * Negative: {stats['negative_count']} + * Neutral: {stats['neutral_count']} +- **Average Confidence:** {stats['avg_confidence']:.3f} +- **Languages Detected:** {stats['languages_detected']} + """ + +# Gradio Interface +def create_interface(): + """Create comprehensive Gradio interface""" + app = SentimentApp() + + with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo: + gr.Markdown("# 🌍 Advanced Multilingual Sentiment Analyzer") + gr.Markdown("AI-powered sentiment analysis with support for multiple languages, advanced visualizations, and explainable AI features") + + with gr.Tab("Single Analysis"): + with gr.Row(): + with gr.Column(): + text_input = gr.Textbox( + label="Enter Text for Analysis", + placeholder="Enter your text in any supported language...", + lines=5 + ) + + with gr.Row(): + language_selector = gr.Dropdown( + choices=list(config.SUPPORTED_LANGUAGES.values()), + value="Auto Detect", + label="Language" + ) + theme_selector = gr.Dropdown( + choices=list(config.THEMES.keys()), + value="default", + label="Theme" + ) + + with gr.Row(): + clean_text_cb = gr.Checkbox(label="Clean Text", value=False) + remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False) + remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False) + + analyze_btn = gr.Button("Analyze", variant="primary", size="lg") + + gr.Examples( + examples=app.examples, + inputs=text_input, + cache_examples=False + ) - except Exception as e: - logger.error(f"Explainability analysis failed: {e}") - # Create error plots - lime_plot = go.Figure() - lime_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5, - xref="paper", yref="paper", showarrow=False) - lime_plot.update_layout(height=400, title="Analysis Error") + with gr.Column(): + result_output = gr.Textbox(label="Analysis Results", lines=8) - attention_plot = go.Figure() - attention_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5, - xref="paper", yref="paper", showarrow=False) - attention_plot.update_layout(height=400, title="Analysis Error") - - # Add to history - history_entry = { - 'text': text[:100] + '...' if len(text) > 100 else text, - 'full_text': text, - 'sentiment': result['sentiment'], - 'confidence': result['confidence'], - 'pos_prob': result['pos_prob'], - 'neg_prob': result['neg_prob'], - 'neu_prob': result.get('neu_prob', 0), - 'language': result['language'], - 'timestamp': datetime.now().isoformat(), - 'analysis_type': 'advanced', - 'explainability_used': use_lime or use_attention - } - history_manager.add_entry(history_entry) + with gr.Row(): + gauge_plot = gr.Plot(label="Sentiment Gauge") + probability_plot = gr.Plot(label="Probability Distribution") + + with gr.Row(): + keyword_plot = gr.Plot(label="Key Contributing Words") + + with gr.Tab("Batch Analysis"): + with gr.Row(): + with gr.Column(): + file_upload = gr.File( + label="Upload File (CSV/TXT)", + file_types=[".csv", ".txt"] + ) + batch_input = gr.Textbox( + label="Batch Input (one text per line)", + placeholder="Enter multiple texts, one per line...", + lines=10 + ) + + with gr.Row(): + batch_language = gr.Dropdown( + choices=list(config.SUPPORTED_LANGUAGES.values()), + value="Auto Detect", + label="Language" + ) + batch_theme = gr.Dropdown( + choices=list(config.THEMES.keys()), + value="default", + label="Theme" + ) + + with gr.Row(): + batch_clean_cb = gr.Checkbox(label="Clean Text", value=False) + batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False) + batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False) + + with gr.Row(): + load_file_btn = gr.Button("Load File") + analyze_batch_btn = gr.Button("Analyze Batch", variant="primary") + + with gr.Column(): + batch_summary = gr.Textbox(label="Batch Summary", lines=8) + batch_results_df = gr.Dataframe( + label="Detailed Results", + headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Keywords"], + datatype=["number", "str", "str", "str", "str", "str"] + ) + + with gr.Row(): + batch_plot = gr.Plot(label="Batch Analysis Summary") + confidence_dist_plot = gr.Plot(label="Confidence Distribution") + + with gr.Tab("History & Analytics"): + with gr.Row(): + with gr.Column(): + with gr.Row(): + refresh_history_btn = gr.Button("Refresh History") + clear_history_btn = gr.Button("Clear History", variant="stop") + status_btn = gr.Button("Get Status") + + history_theme = gr.Dropdown( + choices=list(config.THEMES.keys()), + value="default", + label="Dashboard Theme" + ) + + with gr.Row(): + export_csv_btn = gr.Button("Export CSV") + export_json_btn = gr.Button("Export JSON") + + with gr.Column(): + history_status = gr.Textbox(label="History Status", lines=8) + + history_dashboard = gr.Plot(label="History Analytics Dashboard") + + with gr.Row(): + csv_download = gr.File(label="CSV Download", visible=True) + json_download = gr.File(label="JSON Download", visible=True) + + # Event Handlers + analyze_btn.click( + app.analyze_single, + inputs=[text_input, language_selector, theme_selector, + clean_text_cb, remove_punct_cb, remove_nums_cb], + outputs=[result_output, gauge_plot, probability_plot, keyword_plot] + ) - # Create detailed info text - info_text = f""" -**Advanced Analysis Results:** -- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence) -- **Language:** {result['language'].upper()} -- **Text Statistics:** - - Words: {result['word_count']} - - Characters: {result['char_count']} - - Average word length: {result['char_count']/max(result['word_count'], 1):.1f} -- **Keywords:** {', '.join(result['keywords'])} - -**Explainability Analysis:** - """ + load_file_btn.click( + app.data_handler.process_file, + inputs=file_upload, + outputs=batch_input + ) - if use_lime: - if lime_result and 'error' not in lime_result: - info_text += f"\n- **LIME:** ✅ Analyzed top {lime_features} features" - else: - error_msg = lime_result.get('error', 'Unknown error') if lime_result else 'Not available' - info_text += f"\n- **LIME:** ❌ {error_msg}" - else: - info_text += f"\n- **LIME:** ⏸️ Disabled" + analyze_batch_btn.click( + app.analyze_batch, + inputs=[batch_input, batch_language, batch_theme, + batch_clean_cb, batch_punct_cb, batch_nums_cb], + outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot] + ) - if use_attention: - if attention_result and 'error' not in attention_result: - info_text += f"\n- **Attention:** ✅ Token-level attention weights computed" - else: - error_msg = attention_result.get('error', 'Unknown error') if attention_result else 'Not available' - info_text += f"\n- **Attention:** ❌ {error_msg}" - else: - info_text += f"\n- **Attention:** ⏸️ Disabled" + refresh_history_btn.click( + app.plot_history, + inputs=history_theme, + outputs=[history_dashboard, history_status] + ) - return info_text, gauge_fig, bars_fig, lime_plot, attention_plot + clear_history_btn.click( + lambda: f"Cleared {app.history.clear()} entries", + outputs=history_status + ) - except Exception as e: - logger.error(f"Advanced analysis failed: {e}") - # Return basic empty plots on complete failure - empty_fig = go.Figure() - empty_fig.add_annotation(text=f"Analysis failed: {str(e)}", x=0.5, y=0.5, - xref="paper", yref="paper", showarrow=False) - empty_fig.update_layout(height=400) - - return f"Error: {str(e)}", empty_fig, empty_fig, empty_fig, empty_fig - -def get_history_stats(): - """Get enhanced history statistics""" - stats = history_manager.get_stats() - if not stats: - return "No analysis history available" - - return f""" -**Comprehensive History Statistics:** - -**Analysis Counts:** -- Total Analyses: {stats['total_analyses']} -- Positive: {stats['positive_count']} -- Negative: {stats['negative_count']} -- Neutral: {stats['neutral_count']} - -**Confidence Metrics:** -- Average Confidence: {stats['avg_confidence']:.3f} -- Highest Confidence: {stats['max_confidence']:.3f} -- Lowest Confidence: {stats['min_confidence']:.3f} - -**Language Statistics:** -- Languages Detected: {stats['languages_detected']} -- Most Common Language: {stats['most_common_language'].upper()} - -**Text Statistics:** -- Average Text Length: {stats['avg_text_length']:.1f} characters - """ - -def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float): - """Display filtered history""" - # Convert filters - sentiment = sentiment_filter if sentiment_filter != "All" else None - language = language_filter.lower() if language_filter != "All" else None - - filtered_history = history_manager.filter_history( - sentiment=sentiment, - language=language, - min_confidence=min_confidence if min_confidence > 0 else None - ) - - if not filtered_history: - return "No entries match the filter criteria", None - - # Create DataFrame for display - df_data = [] - for entry in filtered_history[-20:]: # Show last 20 entries - df_data.append({ - 'Timestamp': entry['timestamp'][:16], # YYYY-MM-DD HH:MM - 'Text': entry['text'], - 'Sentiment': entry['sentiment'], - 'Confidence': f"{entry['confidence']:.3f}", - 'Language': entry['language'].upper(), - 'Type': entry.get('analysis_type', 'single') - }) - - df = pd.DataFrame(df_data) - - summary = f""" -**Filtered Results:** -- Found {len(filtered_history)} entries matching criteria -- Showing most recent {min(20, len(filtered_history))} entries - """ + status_btn.click( + app.get_history_status, + outputs=history_status + ) + + export_csv_btn.click( + lambda: app.data_handler.export_data(app.history.get_all(), 'csv'), + outputs=[csv_download, history_status] + ) + + export_json_btn.click( + lambda: app.data_handler.export_data(app.history.get_all(), 'json'), + outputs=[json_download, history_status] + ) - return summary, df + return demo -def plot_history_dashboard(): - """Create history dashboard""" - history = history_manager.get_history() - if len(history) < 2: - return None, "Need at least 2 analyses for dashboard" - - fig = PlotlyVisualizer.create_history_dashboard(history) - return fig, f"Dashboard showing {len(history)} analyses" - -def export_history_csv(): - """Export history to CSV""" - history = history_manager.get_history() - if not history: - return None, "No history to export" +# Application Entry Point +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) try: - df = pd.DataFrame(history) - temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w') - df.to_csv(temp_file.name, index=False) - return temp_file.name, f"Exported {len(history)} entries to CSV" + demo = create_interface() + demo.launch( + share=True, + server_name="0.0.0.0", + server_port=7860, + show_error=True + ) except Exception as e: - return None, f"Export failed: {str(e)}" - -def export_history_excel(): - """Export history to Excel""" - history = history_manager.get_history() - if not history: - return None, "No history to export" + logger.error(f"Failed to launch application: {e}") + raise - try: - df = pd.DataFrame(history) - temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') - df.to_excel(temp_file.name, index=False) - return temp_file.name, f"Exported {len(history)} entries to Excel" - except Exception as e: - return None, f"Export failed: {str(e)}" - -def clear_all_history(): - """Clear analysis history""" - count = history_manager.clear() - return f"Cleared {count} entries from history" - -def get_recent_analyses(): - """Get recent analysis summary""" - recent = history_manager.get_recent_history(10) - if not recent: - return "No recent analyses available" + @handle_errors(default_return=("Please enter texts", None, None, None)) + def analyze_batch(self, batch_text: str, language: str, theme: str, + clean_text: bool, remove_punct: bool, remove_nums: bool): + """Enhanced batch analysis""" + if not batch_text.strip(): + return "Please enter texts (one per line)", None, None, None + + # Parse batch input + texts = TextProcessor.parse_batch_input(batch_text) + + if len(texts) > config.BATCH_SIZE_LIMIT: + return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None + + if not texts: + return "No valid texts found", None, None, None + + + # Map display names to language codes + language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()} + language_code = language_map.get(language, 'auto') + + preprocessing_options = { + 'clean_text': clean_text, + 'remove_punctuation': remove_punct, + 'remove_numbers': remove_nums + } + + with memory_cleanup(): + results = self.engine.analyze_batch(texts, language_code, preprocessing_options) + + # Add to history + batch_entries = [] + for result in results: + if 'error' not in result: + entry = { + 'text': result['text'], + 'full_text': result['full_text'], + 'sentiment': result['sentiment'], + 'confidence': result['confidence'], + 'pos_prob': result.get('pos_prob', 0), + 'neg_prob': result.get('neg_prob', 0), + 'neu_prob': result.get('neu_prob', 0), + 'language': result['language'], + 'keywords': result['keywords'], + 'word_count': result['word_count'], + 'analysis_type': 'batch', + 'batch_index': result['batch_index'] + } + batch_entries.append(entry) + + self.history.add_batch(batch_entries) + + # Create visualizations + theme_ctx = ThemeContext(theme) + summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx) + confidence_fig = PlotlyVisualizer.create_confidence_distribution(results) + + # Create results DataFrame + df_data = [] + for result in results: + if 'error' in result: + df_data.append({ + 'Index': result['batch_index'] + 1, + 'Text': result['text'], + 'Sentiment': 'Error', + 'Confidence': 0.0, + 'Language': 'Unknown', + 'Error': result['error'] + }) + else: + keywords_str = ', '.join([word for word, _ in result['keywords'][:3]]) + df_data.append({ + 'Index': result['batch_index'] + 1, + 'Text': result['text'], + 'Sentiment': result['sentiment'], + 'Confidence': f"{result['confidence']:.3f}", + 'Language': result['language'].upper(), + 'Keywords': keywords_str + }) + + df = pd.DataFrame(df_data) + + # Create summary text + successful_results = [r for r in results if 'error' not in r] + error_count = len(results) - len(successful_results) + + if successful_results: + sentiment_counts = Counter([r['sentiment'] for r in successful_results]) + avg_confidence = np.mean([r['confidence'] for r in successful_results]) + languages = Counter([r['language'] for r in successful_results]) + + summary_text = f""" +**Batch Analysis Summary:** +- **Total Texts:** {len(texts)} +- **Successful:** {len(successful_results)} +- **Errors:** {error_count} +- **Average Confidence:** {avg_confidence:.3f} +- **Sentiments:** {dict(sentiment_counts)} +- **Languages Detected:** {dict(languages)} + """ + else: + summary_text = f"All {len(texts)} texts failed to analyze." + + return summary_text, df, summary_fig, confidence_fig - summary_text = "**Recent Analyses (Last 10):**\n\n" - for i, entry in enumerate(recent, 1): - summary_text += f"{i}. **{entry['sentiment']}** ({entry['confidence']:.3f}) - {entry['text']}\n" + @handle_errors(default_return=(None, "No history available")) + def plot_history(self, theme: str = 'default'): + """Plot comprehensive history analysis""" + history = self.history.get_all() + if len(history) < 2: + return None, f"Need at least 2 analyses for trends. Current: {len(history)}" + + theme_ctx = ThemeContext(theme) + + with memory_cleanup(): + fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx) + stats = self.history.get_stats() + + stats_text = f""" +**History Statistics:** +- **Total Analyses:** {stats.get('total_analyses', 0)} +- **Positive:** {stats.get('positive_count', 0)} +- **Negative:** {stats.get('negative_count', 0)} +- **Neutral:** {stats.get('neutral_count', 0)} +- **Average Confidence:** {stats.get('avg_confidence', 0):.3f} +- **Languages:** {stats.get('languages_detected', 0)} +- **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()} + """ + + return fig, stats_text - return summary_text - -# Sample data -SAMPLE_TEXTS = [ - # Auto Detect - ["The film had its moments, but overall it felt a bit too long and lacked emotional depth."], - - # English - ["I was completely blown away by the movie — the performances were raw and powerful, and the story stayed with me long after the credits rolled."], - - # Chinese - ["这部电影节奏拖沓,剧情老套,完全没有让我产生任何共鸣,是一次失望的观影体验。"], - - # Spanish - ["Una obra maestra del cine contemporáneo, con actuaciones sobresalientes, un guion bien escrito y una dirección impecable."], - - # French - ["Je m'attendais à beaucoup mieux. Le scénario était confus, les dialogues ennuyeux, et je me suis presque endormi au milieu du film."], - - # German - ["Der Film war ein emotionales Erlebnis mit großartigen Bildern, einem mitreißenden Soundtrack und einer Geschichte, die zum Nachdenken anregt."], - - # Swedish - ["Filmen var en besvikelse – tråkig handling, överdrivet skådespeleri och ett slut som inte gav något avslut alls."] -] - -BATCH_SAMPLE = """I love this product! It works perfectly. -The service was terrible and slow. -Not sure if I like it or not. -Amazing quality and fast delivery! -Could be better, but it's okay.""" + @handle_errors(default_return=("No data available",)) + def get_history_status(self): + """Get current history status""" + stats = self.history.get_stats() + if not stats: + return "No analyses performed yet" + + return f""" +**Current Status:** +- **Total Analyses:** {stats['total_analyses']} +- **Recent Sentiment Distribution:** + * Positive: {stats['positive_count']} + * Negative: {stats['negative_count']} + * Neutral: {stats['neutral_count']} +- **Average Confidence:** {stats['avg_confidence']:.3f} +- **Languages Detected:** {stats['languages_detected']} + """ # Gradio Interface -with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment Analyzer") as demo: - gr.Markdown("# 🎭 Advanced Multilingual Sentiment Analyzer") - gr.Markdown("Comprehensive sentiment analysis with batch processing, advanced analytics, and multilingual support") +def create_interface(): + """Create comprehensive Gradio interface""" + app = SentimentApp() - with gr.Tab("📝 Single Analysis"): - with gr.Row(): - with gr.Column(scale=2): - text_input = gr.Textbox( - label="Text to Analyze", - placeholder="Enter your text here... (supports multiple languages)", - lines=4 - ) - - with gr.Row(): - language_select = gr.Dropdown( - choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'], - value='Auto Detect', - label="Language" + with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo: + gr.Markdown("# 🌍 Advanced Multilingual Sentiment Analyzer") + gr.Markdown("AI-powered sentiment analysis with support for multiple languages, advanced visualizations, and explainable AI features") + + with gr.Tab("Single Analysis"): + with gr.Row(): + with gr.Column(): + text_input = gr.Textbox( + label="Enter Text for Analysis", + placeholder="Enter your text in any supported language...", + lines=5 ) - theme_select = gr.Dropdown( - choices=list(config.THEMES.keys()), - value='default', - label="Theme" + + with gr.Row(): + language_selector = gr.Dropdown( + choices=list(config.SUPPORTED_LANGUAGES.values()), + value="Auto Detect", + label="Language" + ) + theme_selector = gr.Dropdown( + choices=list(config.THEMES.keys()), + value="default", + label="Theme" + ) + + with gr.Row(): + clean_text_cb = gr.Checkbox(label="Clean Text", value=False) + remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False) + remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False) + + analyze_btn = gr.Button("Analyze", variant="primary", size="lg") + + gr.Examples( + examples=app.examples, + inputs=text_input, + cache_examples=False ) - with gr.Row(): - clean_text = gr.Checkbox(label="Clean Text", value=False) - remove_punct = gr.Checkbox(label="Remove Punctuation", value=True) - remove_nums = gr.Checkbox(label="Remove Numbers", value=False) - - analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg") - - gr.Examples( - examples=SAMPLE_TEXTS, - inputs=text_input, - label="Sample Texts (Multiple Languages)" - ) + with gr.Column(): + result_output = gr.Textbox(label="Analysis Results", lines=8) - with gr.Column(scale=1): - result_info = gr.Markdown("Enter text and click Analyze") - - with gr.Row(): - gauge_plot = gr.Plot(label="Sentiment Gauge") - bars_plot = gr.Plot(label="Probability Distribution") - - with gr.Tab("🔬 Advanced Analysis"): - with gr.Row(): - with gr.Column(scale=2): - advanced_input = gr.Textbox( - label="Text for Advanced Analysis", - placeholder="Enter text for explainability analysis...", - lines=4 - ) - - with gr.Row(): - advanced_language = gr.Dropdown( - choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'], - value='Auto Detect', - label="Language" + with gr.Row(): + gauge_plot = gr.Plot(label="Sentiment Gauge") + probability_plot = gr.Plot(label="Probability Distribution") + + with gr.Row(): + + keyword_plot = gr.Plot(label="Key Contributing Words") + + with gr.Tab("Batch Analysis"): + with gr.Row(): + with gr.Column(): + file_upload = gr.File( + label="Upload File (CSV/TXT)", + file_types=[".csv", ".txt"] ) - advanced_theme = gr.Dropdown( - choices=list(config.THEMES.keys()), - value='default', - label="Theme" + batch_input = gr.Textbox( + label="Batch Input (one text per line)", + placeholder="Enter multiple texts, one per line...", + lines=10 ) + + with gr.Row(): + batch_language = gr.Dropdown( + choices=list(config.SUPPORTED_LANGUAGES.values()), + value="Auto Detect", + label="Language" + ) + batch_theme = gr.Dropdown( + choices=list(config.THEMES.keys()), + value="default", + label="Theme" + ) + + with gr.Row(): + batch_clean_cb = gr.Checkbox(label="Clean Text", value=False) + batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False) + batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False) + + with gr.Row(): + load_file_btn = gr.Button("Load File") + analyze_batch_btn = gr.Button("Analyze Batch", variant="primary") - gr.Markdown("### 🔍 Explainability Options") - with gr.Row(): - use_lime = gr.Checkbox(label="Use LIME Analysis", value=True) - use_attention = gr.Checkbox(label="Use Attention Weights", value=True) - - lime_features = gr.Slider( - minimum=5, - maximum=20, - value=10, - step=1, - label="LIME Features Count" - ) - - advanced_analyze_btn = gr.Button("🔬 Advanced Analyze", variant="primary", size="lg") - - with gr.Column(scale=1): - advanced_result_info = gr.Markdown("Configure explainability settings and click Advanced Analyze") - - with gr.Row(): - advanced_gauge_plot = gr.Plot(label="Sentiment Gauge") - advanced_bars_plot = gr.Plot(label="Probability Distribution") - - with gr.Row(): - lime_plot = gr.Plot(label="LIME Feature Importance") - attention_plot = gr.Plot(label="Attention Weights") - - with gr.Tab("📊 Batch Analysis"): - with gr.Row(): - with gr.Column(scale=2): - batch_input = gr.Textbox( - label="Batch Text Input (One text per line)", - placeholder="Enter multiple texts, one per line...", - lines=8 - ) - - with gr.Row(): - batch_language = gr.Dropdown( - choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'], - value='Auto Detect', - label="Language" + with gr.Column(): + batch_summary = gr.Textbox(label="Batch Summary", lines=8) + batch_results_df = gr.Dataframe( + label="Detailed Results", + headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Keywords"], + datatype=["number", "str", "str", "str", "str", "str"] ) - batch_theme = gr.Dropdown( + + with gr.Row(): + batch_plot = gr.Plot(label="Batch Analysis Summary") + confidence_dist_plot = gr.Plot(label="Confidence Distribution") + + with gr.Tab("History & Analytics"): + with gr.Row(): + with gr.Column(): + with gr.Row(): + refresh_history_btn = gr.Button("Refresh History") + clear_history_btn = gr.Button("Clear History", variant="stop") + status_btn = gr.Button("Get Status") + + history_theme = gr.Dropdown( choices=list(config.THEMES.keys()), - value='default', - label="Theme" + value="default", + label="Dashboard Theme" ) + + with gr.Row(): + export_csv_btn = gr.Button("Export CSV") + export_json_btn = gr.Button("Export JSON") - with gr.Row(): - batch_clean = gr.Checkbox(label="Clean Text", value=False) - batch_remove_punct = gr.Checkbox(label="Remove Punctuation", value=True) - batch_remove_nums = gr.Checkbox(label="Remove Numbers", value=False) - - batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg") - - gr.Examples( - examples=[[BATCH_SAMPLE]], - inputs=batch_input, - label="Sample Batch Input" - ) + with gr.Column(): + history_status = gr.Textbox(label="History Status", lines=8) - with gr.Column(scale=1): - batch_summary = gr.Markdown("Enter texts and click Analyze Batch") + history_dashboard = gr.Plot(label="History Analytics Dashboard") + + with gr.Row(): + csv_download = gr.File(label="CSV Download", visible=True) + json_download = gr.File(label="JSON Download", visible=True) + + + + + - with gr.Row(): - batch_results_table = gr.DataFrame( - label="Detailed Results", - interactive=False - ) + # Event Handlers + analyze_btn.click( + app.analyze_single, + inputs=[text_input, language_selector, theme_selector, + clean_text_cb, remove_punct_cb, remove_nums_cb], + outputs=[result_output, gauge_plot, probability_plot, keyword_plot] + ) - with gr.Row(): - batch_summary_plot = gr.Plot(label="Sentiment Summary") - batch_confidence_plot = gr.Plot(label="Confidence Distribution") - - with gr.Tab("📈 History & Analytics"): - with gr.Row(): - with gr.Column(): - gr.Markdown("### 📊 Statistics") - stats_btn = gr.Button("📈 Get Statistics") - recent_btn = gr.Button("🕒 Recent Analyses") - stats_output = gr.Markdown("Click 'Get Statistics' to view analysis history") - - with gr.Column(): - gr.Markdown("### 🔍 Filter History") - with gr.Row(): - sentiment_filter = gr.Dropdown( - choices=["All", "Positive", "Negative", "Neutral"], - value="All", - label="Filter by Sentiment" - ) - language_filter = gr.Dropdown( - choices=["All", "English", "Chinese", "Spanish", "French", "German", "Swedish"], - value="All", - label="Filter by Language" - ) - - confidence_filter = gr.Slider( - minimum=0.0, - maximum=1.0, - value=0.0, - step=0.1, - label="Minimum Confidence" - ) - - filter_btn = gr.Button("🔍 Filter History") + load_file_btn.click( + app.data_handler.process_file, + inputs=file_upload, + outputs=batch_input + ) - with gr.Row(): - dashboard_btn = gr.Button("📊 View Dashboard") - clear_btn = gr.Button("🗑️ Clear History", variant="stop") + analyze_batch_btn.click( + app.analyze_batch, + inputs=[batch_input, batch_language, batch_theme, + batch_clean_cb, batch_punct_cb, batch_nums_cb], + outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot] + ) - with gr.Row(): - export_csv_btn = gr.Button("📄 Export CSV") - export_excel_btn = gr.Button("📊 Export Excel") + refresh_history_btn.click( + app.plot_history, + inputs=history_theme, + outputs=[history_dashboard, history_status] + ) - dashboard_plot = gr.Plot(label="Analytics Dashboard") + clear_history_btn.click( + lambda: f"Cleared {app.history.clear()} entries", + outputs=history_status + ) - with gr.Row(): - filtered_results = gr.Markdown("Use filters to view specific entries") - filtered_table = gr.DataFrame(label="Filtered History", interactive=False) + status_btn.click( + app.get_history_status, + outputs=history_status + ) - csv_file = gr.File(label="Download CSV Report") - excel_file = gr.File(label="Download Excel Report") - history_status = gr.Textbox(label="Status", interactive=False) - - # Event handlers - - # Single Analysis - analyze_btn.click( - analyze_single_text, - inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums], - outputs=[result_info, gauge_plot, bars_plot] - ) - - # Batch Analysis - batch_analyze_btn.click( - analyze_batch_texts, - inputs=[batch_input, batch_language, batch_theme, batch_clean, batch_remove_punct, batch_remove_nums], - outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot] - ) - - # Advanced Analysis - advanced_analyze_btn.click( - analyze_advanced_text, - inputs=[advanced_input, advanced_language, advanced_theme, use_lime, use_attention, lime_features], - outputs=[advanced_result_info, advanced_gauge_plot, advanced_bars_plot, lime_plot, attention_plot] - ) - - # History & Analytics - stats_btn.click( - get_history_stats, - outputs=stats_output - ) - - recent_btn.click( - get_recent_analyses, - outputs=stats_output - ) - - filter_btn.click( - filter_history_display, - inputs=[sentiment_filter, language_filter, confidence_filter], - outputs=[filtered_results, filtered_table] - ) - - dashboard_btn.click( - plot_history_dashboard, - outputs=[dashboard_plot, history_status] - ) - - export_csv_btn.click( - export_history_csv, - outputs=[csv_file, history_status] - ) - - export_excel_btn.click( - export_history_excel, - outputs=[excel_file, history_status] - ) + export_csv_btn.click( + lambda: app.data_handler.export_data(app.history.get_all(), 'csv'), + outputs=[csv_download, history_status] + ) + + export_json_btn.click( + lambda: app.data_handler.export_data(app.history.get_all(), 'json'), + outputs=[json_download, history_status] + ) - clear_btn.click( - clear_all_history, - outputs=history_status - ) + return demo +# Application Entry Point if __name__ == "__main__": - demo.launch(share=True) \ No newline at end of file + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + + try: + demo = create_interface() + demo.launch( + share=True, + server_name="0.0.0.0", + server_port=7860, + show_error=True + ) + except Exception as e: + logger.error(f"Failed to launch application: {e}") + raise \ No newline at end of file