Spaces:

entropy25
/

multilingual-sentiment-analyzer

Sleeping

App Files Files Community

entropy25 committed 21 days ago

Commit

6e4a3ac

verified ·

1 Parent(s): 0c511f2

Update app.py

Browse files

Files changed (1) hide show

app.py +1292 -773

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import plotly.graph_objects as go
 import plotly.express as px
 from plotly.subplots import make_subplots
@@ -14,936 +14,1455 @@ import io
 import tempfile
 from datetime import datetime
 import logging
-from functools import lru_cache, wraps
 from dataclasses import dataclass
-from typing import List, Dict, Optional, Tuple, Any, Callable
-from contextlib import contextmanager
-import gc
-import base64
 # Configuration
 @dataclass
 class Config:
-    MAX_HISTORY_SIZE: int = 1000
-    BATCH_SIZE_LIMIT: int = 50
     MAX_TEXT_LENGTH: int = 512
-    MIN_WORD_LENGTH: int = 2
-    CACHE_SIZE: int = 128
-    BATCH_PROCESSING_SIZE: int = 8
-    # Visualization settings
-    FIGURE_WIDTH: int = 800
-    FIGURE_HEIGHT: int = 500
-    WORDCLOUD_SIZE: Tuple[int, int] = (800, 400)
-    THEMES = {
-        'default': {'pos': '#4ecdc4', 'neg': '#ff6b6b'},
-        'ocean': {'pos': '#0077be', 'neg': '#ff6b35'},
-        'forest': {'pos': '#228b22', 'neg': '#dc143c'},
-        'sunset': {'pos': '#ff8c00', 'neg': '#8b0000'}
     }
-    # Multi-language models
     MODELS = {
-        'multilingual': {
-            'name': 'cardiffnlp/twitter-xlm-roberta-base-sentiment',
-            'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
-        },
-        'english': {
-            'name': 'cardiffnlp/twitter-roberta-base-sentiment-latest',
-            'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
-        },
-        'chinese': {
-            'name': 'uer/roberta-base-finetuned-chinanews-chinese',
-            'labels': ['NEGATIVE', 'POSITIVE']
-        },
-        'spanish': {
-            'name': 'finiteautomata/beto-sentiment-analysis',
-            'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
-        },
-        'french': {
-            'name': 'tblard/tf-allocine',
-            'labels': ['NEGATIVE', 'POSITIVE']
-        }
     }
-    STOP_WORDS = {
-        'en': {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should'},
-        'zh': {'的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看'},
-        'es': {'el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo', 'le', 'da', 'su', 'por', 'son', 'con', 'para', 'al', 'del', 'los', 'las'},
-        'fr': {'le', 'la', 'les', 'de', 'un', 'une', 'du', 'des', 'et', 'à', 'ce', 'il', 'que', 'qui', 'ne', 'se', 'pas', 'tout', 'être', 'avoir', 'sur', 'avec', 'par'},
     }
 config = Config()
 logger = logging.getLogger(__name__)
-# Decorators and Context Managers
-def handle_errors(default_return=None):
-    """Centralized error handling decorator"""
-    def decorator(func: Callable) -> Callable:
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            try:
-                return func(*args, **kwargs)
-            except Exception as e:
-                logger.error(f"{func.__name__} failed: {e}")
-                return default_return if default_return is not None else f"Error: {str(e)}"
-        return wrapper
-    return decorator
-class ThemeContext:
-    """Theme management context"""
-    def __init__(self, theme: str = 'default'):
-        self.theme = theme
-        self.colors = config.THEMES.get(theme, config.THEMES['default'])
-# Enhanced Model Manager for Multi-language Support
-class ModelManager:
-    """Multi-language model manager with lazy loading"""
-    _instance = None
-    _models = {}
-    _tokenizers = {}
-    _pipelines = {}
-    _device = None
-    def __new__(cls):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-        return cls._instance
-    @property
-    def device(self):
-        if self._device is None:
-            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        return self._device
-    def get_pipeline(self, model_key: str = 'multilingual'):
-        """Get or create sentiment analysis pipeline for specified model"""
-        if model_key not in self._pipelines:
-            try:
-                model_config = config.MODELS[model_key]
-                self._pipelines[model_key] = pipeline(
-                    "sentiment-analysis",
-                    model=model_config['name'],
-                    tokenizer=model_config['name'],
-                    device=0 if torch.cuda.is_available() else -1,
-                    top_k=None
-                )
-                logger.info(f"Model {model_key} loaded successfully")
-            except Exception as e:
-                logger.error(f"Failed to load model {model_key}: {e}")
-                # Fallback to multilingual model
-                if model_key != 'multilingual':
-                    return self.get_pipeline('multilingual')
-                raise
-        return self._pipelines[model_key]
-    def get_model_and_tokenizer(self, model_key: str = 'multilingual'):
-        """Get model and tokenizer for attention extraction"""
-        if model_key not in self._models:
-            try:
-                model_config = config.MODELS[model_key]
-                self._tokenizers[model_key] = AutoTokenizer.from_pretrained(model_config['name'])
-                self._models[model_key] = AutoModelForSequenceClassification.from_pretrained(model_config['name'])
-                self._models[model_key].to(self.device)
-                logger.info(f"Model and tokenizer {model_key} loaded for attention extraction")
-            except Exception as e:
-                logger.error(f"Failed to load model/tokenizer {model_key}: {e}")
-                if model_key != 'multilingual':
-                    return self.get_model_and_tokenizer('multilingual')
-                raise
-        return self._models[model_key], self._tokenizers[model_key]
-# Language Detection
-class LanguageDetector:
-    """Simple language detection based on character patterns"""
     @staticmethod
     def detect_language(text: str) -> str:
-        """Detect language based on character patterns"""
-        # Chinese characters
-        if re.search(r'[\u4e00-\u9fff]', text):
-            return 'chinese'
-        # Spanish patterns
-        elif re.search(r'[ñáéíóúü]', text.lower()):
-            return 'spanish'
-        # French patterns
-        elif re.search(r'[àâäçéèêëïîôùûüÿ]', text.lower()):
-            return 'french'
-        # Default to English/Multilingual
-        else:
-            return 'multilingual'
-# Simplified Core Classes
-class TextProcessor:
-    """Optimized text processing with multi-language support"""
-    @staticmethod
-    @lru_cache(maxsize=config.CACHE_SIZE)
-    def clean_text(text: str, language: str = 'en') -> Tuple[str, ...]:
-        """Single-pass text cleaning with language-specific stop words"""
-        words = re.findall(r'\b\w{2,}\b', text.lower())
-        stop_words = config.STOP_WORDS.get(language, config.STOP_WORDS['en'])
-        return tuple(w for w in words if w not in stop_words and len(w) >= config.MIN_WORD_LENGTH)
 class HistoryManager:
-    """Simplified history management"""
     def __init__(self):
         self._history = []
-    def add(self, entry: Dict):
-        self._history.append({**entry, 'timestamp': datetime.now().isoformat()})
         if len(self._history) > config.MAX_HISTORY_SIZE:
             self._history = self._history[-config.MAX_HISTORY_SIZE:]
-    def get_all(self) -> List[Dict]:
         return self._history.copy()
     def clear(self) -> int:
         count = len(self._history)
         self._history.clear()
         return count
-    def size(self) -> int:
-        return len(self._history)
-# Core Analysis Engine with Multi-language Support
-class SentimentEngine:
-    """Multi-language sentiment analysis with attention-based keyword extraction"""
-    def __init__(self):
-        self.model_manager = ModelManager()
-        self.language_detector = LanguageDetector()
-    def extract_key_words(self, text: str, model_key: str = 'multilingual', top_k: int = 10) -> List[Tuple[str, float]]:
-        """Extract contributing words using attention weights"""
         try:
-            model, tokenizer = self.model_manager.get_model_and_tokenizer(model_key)
-            inputs = tokenizer(
-                text, return_tensors="pt", padding=True,
-                truncation=True, max_length=config.MAX_TEXT_LENGTH
-            ).to(self.model_manager.device)
-            # Get model outputs with attention weights
             with torch.no_grad():
-                outputs = model(**inputs, output_attentions=True)
-                attention = outputs.attentions
-                # Use the last layer's attention, average over all heads
-                last_attention = attention[-1]
-                avg_attention = last_attention.mean(dim=1)
-                # Focus on attention to [CLS] token
-                cls_attention = avg_attention[0, 0, :]
-            # Get tokens and their attention scores
-            tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
-            attention_scores = cls_attention.cpu().numpy()
-            # Filter out special tokens and combine subword tokens
-            word_scores = {}
-            current_word = ""
-            current_score = 0.0
-            for i, (token, score) in enumerate(zip(tokens, attention_scores)):
-                if token in ['[CLS]', '[SEP]', '[PAD]', '<s>', '</s>', '<pad>']:
-                    continue
-                if token.startswith('##') or token.startswith('▁'):
-                    # Subword token
-                    current_word += token[2:] if token.startswith('##') else token[1:]
-                    current_score = max(current_score, score)
-                else:
-                    # New word, save previous if exists
-                    if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
-                        word_scores[current_word.lower()] = current_score
-                    current_word = token
-                    current_score = score
-            # Don't forget the last word
-            if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
-                word_scores[current_word.lower()] = current_score
-            # Filter out stop words and sort by attention score
-            lang_code = 'zh' if model_key == 'chinese' else 'es' if model_key == 'spanish' else 'fr' if model_key == 'french' else 'en'
-            stop_words = config.STOP_WORDS.get(lang_code, config.STOP_WORDS['en'])
-            filtered_words = {
-                word: score for word, score in word_scores.items()
-                if word not in stop_words and len(word) >= config.MIN_WORD_LENGTH
             }
-            # Sort by attention score and return top_k
-            sorted_words = sorted(filtered_words.items(), key=lambda x: x[1], reverse=True)
-            return sorted_words[:top_k]
         except Exception as e:
-            logger.error(f"Key word extraction failed: {e}")
-            return []
-    @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'key_words': []})
-    def analyze_single(self, text: str, model_key: str = None) -> Dict:
-        """Analyze single text with automatic language detection"""
-        if not text.strip():
-            raise ValueError("Empty text")
-        # Auto-detect language if not specified
-        if model_key is None:
-            detected_lang = self.language_detector.detect_language(text)
-            model_key = detected_lang if detected_lang in config.MODELS else 'multilingual'
-        # Get sentiment analysis pipeline
-        classifier = self.model_manager.get_pipeline(model_key)
-        results = classifier(text)
-        # Process results based on model output format
-        if isinstance(results[0], list):
-            results = results[0]
-        # Map results to standardized format
-        sentiment_map = {'POSITIVE': 'Positive', 'NEGATIVE': 'Negative', 'NEUTRAL': 'Neutral'}
-        # Find positive and negative scores
-        pos_score = 0.0
-        neg_score = 0.0
-        neutral_score = 0.0
-        for result in results:
-            label = result['label']
-            score = result['score']
-            if 'POSITIVE' in label:
-                pos_score = score
-            elif 'NEGATIVE' in label:
-                neg_score = score
-            elif 'NEUTRAL' in label:
-                neutral_score = score
-        # Determine final sentiment
-        if pos_score > neg_score and pos_score > neutral_score:
-            sentiment = 'Positive'
-            confidence = pos_score
-        elif neg_score > pos_score and neg_score > neutral_score:
-            sentiment = 'Negative'
-            confidence = neg_score
         else:
-            sentiment = 'Neutral'
-            confidence = neutral_score
-        # Extract key contributing words
-        key_words = self.extract_key_words(text, model_key)
-        return {
-            'sentiment': sentiment,
-            'confidence': float(confidence),
-            'pos_prob': float(pos_score),
-            'neg_prob': float(neg_score),
-            'neutral_prob': float(neutral_score),
-            'key_words': key_words,
-            'language': model_key
-        }
-    @handle_errors(default_return=[])
-    def analyze_batch(self, texts: List[str], model_key: str = None, progress_callback=None) -> List[Dict]:
-        """Optimized batch processing with key words"""
-        if len(texts) > config.BATCH_SIZE_LIMIT:
-            texts = texts[:config.BATCH_SIZE_LIMIT]
-        results = []
-        for i, text in enumerate(texts):
-            if progress_callback:
-                progress_callback((i + 1) / len(texts))
-            result = self.analyze_single(text, model_key)
-            result['text'] = text[:50] + '...' if len(text) > 50 else text
-            result['full_text'] = text
-            results.append(result)
-        return results
-# Plotly Visualization System
-class PlotFactory:
-    """Factory for creating Plotly visualizations"""
     @staticmethod
-    @handle_errors(default_return=None)
-    def create_sentiment_bars(result: Dict, theme: ThemeContext) -> go.Figure:
-        """Create sentiment probability bars using Plotly"""
-        labels = []
-        values = []
-        colors = []
-        if 'neg_prob' in result and result['neg_prob'] > 0:
-            labels.append("Negative")
-            values.append(result['neg_prob'])
-            colors.append(theme.colors['neg'])
-        if 'neutral_prob' in result and result['neutral_prob'] > 0:
-            labels.append("Neutral")
-            values.append(result['neutral_prob'])
-            colors.append('#FFA500')  # Orange for neutral
-        if 'pos_prob' in result and result['pos_prob'] > 0:
-            labels.append("Positive")
-            values.append(result['pos_prob'])
-            colors.append(theme.colors['pos'])
         fig = go.Figure(data=[
-            go.Bar(
-                x=labels,
-                y=values,
-                marker_color=colors,
-                text=[f'{v:.3f}' for v in values],
-                textposition='auto',
-            )
         ])
         fig.update_layout(
             title="Sentiment Probabilities",
-            xaxis_title="Sentiment",
             yaxis_title="Probability",
-            yaxis=dict(range=[0, 1]),
-            width=config.FIGURE_WIDTH,
-            height=config.FIGURE_HEIGHT,
             showlegend=False
         )
         return fig
     @staticmethod
-    @handle_errors(default_return=None)
-    def create_confidence_gauge(confidence: float, sentiment: str, theme: ThemeContext) -> go.Figure:
-        """Create confidence gauge using Plotly"""
-        color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg'] if sentiment == 'Negative' else '#FFA500'
-        fig = go.Figure(go.Indicator(
-            mode = "gauge+number+delta",
-            value = confidence,
-            domain = {'x': [0, 1], 'y': [0, 1]},
-            title = {'text': f"{sentiment} Confidence"},
-            delta = {'reference': 0.5},
-            gauge = {
-                'axis': {'range': [None, 1]},
-                'bar': {'color': color},
-                'steps': [
-                    {'range': [0, 0.5], 'color': "lightgray"},
-                    {'range': [0.5, 1], 'color': "gray"}
-                ],
-                'threshold': {
-                    'line': {'color': "red", 'width': 4},
-                    'thickness': 0.75,
-                    'value': 0.9
-                }
-            }
-        ))
         fig.update_layout(
-            width=config.FIGURE_WIDTH,
-            height=config.FIGURE_HEIGHT
         )
         return fig
     @staticmethod
-    @handle_errors(default_return=None)
-    def create_keyword_chart(key_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[go.Figure]:
-        """Create horizontal bar chart for key contributing words"""
-        if not key_words:
-            return None
-        words = [word for word, score in key_words]
-        scores = [score for word, score in key_words]
-        # Choose color based on sentiment
-        color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg'] if sentiment == 'Negative' else '#FFA500'
-        fig = go.Figure(go.Bar(
-            x=scores,
-            y=words,
-            orientation='h',
-            marker_color=color,
-            text=[f'{score:.3f}' for score in scores],
-            textposition='auto',
-        ))
         fig.update_layout(
-            title=f'Top Contributing Words ({sentiment})',
-            xaxis_title='Attention Weight',
-            yaxis_title='Words',
-            width=config.FIGURE_WIDTH,
-            height=config.FIGURE_HEIGHT,
-            yaxis={'categoryorder': 'total ascending'}
         )
         return fig
     @staticmethod
-    @handle_errors(default_return=None)
-    def create_wordcloud_plot(text: str, sentiment: str, theme: ThemeContext) -> Optional[go.Figure]:
-        """Create word cloud visualization"""
-        if len(text.split()) < 3:
-            return None
-        try:
-            colormap = 'Greens' if sentiment == 'Positive' else 'Reds' if sentiment == 'Negative' else 'Blues'
-            wc = WordCloud(
-                width=config.WORDCLOUD_SIZE[0],
-                height=config.WORDCLOUD_SIZE[1],
-                background_color='white',
-                colormap=colormap,
-                max_words=30
-            ).generate(text)
-            # Convert to image
-            img_array = wc.to_array()
-            fig = go.Figure()
-            fig.add_trace(go.Image(z=img_array))
-            fig.update_layout(
-                title=f'{sentiment} Word Cloud',
-                xaxis={'visible': False},
-                yaxis={'visible': False},
-                width=config.FIGURE_WIDTH,
-                height=config.FIGURE_HEIGHT,
-                margin=dict(l=0, r=0, t=30, b=0)
-            )
-            return fig
-        except Exception as e:
-            logger.error(f"Word cloud generation failed: {e}")
-            return None
-    @staticmethod
-    @handle_errors(default_return=None)
-    def create_batch_analysis(results: List[Dict], theme: ThemeContext) -> go.Figure:
-        """Create comprehensive batch visualization using Plotly subplots"""
         fig = make_subplots(
             rows=2, cols=2,
-            subplot_titles=['Sentiment Distribution', 'Confidence Distribution',
-                           'Sentiment Progression', 'Language Distribution'],
-            specs=[[{"type": "pie"}, {"type": "histogram"}],
-                   [{"type": "scatter", "colspan": 2}, None]]
         )
-        # Sentiment distribution (pie chart)
-        sent_counts = Counter([r['sentiment'] for r in results])
-        colors_pie = [theme.colors['pos'] if s == 'Positive' else theme.colors['neg'] if s == 'Negative' else '#FFA500' for s in sent_counts.keys()]
         fig.add_trace(
-            go.Pie(labels=list(sent_counts.keys()), values=list(sent_counts.values()),
-                   marker_colors=colors_pie, name="Sentiment"),
             row=1, col=1
         )
-        # Confidence histogram
-        confs = [r['confidence'] for r in results]
         fig.add_trace(
-            go.Histogram(x=confs, nbinsx=8, marker_color='skyblue', name="Confidence"),
             row=1, col=2
         )
-        # Sentiment progression
-        pos_probs = [r.get('pos_prob', 0) for r in results]
-        indices = list(range(len(results)))
-        colors_scatter = [theme.colors['pos'] if r['sentiment'] == 'Positive'
-                         else theme.colors['neg'] if r['sentiment'] == 'Negative'
-                         else '#FFA500' for r in results]
         fig.add_trace(
-            go.Scatter(x=indices, y=pos_probs, mode='markers',
-                      marker=dict(color=colors_scatter, size=8),
-                      name="Sentiment Progression"),
             row=2, col=1
         )
-        # Add horizontal line at 0.5
-        fig.add_hline(y=0.5, line_dash="dash", line_color="gray", row=2, col=1)
-        fig.update_layout(
-            height=800,
-            width=1000,
-            showlegend=False,
-            title_text="Batch Analysis Results"
         )
         return fig
-# Unified Data Handler (unchanged)
-class DataHandler:
-    """Handles all data operations"""
-    @staticmethod
-    @handle_errors(default_return=(None, "Export failed"))
-    def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]:
-        """Universal data export"""
-        if not data:
-            return None, "No data to export"
-        temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False,
-                                               suffix=f'.{format_type}', encoding='utf-8')
-        if format_type == 'csv':
-            writer = csv.writer(temp_file)
-            writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Pos_Prob', 'Neg_Prob', 'Neutral_Prob', 'Language', 'Key_Words'])
-            for entry in data:
-                writer.writerow([
-                    entry.get('timestamp', ''),
-                    entry.get('text', ''),
-                    entry.get('sentiment', ''),
-                    f"{entry.get('confidence', 0):.4f}",
-                    f"{entry.get('pos_prob', 0):.4f}",
-                    f"{entry.get('neg_prob', 0):.4f}",
-                    f"{entry.get('neutral_prob', 0):.4f}",
-                    entry.get('language', ''),
-                    "|".join([f"{word}:{score:.3f}" for word, score in entry.get('key_words', [])])
-                ])
-        elif format_type == 'json':
-            json.dump(data, temp_file, indent=2, ensure_ascii=False)
-        temp_file.close()
-        return temp_file.name, f"Exported {len(data)} entries"
-    @staticmethod
-    @handle_errors(default_return="")
-    def process_file(file) -> str:
-        """Process uploaded file"""
-        if not file:
-            return ""
-        content = file.read().decode('utf-8')
-        if file.name.endswith('.csv'):
-            import io
-            csv_file = io.StringIO(content)
-            reader = csv.reader(csv_file)
-            try:
-                next(reader)
-                texts = []
-                for row in reader:
-                    if row and row[0].strip():
-                        text = row[0].strip().strip('"')
-                        if text:
-                            texts.append(text)
-                return '\n'.join(texts)
-            except Exception as e:
-                lines = content.strip().split('\n')[1:]
-                texts = []
-                for line in lines:
-                    if line.strip():
-                        text = line.strip().strip('"')
-                        if text:
-                            texts.append(text)
-                return '\n'.join(texts)
-        return content
-# Main Application with Multi-language Support
-class SentimentApp:
-    """Main application orchestrator with multi-language support"""
-    def __init__(self):
-        self.engine = SentimentEngine()
-        self.history = HistoryManager()
-        self.data_handler = DataHandler()
-        # Multi-language examples
-        self.examples = [
-            ["While the film's visual effects were undeniably impressive, the story lacked emotional weight, and the pacing felt inconsistent throughout."],
-            ["这部电影的视觉效果令人印象深刻，但故事缺乏情感深度，节奏感也不够连贯。"],
-            ["Aunque los efectos visuales de la película fueron innegablemente impresionantes, la historia carecía de peso emocional."],
-            ["Bien que les effets visuels du film soient indéniablement impressionnants, l'histoire manquait de poids émotionnel."],
-            ["An extraordinary achievement in filmmaking — the direction was masterful, the script was sharp, and every performance added depth and realism."]
-        ]
-    @handle_errors(default_return=("Please enter text", None, None, None, None))
-    def analyze_single(self, text: str, model_key: str = 'multilingual', theme: str = 'default'):
-        """Single text analysis with multi-language support"""
         if not text.strip():
-            return "Please enter text", None, None, None, None
-        result = self.engine.analyze_single(text, model_key)
         # Add to history
-        self.history.add({
-            'text': text[:100],
             'full_text': text,
-            **result
-        })
         # Create visualizations
-        theme_ctx = ThemeContext(theme)
-        prob_plot = PlotFactory.create_sentiment_bars(result, theme_ctx)
-        gauge_plot = PlotFactory.create_confidence_gauge(result['confidence'], result['sentiment'], theme_ctx)
-        cloud_plot = PlotFactory.create_wordcloud_plot(text, result['sentiment'], theme_ctx)
-        keyword_plot = PlotFactory.create_keyword_chart(result['key_words'], result['sentiment'], theme_ctx)
-        # Format result text with key words
-        key_words_str = ", ".join([f"{word}({score:.3f})" for word, score in result['key_words'][:5]])
-        result_text = (f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})\n"
-                      f"Language: {result['language']}\n"
-                      f"Key Words: {key_words_str}")
-        return result_text, prob_plot, gauge_plot, cloud_plot, keyword_plot
-    @handle_errors(default_return=None)
-    def analyze_batch(self, reviews: str, model_key: str = 'multilingual', progress=None):
-        """Batch analysis with multi-language support"""
-        if not reviews.strip():
-            return None
-        texts = [r.strip() for r in reviews.split('\n') if r.strip()]
-        if len(texts) < 2:
-            return None
-        results = self.engine.analyze_batch(texts, model_key, progress)
         # Add to history
-        for result in results:
-            self.history.add(result)
-        # Create visualization
-        theme_ctx = ThemeContext('default')
-        return PlotFactory.create_batch_analysis(results, theme_ctx)
-    @handle_errors(default_return=(None, "No history available"))
-    def plot_history(self, theme: str = 'default'):
-        """Plot analysis history using Plotly"""
-        history = self.history.get_all()
-        if len(history) < 2:
-            return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
-        theme_ctx = ThemeContext(theme)
-        # Create subplots
-        fig = make_subplots(
-            rows=2, cols=1,
-            subplot_titles=['Sentiment History', 'Confidence Over Time'],
-            vertical_spacing=0.12
-        )
-        indices = list(range(len(history)))
-        pos_probs = [item.get('pos_prob', 0) for item in history]
-        confs = [item['confidence'] for item in history]
-        # Sentiment trend
-        colors = [theme_ctx.colors['pos'] if p > 0.5 else theme_ctx.colors['neg'] for p in pos_probs]
-        fig.add_trace(
-            go.Scatter(
-                x=indices,
-                y=pos_probs,
-                mode='markers+lines',
-                marker=dict(color=colors, size=8),
-                line=dict(color='gray', width=2),
-                name='Sentiment Trend'
-            ),
-            row=1, col=1
-        )
-        # Add horizontal line at 0.5
-        fig.add_hline(y=0.5, line_dash="dash", line_color="gray", row=1, col=1)
-        # Confidence trend
-        fig.add_trace(
-            go.Bar(
-                x=indices,
-                y=confs,
-                marker_color='lightblue',
-                marker_line_color='navy',
-                marker_line_width=1,
-                name='Confidence'
-            ),
-            row=2, col=1
-        )
-        fig.update_layout(
-            height=800,
-            width=1000,
-            showlegend=False,
-            title_text="Analysis History"
-        )
-        fig.update_xaxes(title_text="Analysis Number", row=2, col=1)
-        fig.update_yaxes(title_text="Positive Probability", row=1, col=1)
-        fig.update_yaxes(title_text="Confidence", row=2, col=1)
-        return fig, f"History: {len(history)} analyses"
-# Gradio Interface Setup with Multi-language Support
-def create_interface():
-    """Create streamlined Gradio interface with multi-language support"""
-    app = SentimentApp()
-    with gr.Blocks(theme=gr.themes.Soft(), title="Multi-language Sentiment Analyzer") as demo:
-        gr.Markdown("# 🌍 AI Multi-language Sentiment Analyzer")
-        gr.Markdown("Advanced sentiment analysis supporting multiple languages with Plotly visualizations and key word extraction")
-        with gr.Tab("Single Analysis"):
-            with gr.Row():
-                with gr.Column():
-                    text_input = gr.Textbox(
-                        label="Review Text (Multiple Languages Supported)",
-                        placeholder="Enter your review in any supported language...",
-                        lines=5
                     )
-                    with gr.Row():
-                        analyze_btn = gr.Button("Analyze", variant="primary")
-                        model_selector = gr.Dropdown(
-                            choices=[
-                                ('Auto-detect', 'multilingual'),
-                                ('Multilingual', 'multilingual'),
-                                ('English', 'english'),
-                                ('Chinese 中文', 'chinese'),
-                                ('Spanish Español', 'spanish'),
-                                ('French Français', 'french')
-                            ],
-                            value="multilingual",
-                            label="Language Model"
-                        )
-                        theme_selector = gr.Dropdown(
-                            choices=list(config.THEMES.keys()),
-                            value="default",
-                            label="Theme"
-                        )
-                    gr.Examples(
-                        examples=app.examples,
-                        inputs=text_input,
-                        label="Multi-language Examples"
                     )
-                with gr.Column():
-                    result_output = gr.Textbox(label="Analysis Result", lines=4)
-            with gr.Row():
-                prob_plot = gr.Plot(label="Sentiment Probabilities")
-                gauge_plot = gr.Plot(label="Confidence Gauge")
-            with gr.Row():
-                wordcloud_plot = gr.Plot(label="Word Cloud")
-                keyword_plot = gr.Plot(label="Key Contributing Words")
-        with gr.Tab("Batch Analysis"):
-            with gr.Row():
-                with gr.Column():
-                    file_upload = gr.File(label="Upload File", file_types=[".csv", ".txt"])
-                    batch_input = gr.Textbox(
-                        label="Reviews (one per line, mixed languages supported)",
-                        lines=8,
-                        placeholder="Enter multiple reviews, one per line...\nSupports mixed languages in the same batch!"
                     )
-                with gr.Column():
-                    load_btn = gr.Button("Load File")
-                    with gr.Row():
-                        batch_btn = gr.Button("Analyze Batch", variant="primary")
-                        batch_model_selector = gr.Dropdown(
-                            choices=[
-                                ('Auto-detect', 'multilingual'),
-                                ('Multilingual', 'multilingual'),
-                                ('English', 'english'),
-                                ('Chinese 中文', 'chinese'),
-                                ('Spanish Español', 'spanish'),
-                                ('French Français', 'french')
-                            ],
-                            value="multilingual",
-                            label="Batch Model"
-                        )
-            batch_plot = gr.Plot(label="Batch Analysis Results")
-        with gr.Tab("History & Export"):
-            with gr.Row():
-                refresh_btn = gr.Button("Refresh History")
-                clear_btn = gr.Button("Clear History", variant="stop")
-                status_btn = gr.Button("Show Status")
-            with gr.Row():
-                csv_btn = gr.Button("Export CSV")
-                json_btn = gr.Button("Export JSON")
-            history_status = gr.Textbox(label="Status Information")
-            history_plot = gr.Plot(label="History Trends")
-            csv_file = gr.File(label="CSV Download", visible=True)
-            json_file = gr.File(label="JSON Download", visible=True)
-        with gr.Tab("Model Information"):
-            gr.Markdown("""
-            ## Supported Languages and Models
-            | Language | Model | Description |
-            |----------|-------|-------------|
-            | **Multilingual** | XLM-RoBERTa | Supports 100+ languages automatically |
-            | **English** | RoBERTa-base | Optimized for English text |
-            | **Chinese 中文** | RoBERTa-Chinese | Specialized for Chinese language |
-            | **Spanish Español** | BETO | Fine-tuned for Spanish sentiment |
-            | **French Français** | tf-allocine | Trained on French movie reviews |
-            ### Features:
-            - **Automatic Language Detection**: The system can automatically detect the input language
-            - **Attention-based Keywords**: Extract words that contribute most to sentiment prediction
-            - **Interactive Visualizations**: Plotly-powered charts and graphs
-            - **Batch Processing**: Analyze multiple texts at once
-            - **Export Capabilities**: Save results in CSV or JSON format
-            - **Multi-language Support**: Mix different languages in batch analysis
-            """)
-        # Event bindings
-        analyze_btn.click(
-            app.analyze_single,
-            inputs=[text_input, model_selector, theme_selector],
-            outputs=[result_output, prob_plot, gauge_plot, wordcloud_plot, keyword_plot]
-        )
-        load_btn.click(
-            app.data_handler.process_file,
-            inputs=file_upload,
-            outputs=batch_input
-        )
-        batch_btn.click(
-            app.analyze_batch,
-            inputs=[batch_input, batch_model_selector],
-            outputs=batch_plot
-        )
-        refresh_btn.click(
-            lambda theme: app.plot_history(theme),
-            inputs=theme_selector,
-            outputs=[history_plot, history_status]
-        )
-        clear_btn.click(
-            lambda: f"Cleared {app.history.clear()} entries",
-            outputs=history_status
-        )
-        status_btn.click(
-            lambda: f"History: {app.history.size()} entries | Available Models: {', '.join(config.MODELS.keys())}",
-            outputs=history_status
-        )
-        csv_btn.click(
-            lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
-            outputs=[csv_file, history_status]
-        )
-        json_btn.click(
-            lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
-            outputs=[json_file, history_status]
-        )
-    return demo
-# Application Entry Point
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    demo = create_interface()
-    demo.launch(
-        share=True,
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import plotly.graph_objects as go
 import plotly.express as px
 from plotly.subplots import make_subplots
 import tempfile
 from datetime import datetime
 import logging
+from functools import lru_cache
 from dataclasses import dataclass
+from typing import List, Dict, Optional, Tuple
+import nltk
+from nltk.corpus import stopwords
+import langdetect
+import pandas as pd
# Optional explainability back-ends. SHAP and LIME are imported when they are
# installed; otherwise the matching *_AVAILABLE flag is set to False so callers
# can fall back to basic analysis.
#
# The module-level ``logger`` is only created further down the file, so the
# warnings here go through logging.getLogger(__name__) directly. Using
# ``logger`` at this point raised NameError whenever an import failed.
try:
    import shap
    SHAP_AVAILABLE = True
except ImportError:
    SHAP_AVAILABLE = False
    logging.getLogger(__name__).warning("SHAP not available, using basic analysis")
try:
    from lime.lime_text import LimeTextExplainer
    LIME_AVAILABLE = True
except ImportError:
    LIME_AVAILABLE = False
    logging.getLogger(__name__).warning("LIME not available, using basic analysis")
 # Configuration
 @dataclass
 class Config:
+    MAX_HISTORY_SIZE: int = 500
+    BATCH_SIZE_LIMIT: int = 30
     MAX_TEXT_LENGTH: int = 512
+    CACHE_SIZE: int = 64
+    # Supported languages and models
+    SUPPORTED_LANGUAGES = {
+        'auto': 'Auto Detect',
+        'en': 'English',
+        'zh': 'Chinese',
+        'es': 'Spanish',
+        'fr': 'French',
+        'de': 'German',
+        'sv': 'Swedish'
     }
     MODELS = {
+        'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
+        'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
+        'zh': "uer/roberta-base-finetuned-dianping-chinese"
     }
+    # Color themes
+    THEMES = {
+        'default': {'pos': '#4CAF50', 'neg': '#F44336', 'neu': '#FF9800'},
+        'ocean': {'pos': '#0077BE', 'neg': '#FF6B35', 'neu': '#00BCD4'},
+        'dark': {'pos': '#66BB6A', 'neg': '#EF5350', 'neu': '#FFA726'},
+        'rainbow': {'pos': '#9C27B0', 'neg': '#E91E63', 'neu': '#FF5722'}
     }
 config = Config()
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize NLTK: fetch the corpora and build the English stop-word set.
# Any ordinary failure (no network, corpus missing, NLTK unusable) falls back
# to a small built-in list. ``except Exception`` replaces the former bare
# ``except:`` so KeyboardInterrupt / SystemExit are no longer swallowed, and
# the reason for the fallback is logged instead of being hidden.
try:
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt', quiet=True)
    STOP_WORDS = set(stopwords.words('english'))
except Exception as exc:
    logger.warning("NLTK stopwords unavailable (%s); using built-in fallback list", exc)
    STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
class ModelManager:
    """Loads the sentiment checkpoints once and hands out (model, tokenizer) pairs.

    Two checkpoints are loaded eagerly by ``__init__``: the multilingual one,
    stored under the key ``'default'``, and the Chinese one, stored under
    ``'zh'``. The ``'en'`` entry of ``config.MODELS`` is not loaded here;
    English text is served by the multilingual model.
    """

    def __init__(self):
        self.models = {}
        self.tokenizers = {}
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self._load_default_model()

    def _load_checkpoint(self, key: str, model_name: str) -> None:
        """Load tokenizer and model for ``model_name`` and register them under ``key``."""
        self.tokenizers[key] = AutoTokenizer.from_pretrained(model_name)
        self.models[key] = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.models[key].to(self.device)

    def _load_default_model(self):
        """Load the multilingual and Chinese models.

        Raises:
            Exception: whatever the underlying loader raised; it is logged
                and re-raised so start-up fails loudly.
        """
        try:
            # Load multilingual model as default
            model_name = config.MODELS['multilingual']
            self._load_checkpoint('default', model_name)
            logger.info(f"Default model loaded: {model_name}")
            # Load Chinese model
            zh_model_name = config.MODELS['zh']
            self._load_checkpoint('zh', zh_model_name)
            logger.info(f"Chinese model loaded: {zh_model_name}")
        except Exception as e:
            logger.error(f"Failed to load models: {e}")
            raise

    def get_model(self, language='en'):
        """Return ``(model, tokenizer)`` for ``language``.

        Only ``'zh'`` has a dedicated checkpoint; every other value, including
        ``'auto'`` and unknown codes, is served by the multilingual model.
        """
        key = 'zh' if language == 'zh' else 'default'
        return self.models[key], self.tokenizers[key]

    @staticmethod
    def detect_language(text: str) -> str:
        """Detect the language of ``text`` with langdetect.

        Returns:
            A key of ``config.SUPPORTED_LANGUAGES``. ``'en'`` is returned when
            detection fails or yields an unsupported language.
        """
        # langdetect reports Chinese as 'zh-cn' / 'zh-tw'; fold both into 'zh'.
        language_mapping = {
            'zh-cn': 'zh',
            'zh-tw': 'zh'
        }
        try:
            detected = langdetect.detect(text)
        except Exception as e:
            # Detection errors (e.g. text with no usable features) are expected
            # and non-fatal. Catching Exception rather than using a bare
            # ``except:`` keeps KeyboardInterrupt / SystemExit propagating.
            logger.debug("Language detection failed, defaulting to 'en': %s", e)
            return 'en'
        detected = language_mapping.get(detected, detected)
        return detected if detected in config.SUPPORTED_LANGUAGES else 'en'


model_manager = ModelManager()
 class HistoryManager:
+    """Enhanced history manager with more features"""
     def __init__(self):
         self._history = []
+    def add_entry(self, entry: Dict):
+        self._history.append(entry)
         if len(self._history) > config.MAX_HISTORY_SIZE:
             self._history = self._history[-config.MAX_HISTORY_SIZE:]
+    def add_batch_entries(self, entries: List[Dict]):
+        """Add multiple entries at once"""
+        for entry in entries:
+            self.add_entry(entry)
+    def get_history(self) -> List[Dict]:
         return self._history.copy()
+    def get_recent_history(self, n: int = 10) -> List[Dict]:
+        """Get n most recent entries"""
+        return self._history[-n:] if self._history else []
+    def filter_history(self, sentiment: str = None, language: str = None,
+                      min_confidence: float = None) -> List[Dict]:
+        """Filter history by criteria"""
+        filtered = self._history
+        if sentiment:
+            filtered = [h for h in filtered if h['sentiment'] == sentiment]
+        if language:
+            filtered = [h for h in filtered if h.get('language', 'en') == language]
+        if min_confidence:
+            filtered = [h for h in filtered if h['confidence'] >= min_confidence]
+        return filtered
     def clear(self) -> int:
         count = len(self._history)
         self._history.clear()
         return count
+    def get_stats(self) -> Dict:
+        if not self._history:
+            return {}
+        sentiments = [item['sentiment'] for item in self._history]
+        confidences = [item['confidence'] for item in self._history]
+        languages = [item.get('language', 'en') for item in self._history]
+        return {
+            'total_analyses': len(self._history),
+            'positive_count': sentiments.count('Positive'),
+            'negative_count': sentiments.count('Negative'),
+            'neutral_count': sentiments.count('Neutral'),
+            'avg_confidence': np.mean(confidences),
+            'max_confidence': np.max(confidences),
+            'min_confidence': np.min(confidences),
+            'languages_detected': len(set(languages)),
+            'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
+            'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history])
+        }
+history_manager = HistoryManager()
class TextProcessor:
    """Stateless helpers for cleaning text, picking keywords and splitting batch input."""

    @staticmethod
    @lru_cache(maxsize=config.CACHE_SIZE)
    def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
        """Lower-case ``text`` and strip noise from it.

        Digits and punctuation are removed when the matching flag is set.
        Afterwards stop words and words of two characters or fewer are
        dropped. Results are memoised per argument combination.
        """
        normalized = text.lower().strip()
        if remove_numbers:
            normalized = re.sub(r'\d+', '', normalized)
        if remove_punctuation:
            normalized = re.sub(r'[^\w\s]', '', normalized)
        return ' '.join(
            token for token in normalized.split()
            if len(token) > 2 and token not in STOP_WORDS
        )

    @staticmethod
    def extract_keywords(text: str, top_k: int = 5) -> List[str]:
        """Return up to ``top_k`` most frequent items of ``text``.

        Text containing CJK ideographs is counted character by character
        (ideographs only); any other text is cleaned and counted word by word.
        """
        if re.search(r'[\u4e00-\u9fff]', text) is None:
            # Word-based extraction on the cleaned text.
            frequencies = Counter(TextProcessor.clean_text(text).split())
        else:
            # Counting a string counts its individual characters.
            frequencies = Counter(''.join(re.findall(r'[\u4e00-\u9fff]+', text)))
        return [item for item, _ in frequencies.most_common(top_k)]

    @staticmethod
    def parse_batch_input(text: str) -> List[str]:
        """Split textarea content into stripped, non-empty lines."""
        stripped_lines = (raw.strip() for raw in text.strip().split('\n'))
        return [line for line in stripped_lines if line]
class SentimentAnalyzer:
    """Runs the loaded sentiment models over one text or a list of texts."""

    @staticmethod
    def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
        """Classify a single text.

        Args:
            text: the text to classify; must contain non-whitespace characters.
            language: a language code, or ``'auto'`` to detect it from ``text``.
            preprocessing_options: optional dict with the keys ``clean_text``,
                ``remove_punctuation`` and ``remove_numbers``.

        Returns:
            Dict with sentiment, confidence, per-class probabilities,
            ``has_neutral``, language, keywords, word count and char count.

        Raises:
            ValueError: if ``text`` is empty or whitespace only.
            Exception: any error raised during inference is logged and re-raised.
        """
        if not text.strip():
            raise ValueError("Empty text provided")

        # The language decides which model/tokenizer pair is used.
        detected_lang = model_manager.detect_language(text) if language == 'auto' else language
        model, tokenizer = model_manager.get_model(detected_lang)

        # Optional cleaning; text containing Chinese characters is never cleaned.
        options = preprocessing_options or {}
        has_chinese = re.search(r'[\u4e00-\u9fff]', text) is not None
        if options.get('clean_text', False) and not has_chinese:
            processed_text = TextProcessor.clean_text(
                text,
                options.get('remove_punctuation', True),
                options.get('remove_numbers', False),
            )
        else:
            processed_text = text

        try:
            encoded = tokenizer(
                processed_text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=config.MAX_TEXT_LENGTH,
            ).to(model_manager.device)
            with torch.no_grad():
                logits = model(**encoded).logits
                probs = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()[0]

            best = np.argmax(probs)
            if len(probs) == 3:
                # Class order: negative, neutral, positive.
                result = {
                    'sentiment': ['Negative', 'Neutral', 'Positive'][best],
                    'confidence': float(probs[best]),
                    'neg_prob': float(probs[0]),
                    'neu_prob': float(probs[1]),
                    'pos_prob': float(probs[2]),
                    'has_neutral': True,
                }
            else:
                # Class order: negative, positive.
                result = {
                    'sentiment': "Positive" if best == 1 else "Negative",
                    'confidence': float(probs[best]),
                    'neg_prob': float(probs[0]),
                    'pos_prob': float(probs[1]),
                    'neu_prob': 0.0,
                    'has_neutral': False,
                }

            # Metadata is always computed from the original, uncleaned text.
            result['language'] = detected_lang
            result['keywords'] = TextProcessor.extract_keywords(text)
            result['word_count'] = len(text.split())
            result['char_count'] = len(text)
            return result
        except Exception as e:
            logger.error(f"Analysis failed: {e}")
            raise

    @staticmethod
    def analyze_batch(texts: List[str], language: str = 'auto',
                     preprocessing_options: Dict = None) -> List[Dict]:
        """Classify every text in ``texts``.

        A failure on one text does not stop the batch: an ``'Error'`` entry
        holding the message and the offending text is recorded in its place.
        Every entry carries its position as ``batch_index``.
        """
        outcomes = []
        for position, item in enumerate(texts):
            try:
                outcome = SentimentAnalyzer.analyze_text(item, language, preprocessing_options)
                outcome['batch_index'] = position
            except Exception as e:
                outcome = {
                    'sentiment': 'Error',
                    'confidence': 0.0,
                    'error': str(e),
                    'batch_index': position,
                    'text': item,
                }
            outcomes.append(outcome)
        return outcomes
class ExplainabilityAnalyzer:
    """LIME- and attention-based explanations for a sequence-classification model."""

    @staticmethod
    def create_prediction_function(model, tokenizer, device):
        """Build the ``texts -> probabilities`` callable that LIME needs.

        The returned function accepts a string or a list of strings and
        returns an array with one probability row per text. A text that
        cannot be scored gets a uniform row with the model's number of
        classes, so every row has the same width.
        """
        # Width of the fallback row. Read from the model configuration when it
        # is exposed (assumed Hugging Face style ``model.config.num_labels``),
        # otherwise 3 as before.
        num_labels = getattr(getattr(model, 'config', None), 'num_labels', None)
        if not isinstance(num_labels, int) or num_labels < 1:
            num_labels = 3
        uniform_row = np.full(num_labels, 1.0 / num_labels)

        def predict_proba(texts):
            if isinstance(texts, str):
                texts = [texts]
            results = []
            for text in texts:
                try:
                    inputs = tokenizer(text, return_tensors="pt", padding=True,
                                     truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
                    with torch.no_grad():
                        outputs = model(**inputs)
                        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
                    results.append(probs)
                except Exception as e:
                    # Keep LIME running: one bad perturbation gets an
                    # uninformative (uniform) prediction.
                    logger.warning("Prediction failed for one sample: %s", e)
                    results.append(uniform_row.copy())
            return np.array(results)
        return predict_proba

    @staticmethod
    def analyze_with_lime(text: str, model, tokenizer, device, num_features: int = 10) -> Dict:
        """Explain the prediction for ``text`` with LIME.

        Returns:
            ``{'method': 'LIME', 'feature_importance': [(feature, weight), ...],
            'class_names': [...]}`` on success, or
            ``{'method': 'LIME', 'error': message}`` when LIME is missing or
            the analysis fails.
        """
        if not LIME_AVAILABLE:
            return {'method': 'LIME', 'error': 'LIME library not available'}
        try:
            # Create prediction function
            predict_fn = ExplainabilityAnalyzer.create_prediction_function(model, tokenizer, device)
            # Probe once to learn how many classes the model outputs.
            test_probs = predict_fn([text])
            if len(test_probs) == 0:
                return {'method': 'LIME', 'error': 'Prediction function failed'}
            num_classes = len(test_probs[0])
            if num_classes == 3:
                class_names = ['Negative', 'Neutral', 'Positive']
            else:
                class_names = ['Negative', 'Positive']
            explainer = LimeTextExplainer(
                class_names=class_names,
                feature_selection='auto',
                split_expression=r'\W+',
                bow=False
            )
            explanation = explainer.explain_instance(
                text,
                predict_fn,
                # Never ask for more features than there are words, but ask
                # for at least one.
                num_features=max(1, min(num_features, len(text.split()))),
                num_samples=50  # Reduced for faster processing
            )
            feature_importance = explanation.as_list()
            return {
                'method': 'LIME',
                'feature_importance': feature_importance,
                'class_names': class_names
            }
        except Exception as e:
            logger.error(f"LIME analysis failed: {e}")
            return {'method': 'LIME', 'error': str(e)}

    @staticmethod
    def _attention_per_token(attentions):
        """Collapse per-layer attention maps into one weight per token.

        Args:
            attentions: sequence of tensors, one per layer. Assumed to be
                shaped (batch, heads, query, key), as Hugging Face models
                return with ``output_attentions=True``.

        Returns:
            1-D numpy array with one value per key position: the attention
            that token receives, averaged over layers, batch, heads and
            query positions.
        """
        stacked = torch.stack(tuple(attentions))  # (layers, batch, heads, query, key)
        # Averaging only over (layers, batch, heads) leaves a query x key
        # matrix, which cannot be turned into one float per token.
        return stacked.mean(dim=(0, 1, 2, 3)).cpu().numpy()

    @staticmethod
    def analyze_with_attention(text: str, model, tokenizer, device) -> Dict:
        """Score each token of ``text`` by the attention it receives.

        Returns:
            ``{'method': 'Attention', 'tokens': [...],
            'attention_weights': [(token, weight), ...]}`` on success, or
            ``{'method': 'Attention', 'error': message}`` on failure.
        """
        try:
            inputs = tokenizer(text, return_tensors="pt", padding=True,
                             truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
            # Get tokens for display
            tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
            try:
                with torch.no_grad():
                    outputs = model(**inputs, output_attentions=True)
                attentions = getattr(outputs, 'attentions', None)
                if attentions is None:
                    raise AttributeError("No attention outputs")
                avg_attention = ExplainabilityAnalyzer._attention_per_token(attentions)
            except Exception as e:
                logger.warning("Model attention unavailable, using placeholder weights: %s", e)
                # NOTE(review): these are random placeholder values, not model
                # attention; they only keep the plot populated. Consider
                # surfacing an error to the user instead.
                avg_attention = np.random.uniform(0.1, 1.0, len(tokens))
                # Down-weight special tokens so real words stand out.
                for i, token in enumerate(tokens):
                    if token in ['[CLS]', '[SEP]', '<s>', '</s>', '<pad>']:
                        avg_attention[i] *= 0.3
            attention_weights = []
            for i, token in enumerate(tokens):
                if i < len(avg_attention):
                    # Strip sub-word markers for display: 'Ġ' (byte-level BPE),
                    # '▁' (SentencePiece, used by the multilingual model) and
                    # '##' (WordPiece).
                    clean_token = token.replace('Ġ', '').replace('▁', '').replace('##', '')
                    if clean_token.strip():
                        attention_weights.append((clean_token, float(avg_attention[i])))
            return {
                'method': 'Attention',
                'tokens': [t[0] for t in attention_weights],
                'attention_weights': attention_weights
            }
        except Exception as e:
            logger.error(f"Attention analysis failed: {e}")
            return {'method': 'Attention', 'error': str(e)}
class AdvancedVisualizer:
    """Plotly figures for explainability results, single predictions, batches and history.

    All methods are static and return a ``go.Figure``. When there is nothing
    to draw they return an empty or message-only figure rather than raising.
    """

    # Colour for any sentiment label without a theme entry (e.g. 'Error').
    _FALLBACK_COLOR = '#999999'

    @staticmethod
    def _theme_colors(theme: str) -> Dict:
        """Return the colour table for ``theme``; unknown names use the default theme."""
        return config.THEMES.get(theme, config.THEMES['default'])

    @staticmethod
    def _sentiment_color(colors: Dict, sentiment) -> str:
        """Map a sentiment label to its theme colour.

        Theme keys are the first three lower-cased letters of the label
        ('pos', 'neg', 'neu'); anything else gets the fallback grey.
        """
        return colors.get(str(sentiment).lower()[:3], AdvancedVisualizer._FALLBACK_COLOR)

    @staticmethod
    def _message_figure(message: str, title: Optional[str] = None) -> go.Figure:
        """Return an otherwise empty figure showing ``message`` in its centre."""
        fig = go.Figure()
        fig.add_annotation(
            text=message,
            x=0.5, y=0.5,
            xref="paper", yref="paper",
            showarrow=False,
            font=dict(size=14)
        )
        if title is None:
            fig.update_layout(height=400)
        else:
            fig.update_layout(height=400, title=title)
        return fig

    @staticmethod
    def create_lime_plot(lime_result: Dict, theme: str = 'default') -> go.Figure:
        """Create a horizontal bar chart of LIME feature importances.

        Negative scores are drawn red, positive ones green. An error result
        or an empty feature list yields a message-only figure.
        """
        if 'error' in lime_result:
            return AdvancedVisualizer._message_figure(f"LIME Error: {lime_result['error']}")
        feature_importance = lime_result.get('feature_importance')
        if not feature_importance:
            # zip(*[]) cannot be unpacked into two names; report instead of crashing.
            return AdvancedVisualizer._message_figure("No LIME features available")
        features, scores = zip(*feature_importance)
        colors = ['red' if score < 0 else 'green' for score in scores]
        fig = go.Figure(data=[
            go.Bar(
                y=features,
                x=scores,
                orientation='h',
                marker_color=colors,
                text=[f'{score:.3f}' for score in scores],
                textposition='auto'
            )
        ])
        fig.update_layout(
            title="LIME Feature Importance",
            xaxis_title="Importance Score",
            yaxis_title="Features",
            height=400,
            showlegend=False
        )
        return fig

    @staticmethod
    def create_attention_plot(attention_result: Dict, theme: str = 'default') -> go.Figure:
        """Create a bar chart of min-max normalised attention weights.

        At most the 15 highest-weighted tokens are shown, in their original
        order. An error result or missing weights yields a message-only figure.
        """
        if 'error' in attention_result:
            return AdvancedVisualizer._message_figure(
                f"Attention Error: {attention_result['error']}",
                title="Attention Analysis Error"
            )
        if not attention_result.get('attention_weights'):
            return AdvancedVisualizer._message_figure(
                "No attention weights available",
                title="No Attention Data"
            )
        tokens, weights = zip(*attention_result['attention_weights'])
        # Normalize weights for better visualization
        weights = np.array(weights)
        if weights.max() > weights.min():
            normalized_weights = (weights - weights.min()) / (weights.max() - weights.min())
        else:
            normalized_weights = weights
        # Limit display to top 15 tokens for readability
        if len(tokens) > 15:
            # Pick the 15 largest weights, then restore reading order so the
            # x axis still reflects token position.
            top_indices = np.sort(np.argsort(weights)[-15:])
            tokens = [tokens[i] for i in top_indices]
            normalized_weights = normalized_weights[top_indices]
        fig = go.Figure(data=[
            go.Bar(
                x=list(range(len(tokens))),
                y=normalized_weights,
                text=tokens,
                textposition='outside',
                # The colour scale belongs to the marker; a top-level
                # ``colorscale`` argument is rejected by go.Bar.
                marker=dict(color=normalized_weights, colorscale='Viridis'),
                hovertemplate='<b>%{text}</b><br>Weight: %{y:.3f}<extra></extra>'
            )
        ])
        fig.update_layout(
            title="Attention Weights (Top Tokens)",
            xaxis_title="Token Position",
            yaxis_title="Attention Weight (Normalized)",
            height=400,
            showlegend=False,
            xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=list(tokens))
        )
        return fig

    # --- Enhanced visualizations with Plotly ---

    @staticmethod
    def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
        """Create a gauge for a single analysis result.

        Three-class results show the positive probability against
        negative / neutral / positive bands. Two-class results show the
        confidence of the predicted label.
        """
        colors = AdvancedVisualizer._theme_colors(theme)
        # Neutral gets its own colour instead of being drawn as negative.
        bar_color = AdvancedVisualizer._sentiment_color(colors, result['sentiment'])
        if result['has_neutral']:
            # Three-way gauge
            fig = go.Figure(go.Indicator(
                mode="gauge+number+delta",
                value=result['pos_prob'] * 100,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': f"Sentiment: {result['sentiment']}"},
                delta={'reference': 50},
                gauge={
                    'axis': {'range': [None, 100]},
                    'bar': {'color': bar_color},
                    'steps': [
                        {'range': [0, 33], 'color': colors['neg']},
                        {'range': [33, 67], 'color': colors['neu']},
                        {'range': [67, 100], 'color': colors['pos']}
                    ],
                    'threshold': {
                        'line': {'color': "red", 'width': 4},
                        'thickness': 0.75,
                        'value': 90
                    }
                }
            ))
        else:
            # Two-way gauge
            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=result['confidence'] * 100,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': f"Confidence: {result['sentiment']}"},
                gauge={
                    'axis': {'range': [None, 100]},
                    'bar': {'color': bar_color},
                    'steps': [
                        {'range': [0, 50], 'color': "lightgray"},
                        {'range': [50, 100], 'color': "gray"}
                    ]
                }
            ))
        fig.update_layout(height=400, font={'size': 16})
        return fig

    @staticmethod
    def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
        """Create a bar chart of the class probabilities of one result."""
        colors = AdvancedVisualizer._theme_colors(theme)
        if result['has_neutral']:
            labels = ['Negative', 'Neutral', 'Positive']
            values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
            bar_colors = [colors['neg'], colors['neu'], colors['pos']]
        else:
            labels = ['Negative', 'Positive']
            values = [result['neg_prob'], result['pos_prob']]
            bar_colors = [colors['neg'], colors['pos']]
        fig = go.Figure(data=[
            go.Bar(x=labels, y=values, marker_color=bar_colors, text=[f'{v:.3f}' for v in values])
        ])
        fig.update_traces(texttemplate='%{text}', textposition='outside')
        fig.update_layout(
            title="Sentiment Probabilities",
            yaxis_title="Probability",
            height=400,
            showlegend=False
        )
        return fig

    @staticmethod
    def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
        """Create a donut chart of sentiment counts for a batch ('Error' entries included)."""
        colors = AdvancedVisualizer._theme_colors(theme)
        sentiment_counts = Counter(r['sentiment'] for r in results if 'sentiment' in r)
        fig = go.Figure(data=[go.Pie(
            labels=list(sentiment_counts.keys()),
            values=list(sentiment_counts.values()),
            marker_colors=[AdvancedVisualizer._sentiment_color(colors, s) for s in sentiment_counts],
            textinfo='label+percent',
            hole=0.3
        )])
        fig.update_layout(
            title=f"Batch Analysis Summary ({len(results)} texts)",
            height=400
        )
        return fig

    @staticmethod
    def create_confidence_distribution(results: List[Dict]) -> go.Figure:
        """Create a histogram of confidence scores; 'Error' entries are ignored.

        Returns an empty figure when no usable confidence value exists.
        """
        confidences = [
            r['confidence'] for r in results
            if 'confidence' in r and r.get('sentiment') != 'Error'
        ]
        if not confidences:
            return go.Figure()
        fig = go.Figure(data=[go.Histogram(
            x=confidences,
            nbinsx=20,
            marker_color='skyblue',
            opacity=0.7
        )])
        fig.update_layout(
            title="Confidence Distribution",
            xaxis_title="Confidence Score",
            yaxis_title="Frequency",
            height=400
        )
        return fig

    @staticmethod
    def create_history_dashboard(history: List[Dict], theme: str = 'default') -> go.Figure:
        """Create a 2x2 dashboard over the analysis history.

        Panels: positive-probability timeline, confidence histogram, language
        pie and sentiment counts. Entries lacking the ``'pos_prob'``,
        ``'confidence'`` or ``'sentiment'`` key are skipped. Fewer than two
        usable entries yield an empty figure.
        """
        usable = [
            item for item in history
            if 'pos_prob' in item and 'confidence' in item and 'sentiment' in item
        ]
        if len(usable) < 2:
            return go.Figure()
        colors = AdvancedVisualizer._theme_colors(theme)
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
                           'Language Distribution', 'Sentiment Summary'],
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"type": "pie"}, {"type": "bar"}]]
        )
        # Extract data
        indices = list(range(len(usable)))
        pos_probs = [item['pos_prob'] for item in usable]
        confidences = [item['confidence'] for item in usable]
        sentiments = [item['sentiment'] for item in usable]
        languages = [item.get('language', 'en') for item in usable]
        # Sentiment timeline; each marker takes the colour of its own label,
        # so Neutral is no longer drawn as negative.
        marker_colors = [AdvancedVisualizer._sentiment_color(colors, s) for s in sentiments]
        fig.add_trace(
            go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
                      marker=dict(color=marker_colors, size=8),
                      name='Positive Probability'),
            row=1, col=1
        )
        # Confidence distribution
        fig.add_trace(
            go.Histogram(x=confidences, nbinsx=10, name='Confidence'),
            row=1, col=2
        )
        # Language distribution
        lang_counts = Counter(languages)
        fig.add_trace(
            go.Pie(labels=list(lang_counts.keys()), values=list(lang_counts.values()),
                   name="Languages"),
            row=2, col=1
        )
        # Sentiment summary
        sent_counts = Counter(sentiments)
        fig.add_trace(
            go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
                   marker_color=[AdvancedVisualizer._sentiment_color(colors, k) for k in sent_counts]),
            row=2, col=2
        )
        fig.update_layout(height=800, showlegend=False)
        return fig
+# Main application functions
def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
                       remove_punct: bool, remove_nums: bool):
    """Analyse one text and return (markdown summary, gauge figure, bar figure).

    The result is also recorded in the shared history. For blank input, or when
    anything raises, the two figure slots are None and the first element holds
    the message to display.
    """
    try:
        if not text.strip():
            return "Please enter text", None, None

        # The dropdown shows display names; the analyzer is given language codes.
        display_names = ('Auto Detect', 'English', 'Chinese', 'Spanish',
                         'French', 'German', 'Swedish')
        codes = ('auto', 'en', 'zh', 'es', 'fr', 'de', 'sv')
        language_code = dict(zip(display_names, codes)).get(language, 'auto')

        result = SentimentAnalyzer.analyze_text(
            text,
            language_code,
            {
                'clean_text': clean_text,
                'remove_punctuation': remove_punct,
                'remove_numbers': remove_nums,
            },
        )

        # Record the analysis; long texts get a shortened preview alongside
        # the untouched original.
        preview = text if len(text) <= 100 else text[:100] + '...'
        history_manager.add_entry({
            'text': preview,
            'full_text': text,
            'sentiment': result['sentiment'],
            'confidence': result['confidence'],
            'pos_prob': result['pos_prob'],
            'neg_prob': result['neg_prob'],
            'neu_prob': result.get('neu_prob', 0),
            'language': result['language'],
            'timestamp': datetime.now().isoformat(),
            'analysis_type': 'single',
        })

        gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
        bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)

        # Markdown summary shown next to the plots.
        summary_lines = [
            "",
            "**Analysis Results:**",
            f"- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)",
            f"- **Language:** {result['language'].upper()}",
            f"- **Keywords:** {', '.join(result['keywords'])}",
            f"- **Stats:** {result['word_count']} words, {result['char_count']} characters",
            "        ",
        ]
        return "\n".join(summary_lines), gauge_fig, bars_fig
    except Exception as e:
        logger.error(f"Analysis failed: {e}")
        return f"Error: {str(e)}", None, None
def _batch_preview(text: str, limit: int) -> str:
    """Return ``text`` cut to ``limit`` characters, adding '...' when shortened."""
    return text[:limit] + '...' if len(text) > limit else text


def _batch_history_entry(index: int, text: str, result: Dict) -> Dict:
    """Build the history record for one successfully analysed batch text."""
    return {
        'text': _batch_preview(text, 100),
        'full_text': text,
        'sentiment': result['sentiment'],
        'confidence': result['confidence'],
        'pos_prob': result['pos_prob'],
        'neg_prob': result['neg_prob'],
        'neu_prob': result.get('neu_prob', 0),
        'language': result['language'],
        'timestamp': datetime.now().isoformat(),
        'analysis_type': 'batch',
        'batch_index': index
    }


def _batch_table_row(index: int, text: str, result: Dict) -> Dict:
    """Build one display row of the batch results table (1-based index)."""
    row = {'Index': index + 1, 'Text': _batch_preview(text, 50)}
    if 'error' in result:
        row.update({
            'Sentiment': 'Error',
            # Formatted like the successful rows so the column holds one type.
            'Confidence': f"{0.0:.3f}",
            'Language': 'Unknown',
            'Error': result['error']
        })
    else:
        row.update({
            'Sentiment': result['sentiment'],
            'Confidence': f"{result['confidence']:.3f}",
            'Language': result['language'].upper(),
            'Keywords': ', '.join(result['keywords'][:3])
        })
    return row


def analyze_batch_texts(batch_text: str, language: str, theme: str,
                       clean_text: bool, remove_punct: bool, remove_nums: bool):
    """Analyse several texts (one per line) and summarise the outcome.

    Args:
        batch_text: Raw multi-line input; split by TextProcessor.parse_batch_input.
        language: Display name from the language dropdown; unknown names fall
            back to automatic detection.
        theme: Key of config.THEMES used for the summary chart.
        clean_text: Forwarded to the analyzer as the 'clean_text' option.
        remove_punct: Forwarded as the 'remove_punctuation' option.
        remove_nums: Forwarded as the 'remove_numbers' option.

    Returns:
        A 4-tuple (summary markdown, results DataFrame, sentiment summary
        figure, confidence histogram figure). For blank or oversized input, or
        when anything raises, the first element is a message and the other
        three are None.
    """
    try:
        if not batch_text.strip():
            return "Please enter texts (one per line)", None, None, None

        texts = TextProcessor.parse_batch_input(batch_text)
        if len(texts) > config.BATCH_SIZE_LIMIT:
            return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
        if not texts:
            return "No valid texts found", None, None, None

        # Map display names back to language codes
        language_map = {
            'Auto Detect': 'auto',
            'English': 'en',
            'Chinese': 'zh',
            'Spanish': 'es',
            'French': 'fr',
            'German': 'de',
            'Swedish': 'sv'
        }
        language_code = language_map.get(language, 'auto')
        preprocessing_options = {
            'clean_text': clean_text,
            'remove_punctuation': remove_punct,
            'remove_numbers': remove_nums
        }

        results = SentimentAnalyzer.analyze_batch(texts, language_code, preprocessing_options)

        # Only successful analyses are recorded in the history.
        history_manager.add_batch_entries([
            _batch_history_entry(i, text, result)
            for i, (text, result) in enumerate(zip(texts, results))
            if 'error' not in result
        ])

        summary_fig = PlotlyVisualizer.create_batch_summary(results, theme)
        confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)

        # Error rows and success rows carry different keys ('Error' vs
        # 'Keywords'); fill the resulting gaps with '' instead of showing NaN.
        df = pd.DataFrame([
            _batch_table_row(i, text, result)
            for i, (text, result) in enumerate(zip(texts, results))
        ]).fillna('')

        # Summary info
        successful_results = [r for r in results if 'error' not in r]
        error_count = len(results) - len(successful_results)
        if successful_results:
            sentiment_counts = Counter(r['sentiment'] for r in successful_results)
            avg_confidence = np.mean([r['confidence'] for r in successful_results])
            summary_text = f"""
**Batch Analysis Summary:**
- **Total Texts:** {len(texts)}
- **Successful:** {len(successful_results)}
- **Errors:** {error_count}
- **Average Confidence:** {avg_confidence:.3f}
- **Sentiments:** {dict(sentiment_counts)}
            """
        else:
            summary_text = f"All {len(texts)} texts failed to analyze."
        return summary_text, df, summary_fig, confidence_fig
    except Exception as e:
        logger.error(f"Batch analysis failed: {e}")
        return f"Error: {str(e)}", None, None, None
def _message_figure(message: str, title: str = "") -> go.Figure:
    """Return a 400px-high placeholder figure with ``message`` centred in it."""
    placeholder = go.Figure()
    placeholder.add_annotation(text=message, x=0.5, y=0.5,
                               xref="paper", yref="paper", showarrow=False)
    if title:
        placeholder.update_layout(height=400, title=title)
    else:
        placeholder.update_layout(height=400)
    return placeholder


def analyze_advanced_text(text: str, language: str, theme: str, use_lime: bool,
                         use_attention: bool, lime_features: int):
    """Run sentiment analysis plus optional LIME and attention explanations.

    Args:
        text: Text to analyse.
        language: Display name from the language dropdown; unknown names fall
            back to automatic detection.
        theme: Theme name passed to the visualizers.
        use_lime: Whether to run the LIME explanation.
        use_attention: Whether to run the attention-weight explanation.
        lime_features: Number of features requested from LIME.

    Returns:
        A 5-tuple (markdown summary, gauge figure, probability-bar figure,
        LIME figure, attention figure). Blank input yields a prompt and four
        None values. If the basic analysis itself raises, the summary is an
        error message and all four figure slots share one placeholder figure.
        A disabled or failed explainer gets its own placeholder figure.
    """
    try:
        if not text.strip():
            return "Please enter text", None, None, None, None

        # Map display names back to language codes
        language_map = {
            'Auto Detect': 'auto',
            'English': 'en',
            'Chinese': 'zh',
            'Spanish': 'es',
            'French': 'fr',
            'German': 'de',
            'Swedish': 'sv'
        }
        language_code = language_map.get(language, 'auto')

        # Basic sentiment analysis and its two standard plots come first.
        result = SentimentAnalyzer.analyze_text(text, language_code)
        gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
        bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)

        # Fetch the model once, and only when an explainer actually needs it.
        model = tokenizer = None
        load_error = None
        if use_lime or use_attention:
            try:
                model, tokenizer = model_manager.get_model(language_code)
            except Exception as e:
                logger.error(f"Explainability analysis failed: {e}")
                load_error = str(e)

        def run_explainer(label, analyze, render):
            """Return (result, figure) for one explainer, isolating its failures.

            Each explainer runs in its own try block so that one failing does
            not discard the other's plot, and the returned result always
            carries an 'error' key when the figure is an error placeholder.
            """
            if load_error is not None:
                return ({'error': load_error},
                        _message_figure(f"Analysis Error: {load_error}", "Analysis Error"))
            try:
                outcome = analyze()
                return outcome, render(outcome)
            except Exception as e:
                logger.error(f"{label} analysis failed: {e}")
                return ({'error': str(e)},
                        _message_figure(f"Analysis Error: {str(e)}", "Analysis Error"))

        # LIME Analysis
        if use_lime:
            lime_result, lime_plot = run_explainer(
                "LIME",
                lambda: ExplainabilityAnalyzer.analyze_with_lime(
                    text, model, tokenizer, model_manager.device, lime_features
                ),
                lambda outcome: AdvancedVisualizer.create_lime_plot(outcome, theme),
            )
        else:
            lime_result = None
            lime_plot = _message_figure("LIME analysis disabled", "LIME Analysis (Disabled)")

        # Attention Analysis
        if use_attention:
            attention_result, attention_plot = run_explainer(
                "Attention",
                lambda: ExplainabilityAnalyzer.analyze_with_attention(
                    text, model, tokenizer, model_manager.device
                ),
                lambda outcome: AdvancedVisualizer.create_attention_plot(outcome, theme),
            )
        else:
            attention_result = None
            attention_plot = _message_figure("Attention analysis disabled",
                                             "Attention Analysis (Disabled)")

        # Add to history
        history_entry = {
            'text': text[:100] + '...' if len(text) > 100 else text,
            'full_text': text,
            'sentiment': result['sentiment'],
            'confidence': result['confidence'],
            'pos_prob': result['pos_prob'],
            'neg_prob': result['neg_prob'],
            'neu_prob': result.get('neu_prob', 0),
            'language': result['language'],
            'timestamp': datetime.now().isoformat(),
            'analysis_type': 'advanced',
            'explainability_used': use_lime or use_attention
        }
        history_manager.add_entry(history_entry)

        # Create detailed info text
        info_text = f"""
**Advanced Analysis Results:**
- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
- **Language:** {result['language'].upper()}
- **Text Statistics:**
  - Words: {result['word_count']}
  - Characters: {result['char_count']}
  - Average word length: {result['char_count']/max(result['word_count'], 1):.1f}
- **Keywords:** {', '.join(result['keywords'])}
**Explainability Analysis:**
        """
        if use_lime:
            if lime_result and 'error' not in lime_result:
                info_text += f"\n- **LIME:** ✅ Analyzed top {lime_features} features"
            else:
                error_msg = lime_result.get('error', 'Unknown error') if lime_result else 'Not available'
                info_text += f"\n- **LIME:** ❌ {error_msg}"
        else:
            info_text += f"\n- **LIME:** ⏸️ Disabled"
        if use_attention:
            if attention_result and 'error' not in attention_result:
                info_text += f"\n- **Attention:** ✅ Token-level attention weights computed"
            else:
                error_msg = attention_result.get('error', 'Unknown error') if attention_result else 'Not available'
                info_text += f"\n- **Attention:** ❌ {error_msg}"
        else:
            info_text += f"\n- **Attention:** ⏸️ Disabled"
        return info_text, gauge_fig, bars_fig, lime_plot, attention_plot
    except Exception as e:
        logger.error(f"Advanced analysis failed: {e}")
        # The basic analysis itself failed: show the message in every plot slot.
        empty_fig = _message_figure(f"Analysis failed: {str(e)}")
        return f"Error: {str(e)}", empty_fig, empty_fig, empty_fig, empty_fig
def get_history_stats():
    """Format the history manager's aggregate statistics as a markdown report.

    Returns a short notice instead when the manager reports no statistics.
    """
    stats = history_manager.get_stats()
    if stats:
        report_lines = [
            "",
            "**Comprehensive History Statistics:**",
            "**Analysis Counts:**",
            f"- Total Analyses: {stats['total_analyses']}",
            f"- Positive: {stats['positive_count']}",
            f"- Negative: {stats['negative_count']}",
            f"- Neutral: {stats['neutral_count']}",
            "**Confidence Metrics:**",
            f"- Average Confidence: {stats['avg_confidence']:.3f}",
            f"- Highest Confidence: {stats['max_confidence']:.3f}",
            f"- Lowest Confidence: {stats['min_confidence']:.3f}",
            "**Language Statistics:**",
            f"- Languages Detected: {stats['languages_detected']}",
            f"- Most Common Language: {stats['most_common_language'].upper()}",
            "**Text Statistics:**",
            f"- Average Text Length: {stats['avg_text_length']:.1f} characters",
            "    ",
        ]
        return "\n".join(report_lines)
    return "No analysis history available"
def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
    """Return a markdown summary and a table of history entries matching the filters.

    Args:
        sentiment_filter: Sentiment label to keep, or "All" for no restriction.
        language_filter: Language display name (e.g. "English") or language
            code, or "All" for no restriction.
        min_confidence: Lower confidence bound; values <= 0 disable the bound.

    Returns:
        (summary markdown, DataFrame of at most the 20 most recent matches),
        or (message, None) when nothing matches.
    """
    # The dropdown offers display names, while the history entries appear to
    # hold the language codes returned by the analyzer (NOTE(review): confirm
    # against history_manager.filter_history). Translate the name to its code;
    # anything else is passed through lowercased, as before.
    display_to_code = {
        'English': 'en',
        'Chinese': 'zh',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Swedish': 'sv'
    }
    sentiment = sentiment_filter if sentiment_filter != "All" else None
    if language_filter == "All":
        language = None
    else:
        language = display_to_code.get(language_filter, language_filter.lower())

    filtered_history = history_manager.filter_history(
        sentiment=sentiment,
        language=language,
        min_confidence=min_confidence if min_confidence > 0 else None
    )
    if not filtered_history:
        return "No entries match the filter criteria", None

    # Create DataFrame for display
    df_data = []
    for entry in filtered_history[-20:]:  # Show last 20 entries
        df_data.append({
            # isoformat() separates date and time with 'T'; show a space so
            # the cell reads YYYY-MM-DD HH:MM.
            'Timestamp': entry['timestamp'][:16].replace('T', ' '),
            'Text': entry['text'],
            'Sentiment': entry['sentiment'],
            'Confidence': f"{entry['confidence']:.3f}",
            'Language': entry['language'].upper(),
            'Type': entry.get('analysis_type', 'single')
        })
    df = pd.DataFrame(df_data)
    summary = f"""
**Filtered Results:**
- Found {len(filtered_history)} entries matching criteria
- Showing most recent {min(20, len(filtered_history))} entries
    """
    return summary, df
def plot_history_dashboard():
    """Return (dashboard figure, status message) for the recorded history.

    With fewer than two recorded analyses no figure is built and the status
    explains why.
    """
    entries = history_manager.get_history()
    if len(entries) >= 2:
        dashboard = PlotlyVisualizer.create_history_dashboard(entries)
        return dashboard, f"Dashboard showing {len(entries)} analyses"
    return None, "Need at least 2 analyses for dashboard"
def export_history_csv():
    """Write the analysis history to a temporary CSV file.

    Returns:
        (path of the CSV file, status message), or (None, message) when the
        history is empty or the export fails. The file is created with
        delete=False, so it persists for the caller to serve.
    """
    history = history_manager.get_history()
    if not history:
        return None, "No history to export"
    try:
        df = pd.DataFrame(history)
        # Use the temp file only to reserve a unique path, and close the
        # handle before pandas reopens that path: leaving it open leaks a file
        # descriptor per export and blocks the write on platforms that do not
        # allow a second open.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w') as temp_file:
            export_path = temp_file.name
        df.to_csv(export_path, index=False)
        return export_path, f"Exported {len(history)} entries to CSV"
    except Exception as e:
        return None, f"Export failed: {str(e)}"
def export_history_excel():
    """Write the analysis history to a temporary Excel workbook.

    Returns:
        (path of the .xlsx file, status message), or (None, message) when the
        history is empty or the export fails. The file is created with
        delete=False, so it persists for the caller to serve.
    """
    history = history_manager.get_history()
    if not history:
        return None, "No history to export"
    try:
        df = pd.DataFrame(history)
        # Use the temp file only to reserve a unique path, and close the
        # handle before pandas reopens that path: leaving it open leaks a file
        # descriptor per export and blocks the write on platforms that do not
        # allow a second open.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
            export_path = temp_file.name
        df.to_excel(export_path, index=False)
        return export_path, f"Exported {len(history)} entries to Excel"
    except Exception as e:
        return None, f"Export failed: {str(e)}"
def clear_all_history():
    """Empty the analysis history and report the count returned by the manager."""
    removed = history_manager.clear()
    status = f"Cleared {removed} entries from history"
    return status
def get_recent_analyses():
    """Return a numbered markdown list of the ten most recent analyses.

    Each line shows the sentiment, the confidence to three decimals and the
    stored text preview. A short notice is returned when there is no history.
    """
    latest = history_manager.get_recent_history(10)
    if not latest:
        return "No recent analyses available"
    numbered = [
        f"{rank}. **{item['sentiment']}** ({item['confidence']:.3f}) - {item['text']}\n"
        for rank, item in enumerate(latest, 1)
    ]
    return "**Recent Analyses (Last 10):**\n\n" + "".join(numbered)
# Sample data
# One single-column example row per review. The reviews follow the order of the
# language dropdown: Auto Detect, English, Chinese, Spanish, French, German,
# Swedish.
SAMPLE_TEXTS = [[review] for review in (
    "The film had its moments, but overall it felt a bit too long and lacked emotional depth.",
    "I was completely blown away by the movie — the performances were raw and powerful, and the story stayed with me long after the credits rolled.",
    "这部电影节奏拖沓，剧情老套，完全没有让我产生任何共鸣，是一次失望的观影体验。",
    "Una obra maestra del cine contemporáneo, con actuaciones sobresalientes, un guion bien escrito y una dirección impecable.",
    "Je m'attendais à beaucoup mieux. Le scénario était confus, les dialogues ennuyeux, et je me suis presque endormi au milieu du film.",
    "Der Film war ein emotionales Erlebnis mit großartigen Bildern, einem mitreißenden Soundtrack und einer Geschichte, die zum Nachdenken anregt.",
    "Filmen var en besvikelse – tråkig handling, överdrivet skådespeleri och ett slut som inte gav något avslut alls.",
)]

# Five short product reviews, one per line, matching the batch tab's input format.
BATCH_SAMPLE = "\n".join([
    "I love this product! It works perfectly.",
    "The service was terrible and slow.",
    "Not sure if I like it or not.",
    "Amazing quality and fast delivery!",
    "Could be better, but it's okay.",
])
# Gradio Interface

# Language display names shared by the analysis tabs and the history filter.
_LANGUAGE_NAMES = ['English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish']


def _language_dropdown():
    """Create a language picker that defaults to 'Auto Detect'."""
    return gr.Dropdown(
        choices=['Auto Detect'] + _LANGUAGE_NAMES,
        value='Auto Detect',
        label="Language",
    )


def _theme_dropdown():
    """Create a colour-theme picker listing the keys of config.THEMES."""
    return gr.Dropdown(
        choices=list(config.THEMES.keys()),
        value='default',
        label="Theme",
    )


def _preprocessing_checkboxes():
    """Create the clean-text / remove-punctuation / remove-numbers checkboxes, in that order."""
    return (
        gr.Checkbox(label="Clean Text", value=False),
        gr.Checkbox(label="Remove Punctuation", value=True),
        gr.Checkbox(label="Remove Numbers", value=False),
    )


with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment Analyzer") as demo:
    gr.Markdown("# 🎭 Advanced Multilingual Sentiment Analyzer")
    gr.Markdown("Comprehensive sentiment analysis with batch processing, advanced analytics, and multilingual support")

    # ---- Tab 1: single text ----
    with gr.Tab("📝 Single Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Text to Analyze",
                    placeholder="Enter your text here... (supports multiple languages)",
                    lines=4,
                )
                with gr.Row():
                    language_select = _language_dropdown()
                    theme_select = _theme_dropdown()
                with gr.Row():
                    clean_text, remove_punct, remove_nums = _preprocessing_checkboxes()
                analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")
                gr.Examples(
                    examples=SAMPLE_TEXTS,
                    inputs=text_input,
                    label="Sample Texts (Multiple Languages)",
                )
            with gr.Column(scale=1):
                result_info = gr.Markdown("Enter text and click Analyze")
        with gr.Row():
            gauge_plot = gr.Plot(label="Sentiment Gauge")
            bars_plot = gr.Plot(label="Probability Distribution")

    # ---- Tab 2: explainability ----
    with gr.Tab("🔬 Advanced Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                advanced_input = gr.Textbox(
                    label="Text for Advanced Analysis",
                    placeholder="Enter text for explainability analysis...",
                    lines=4,
                )
                with gr.Row():
                    advanced_language = _language_dropdown()
                    advanced_theme = _theme_dropdown()
                gr.Markdown("### 🔍 Explainability Options")
                with gr.Row():
                    use_lime = gr.Checkbox(label="Use LIME Analysis", value=True)
                    use_attention = gr.Checkbox(label="Use Attention Weights", value=True)
                lime_features = gr.Slider(
                    minimum=5,
                    maximum=20,
                    value=10,
                    step=1,
                    label="LIME Features Count",
                )
                advanced_analyze_btn = gr.Button("🔬 Advanced Analyze", variant="primary", size="lg")
            with gr.Column(scale=1):
                advanced_result_info = gr.Markdown("Configure explainability settings and click Advanced Analyze")
        with gr.Row():
            advanced_gauge_plot = gr.Plot(label="Sentiment Gauge")
            advanced_bars_plot = gr.Plot(label="Probability Distribution")
        with gr.Row():
            lime_plot = gr.Plot(label="LIME Feature Importance")
            attention_plot = gr.Plot(label="Attention Weights")

    # ---- Tab 3: batch ----
    with gr.Tab("📊 Batch Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                batch_input = gr.Textbox(
                    label="Batch Text Input (One text per line)",
                    placeholder="Enter multiple texts, one per line...",
                    lines=8,
                )
                with gr.Row():
                    batch_language = _language_dropdown()
                    batch_theme = _theme_dropdown()
                with gr.Row():
                    batch_clean, batch_remove_punct, batch_remove_nums = _preprocessing_checkboxes()
                batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
                gr.Examples(
                    examples=[[BATCH_SAMPLE]],
                    inputs=batch_input,
                    label="Sample Batch Input",
                )
            with gr.Column(scale=1):
                batch_summary = gr.Markdown("Enter texts and click Analyze Batch")
        with gr.Row():
            batch_results_table = gr.DataFrame(
                label="Detailed Results",
                interactive=False,
            )
        with gr.Row():
            batch_summary_plot = gr.Plot(label="Sentiment Summary")
            batch_confidence_plot = gr.Plot(label="Confidence Distribution")

    # ---- Tab 4: history ----
    with gr.Tab("📈 History & Analytics"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📊 Statistics")
                stats_btn = gr.Button("📈 Get Statistics")
                recent_btn = gr.Button("🕒 Recent Analyses")
                stats_output = gr.Markdown("Click 'Get Statistics' to view analysis history")
            with gr.Column():
                gr.Markdown("### 🔍 Filter History")
                with gr.Row():
                    sentiment_filter = gr.Dropdown(
                        choices=["All", "Positive", "Negative", "Neutral"],
                        value="All",
                        label="Filter by Sentiment",
                    )
                    language_filter = gr.Dropdown(
                        choices=["All"] + _LANGUAGE_NAMES,
                        value="All",
                        label="Filter by Language",
                    )
                confidence_filter = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.0,
                    step=0.1,
                    label="Minimum Confidence",
                )
                filter_btn = gr.Button("🔍 Filter History")
        with gr.Row():
            dashboard_btn = gr.Button("📊 View Dashboard")
            clear_btn = gr.Button("🗑️ Clear History", variant="stop")
        with gr.Row():
            export_csv_btn = gr.Button("📄 Export CSV")
            export_excel_btn = gr.Button("📊 Export Excel")
        dashboard_plot = gr.Plot(label="Analytics Dashboard")
        with gr.Row():
            filtered_results = gr.Markdown("Use filters to view specific entries")
            filtered_table = gr.DataFrame(label="Filtered History", interactive=False)
        csv_file = gr.File(label="Download CSV Report")
        excel_file = gr.File(label="Download Excel Report")
        history_status = gr.Textbox(label="Status", interactive=False)

    # ---- Event wiring: each button calls one module-level function ----
    analyze_btn.click(
        fn=analyze_single_text,
        inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
        outputs=[result_info, gauge_plot, bars_plot],
    )
    batch_analyze_btn.click(
        fn=analyze_batch_texts,
        inputs=[batch_input, batch_language, batch_theme, batch_clean, batch_remove_punct, batch_remove_nums],
        outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot],
    )
    advanced_analyze_btn.click(
        fn=analyze_advanced_text,
        inputs=[advanced_input, advanced_language, advanced_theme, use_lime, use_attention, lime_features],
        outputs=[advanced_result_info, advanced_gauge_plot, advanced_bars_plot, lime_plot, attention_plot],
    )

    # Statistics and recent analyses share one markdown output.
    stats_btn.click(fn=get_history_stats, outputs=stats_output)
    recent_btn.click(fn=get_recent_analyses, outputs=stats_output)
    filter_btn.click(
        fn=filter_history_display,
        inputs=[sentiment_filter, language_filter, confidence_filter],
        outputs=[filtered_results, filtered_table],
    )
    dashboard_btn.click(fn=plot_history_dashboard, outputs=[dashboard_plot, history_status])
    export_csv_btn.click(fn=export_history_csv, outputs=[csv_file, history_status])
    export_excel_btn.click(fn=export_history_excel, outputs=[excel_file, history_status])
    clear_btn.click(fn=clear_all_history, outputs=history_status)

if __name__ == "__main__":
    demo.launch(share=True)