Spaces:

entropy25
/

multilingual-sentiment-analyzer

Sleeping

App Files Files Community

entropy25 committed 20 days ago

Commit

219103c

verified ·

1 Parent(s): 1ad3bd1

Update app.py

Browse files

Files changed (1) hide show

app.py +291 -480

app.py CHANGED Viewed

@@ -7,6 +7,14 @@ from plotly.subplots import make_subplots
 import numpy as np
 from wordcloud import WordCloud
 from collections import Counter, defaultdict
 from dataclasses import dataclass
 from typing import List, Dict, Optional, Tuple, Any, Callable
 from contextlib import contextmanager
@@ -16,7 +24,18 @@ import langdetect
 import pandas as pd
 import gc
 # Configuration
     CACHE_SIZE: int = 128
     BATCH_PROCESSING_SIZE: int = 8
@@ -35,7 +54,6 @@ import gc
         'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
         'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
         'zh': "uer/roberta-base-finetuned-dianping-chinese"
     }
     # Color themes for Plotly
@@ -63,19 +81,28 @@ except:
 # Decorators and Context Managers
 def handle_errors(default_return=None):
     """Centralized error handling decorator"""
     return decorator
 @contextmanager
 def memory_cleanup():
     """Context manager for memory cleanup"""
     try:
         yield
     finally:
         gc.collect()
 class ThemeContext:
         self.theme = theme
         self.colors = config.THEMES.get(theme, config.THEMES['default'])
@@ -83,9 +110,6 @@ class ThemeContext:
 class ModelManager:
     """Multi-language model manager with lazy loading"""
     _instance = None
     def __new__(cls):
         if cls._instance is None:
@@ -103,16 +127,6 @@ class ModelManager:
     def _load_default_models(self):
         """Load default models"""
         try:
             # Load multilingual model as default
             model_name = config.MODELS['multilingual']
@@ -241,6 +255,7 @@ class HistoryManager:
     def clear(self) -> int:
         count = len(self._history)
         self._history.clear()
     def size(self) -> int:
         return len(self._history)
@@ -266,93 +281,16 @@ class HistoryManager:
             'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en'
         }
-# Core Sentiment Analysis Engine
 class SentimentEngine:
     """Multi-language sentiment analysis engine"""
     def __init__(self):
         self.model_manager = ModelManager()
-    def extract_attention_keywords(self, text: str, language: str = 'auto', top_k: int = 10) -> List[Tuple[str, float]]:
-        """Extract keywords using attention weights"""
-        try:
-            if language == 'auto':
-                language = self.model_manager.detect_language(text)
-            model, tokenizer = self.model_manager.get_model(language)
-            inputs = tokenizer(
-                text, return_tensors="pt", padding=True,
-                truncation=True, max_length=config.MAX_TEXT_LENGTH
-            ).to(self.model_manager.device)
-            with torch.no_grad():
-                outputs = model(**inputs, output_attentions=True)
-                if hasattr(outputs, 'attentions') and outputs.attentions:
-                    # Use attention weights
-                    attention = outputs.attentions[-1]
-                    avg_attention = attention.mean(dim=1)[0, 0, :]
-                    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
-                    attention_scores = avg_attention.cpu().numpy()
-                    # Process tokens and scores
-                    word_scores = {}
-                    current_word = ""
-                    current_score = 0.0
-                    for token, score in zip(tokens, attention_scores):
-                        if token in ['[CLS]', '[SEP]', '[PAD]', '<s>', '</s>']:
-                            continue
-                        if token.startswith('##') or token.startswith('▁'):
-                            current_word += token.replace('##', '').replace('▁', '')
-                            current_score = max(current_score, score)
-                        else:
-                            if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
-                                word_scores[current_word.lower()] = current_score
-                            current_word = token
-                            current_score = score
-                    if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
-                        word_scores[current_word.lower()] = current_score
-                    # Filter and sort
-                    filtered_words = {
-                        word: score for word, score in word_scores.items()
-                        if word not in STOP_WORDS and len(word) >= config.MIN_WORD_LENGTH
-                    }
-                    sorted_words = sorted(filtered_words.items(), key=lambda x: x[1], reverse=True)
-                    return sorted_words[:top_k]
-        except Exception as e:
-            logger.error(f"Attention keyword extraction failed: {e}")
-        # Fallback to simple keyword extraction
-        keywords = TextProcessor.extract_keywords(text, top_k)
-        return [(word, 0.1) for word in keywords]
     @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'keywords': []})
     def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
-        """Analyze single text with enhanced features"""
         if not text.strip():
             raise ValueError("Empty text provided")
@@ -378,7 +316,6 @@ class SentimentEngine:
         # Tokenize and analyze
         inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
                          truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device)
         with torch.no_grad():
             outputs = model(**inputs)
@@ -413,13 +350,14 @@ class SentimentEngine:
                 'has_neutral': False
             }
-        # Extract keywords
-        keywords = self.extract_attention_keywords(text, detected_lang)
         # Add metadata
         result.update({
             'language': detected_lang,
-            'keywords': keywords,
             'word_count': len(text.split()),
             'char_count': len(text)
         })
@@ -433,6 +371,12 @@ class SentimentEngine:
         if len(texts) > config.BATCH_SIZE_LIMIT:
             texts = texts[:config.BATCH_SIZE_LIMIT]
             if progress_callback:
                 progress_callback((i + len(batch)) / len(texts))
@@ -452,17 +396,191 @@ class SentimentEngine:
                         'text': text[:100] + '...' if len(text) > 100 else text,
                         'full_text': text
                     })
         return results
-# Advanced Plotly Visualization System
 class PlotlyVisualizer:
     """Enhanced Plotly visualizations"""
@@ -547,31 +665,12 @@ class PlotlyVisualizer:
     @staticmethod
     @handle_errors(default_return=None)
     def create_keyword_chart(keywords: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> go.Figure:
-        """Create keyword importance chart"""
         if not keywords:
             fig = go.Figure()
             fig.add_annotation(text="No keywords extracted",
                              xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
             fig.update_layout(height=400, title="Keywords")
             return fig
         words = [word for word, score in keywords]
@@ -592,7 +691,7 @@ class PlotlyVisualizer:
         fig.update_layout(
             title=f"Top Keywords ({sentiment})",
-            xaxis_title="Attention Weight",
             yaxis_title="Keywords",
             height=400,
             showlegend=False
@@ -625,14 +724,6 @@ class PlotlyVisualizer:
         )
         return fig
     @staticmethod
     @handle_errors(default_return=None)
@@ -730,6 +821,8 @@ class DataHandler:
         if not data:
             return None, "No data to export"
         if format_type == 'csv':
             writer = csv.writer(temp_file)
@@ -751,10 +844,10 @@ class DataHandler:
                 ])
         elif format_type == 'json':
             json.dump(data, temp_file, indent=2, ensure_ascii=False)
         temp_file.close()
         return temp_file.name, f"Exported {len(data)} entries"
     @staticmethod
     @handle_errors(default_return="")
     def process_file(file) -> str:
@@ -765,7 +858,6 @@ class DataHandler:
         content = file.read().decode('utf-8')
         if file.name.endswith('.csv'):
             csv_file = io.StringIO(content)
             reader = csv.reader(csv_file)
             try:
@@ -782,6 +874,7 @@ class DataHandler:
                 texts = []
                 for line in lines:
                     if line.strip():
                         if text:
                             texts.append(text)
                 return '\n'.join(texts)
@@ -794,6 +887,7 @@ class SentimentApp:
     def __init__(self):
         self.engine = SentimentEngine()
         self.history = HistoryManager()
         self.data_handler = DataHandler()
@@ -805,12 +899,11 @@ class SentimentApp:
             ["Esta película fue increíble, me encantó la cinematografía."],  # Spanish
             ["Ce film était magnifique, j'ai adoré la réalisation."],  # French
         ]
     @handle_errors(default_return=("Please enter text", None, None, None))
     def analyze_single(self, text: str, language: str, theme: str, clean_text: bool,
                        remove_punct: bool, remove_nums: bool):
-        """Single text analysis with enhanced visualizations"""
         if not text.strip():
             return "Please enter text", None, None, None
@@ -966,6 +1059,23 @@ class SentimentApp:
             return summary_text, df, summary_fig, confidence_fig
     @handle_errors(default_return=(None, "No history available"))
     def plot_history(self, theme: str = 'default'):
         """Plot comprehensive history analysis"""
@@ -973,9 +1083,7 @@ class SentimentApp:
         if len(history) < 2:
             return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
         theme_ctx = ThemeContext(theme)
         with memory_cleanup():
             fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
@@ -1012,9 +1120,9 @@ class SentimentApp:
 - **Languages Detected:** {stats['languages_detected']}
         """
-# Gradio Interface
 def create_interface():
-    """Create comprehensive Gradio interface"""
     app = SentimentApp()
     with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
@@ -1063,351 +1171,42 @@ def create_interface():
                 probability_plot = gr.Plot(label="Probability Distribution")
             with gr.Row():
-                keyword_plot = gr.Plot(label="Key Contributing Words")
-        with gr.Tab("Batch Analysis"):
-            with gr.Row():
-                with gr.Column():
-                    file_upload = gr.File(
-                        label="Upload File (CSV/TXT)",
-                        file_types=[".csv", ".txt"]
-                    )
-                    batch_input = gr.Textbox(
-                        label="Batch Input (one text per line)",
-                        placeholder="Enter multiple texts, one per line...",
-                        lines=10
-                    )
-                    with gr.Row():
-                        batch_language = gr.Dropdown(
-                            choices=list(config.SUPPORTED_LANGUAGES.values()),
-                            value="Auto Detect",
-                            label="Language"
-                        )
-                        batch_theme = gr.Dropdown(
-                            choices=list(config.THEMES.keys()),
-                            value="default",
-                            label="Theme"
-                        )
-                    with gr.Row():
-                        batch_clean_cb = gr.Checkbox(label="Clean Text", value=False)
-                        batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
-                        batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
-                    with gr.Row():
-                        load_file_btn = gr.Button("Load File")
-                        analyze_batch_btn = gr.Button("Analyze Batch", variant="primary")
-                with gr.Column():
-                    batch_summary = gr.Textbox(label="Batch Summary", lines=8)
-                    batch_results_df = gr.Dataframe(
-                        label="Detailed Results",
-                        headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Keywords"],
-                        datatype=["number", "str", "str", "str", "str", "str"]
-                    )
-            with gr.Row():
-                batch_plot = gr.Plot(label="Batch Analysis Summary")
-                confidence_dist_plot = gr.Plot(label="Confidence Distribution")
-        with gr.Tab("History & Analytics"):
             with gr.Row():
                 with gr.Column():
-                    with gr.Row():
-                        refresh_history_btn = gr.Button("Refresh History")
-                        clear_history_btn = gr.Button("Clear History", variant="stop")
-                        status_btn = gr.Button("Get Status")
-                    history_theme = gr.Dropdown(
-                        choices=list(config.THEMES.keys()),
-                        value="default",
-                        label="Dashboard Theme"
                     )
-                    with gr.Row():
-                        export_csv_btn = gr.Button("Export CSV")
-                        export_json_btn = gr.Button("Export JSON")
-                with gr.Column():
-                    history_status = gr.Textbox(label="History Status", lines=8)
-            history_dashboard = gr.Plot(label="History Analytics Dashboard")
-            with gr.Row():
-                csv_download = gr.File(label="CSV Download", visible=True)
-                json_download = gr.File(label="JSON Download", visible=True)
-        # Event Handlers
-        analyze_btn.click(
-            app.analyze_single,
-            inputs=[text_input, language_selector, theme_selector,
-                   clean_text_cb, remove_punct_cb, remove_nums_cb],
-            outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
-        )
-        load_file_btn.click(
-            app.data_handler.process_file,
-            inputs=file_upload,
-            outputs=batch_input
-        )
-        analyze_batch_btn.click(
-            app.analyze_batch,
-            inputs=[batch_input, batch_language, batch_theme,
-                   batch_clean_cb, batch_punct_cb, batch_nums_cb],
-            outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
-        )
-        refresh_history_btn.click(
-            app.plot_history,
-            inputs=history_theme,
-            outputs=[history_dashboard, history_status]
-        )
-        clear_history_btn.click(
-            lambda: f"Cleared {app.history.clear()} entries",
-            outputs=history_status
-        )
-        status_btn.click(
-            app.get_history_status,
-            outputs=history_status
-        )
-        export_csv_btn.click(
-            lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
-            outputs=[csv_download, history_status]
-        )
-        export_json_btn.click(
-            lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
-            outputs=[json_download, history_status]
-        )
-    return demo
-# Application Entry Point
-if __name__ == "__main__":
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-    )
-    try:
-        demo = create_interface()
-        demo.launch(
-            share=True,
-            server_name="0.0.0.0",
-            server_port=7860,
-            show_error=True
-        )
-    except Exception as e:
-        logger.error(f"Failed to launch application: {e}")
-        raise
-    @handle_errors(default_return=("Please enter texts", None, None, None))
-    def analyze_batch(self, batch_text: str, language: str, theme: str,
-                     clean_text: bool, remove_punct: bool, remove_nums: bool):
-        """Enhanced batch analysis"""
-        if not batch_text.strip():
-            return "Please enter texts (one per line)", None, None, None
-        # Parse batch input
-        texts = TextProcessor.parse_batch_input(batch_text)
-        if len(texts) > config.BATCH_SIZE_LIMIT:
-            return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
-        if not texts:
-            return "No valid texts found", None, None, None
-        # Map display names to language codes
-        language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
-        language_code = language_map.get(language, 'auto')
-        preprocessing_options = {
-            'clean_text': clean_text,
-            'remove_punctuation': remove_punct,
-            'remove_numbers': remove_nums
-        }
-        with memory_cleanup():
-            results = self.engine.analyze_batch(texts, language_code, preprocessing_options)
-            # Add to history
-            batch_entries = []
-            for result in results:
-                if 'error' not in result:
-                    entry = {
-                        'text': result['text'],
-                        'full_text': result['full_text'],
-                        'sentiment': result['sentiment'],
-                        'confidence': result['confidence'],
-                        'pos_prob': result.get('pos_prob', 0),
-                        'neg_prob': result.get('neg_prob', 0),
-                        'neu_prob': result.get('neu_prob', 0),
-                        'language': result['language'],
-                        'keywords': result['keywords'],
-                        'word_count': result['word_count'],
-                        'analysis_type': 'batch',
-                        'batch_index': result['batch_index']
-                    }
-                    batch_entries.append(entry)
-            self.history.add_batch(batch_entries)
-            # Create visualizations
-            theme_ctx = ThemeContext(theme)
-            summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx)
-            confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
-            # Create results DataFrame
-            df_data = []
-            for result in results:
-                if 'error' in result:
-                    df_data.append({
-                        'Index': result['batch_index'] + 1,
-                        'Text': result['text'],
-                        'Sentiment': 'Error',
-                        'Confidence': 0.0,
-                        'Language': 'Unknown',
-                        'Error': result['error']
-                    })
-                else:
-                    keywords_str = ', '.join([word for word, _ in result['keywords'][:3]])
-                    df_data.append({
-                        'Index': result['batch_index'] + 1,
-                        'Text': result['text'],
-                        'Sentiment': result['sentiment'],
-                        'Confidence': f"{result['confidence']:.3f}",
-                        'Language': result['language'].upper(),
-                        'Keywords': keywords_str
-                    })
-            df = pd.DataFrame(df_data)
-            # Create summary text
-            successful_results = [r for r in results if 'error' not in r]
-            error_count = len(results) - len(successful_results)
-            if successful_results:
-                sentiment_counts = Counter([r['sentiment'] for r in successful_results])
-                avg_confidence = np.mean([r['confidence'] for r in successful_results])
-                languages = Counter([r['language'] for r in successful_results])
-                summary_text = f"""
-**Batch Analysis Summary:**
-- **Total Texts:** {len(texts)}
-- **Successful:** {len(successful_results)}
-- **Errors:** {error_count}
-- **Average Confidence:** {avg_confidence:.3f}
-- **Sentiments:** {dict(sentiment_counts)}
-- **Languages Detected:** {dict(languages)}
-                """
-            else:
-                summary_text = f"All {len(texts)} texts failed to analyze."
-            return summary_text, df, summary_fig, confidence_fig
-    @handle_errors(default_return=(None, "No history available"))
-    def plot_history(self, theme: str = 'default'):
-        """Plot comprehensive history analysis"""
-        history = self.history.get_all()
-        if len(history) < 2:
-            return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
-        theme_ctx = ThemeContext(theme)
-        with memory_cleanup():
-            fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
-            stats = self.history.get_stats()
-            stats_text = f"""
-**History Statistics:**
-- **Total Analyses:** {stats.get('total_analyses', 0)}
-- **Positive:** {stats.get('positive_count', 0)}
-- **Negative:** {stats.get('negative_count', 0)}
-- **Neutral:** {stats.get('neutral_count', 0)}
-- **Average Confidence:** {stats.get('avg_confidence', 0):.3f}
-- **Languages:** {stats.get('languages_detected', 0)}
-- **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()}
-            """
-            return fig, stats_text
-    @handle_errors(default_return=("No data available",))
-    def get_history_status(self):
-        """Get current history status"""
-        stats = self.history.get_stats()
-        if not stats:
-            return "No analyses performed yet"
-        return f"""
-**Current Status:**
-- **Total Analyses:** {stats['total_analyses']}
-- **Recent Sentiment Distribution:**
-  * Positive: {stats['positive_count']}
-  * Negative: {stats['negative_count']}
-  * Neutral: {stats['neutral_count']}
-- **Average Confidence:** {stats['avg_confidence']:.3f}
-- **Languages Detected:** {stats['languages_detected']}
-        """
-# Gradio Interface
-def create_interface():
-    """Create comprehensive Gradio interface"""
-    app = SentimentApp()
-    with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
-        gr.Markdown("# 🌍 Advanced Multilingual Sentiment Analyzer")
-        gr.Markdown("AI-powered sentiment analysis with support for multiple languages, advanced visualizations, and explainable AI features")
-        with gr.Tab("Single Analysis"):
-            with gr.Row():
-                with gr.Column():
-                    text_input = gr.Textbox(
-                        label="Enter Text for Analysis",
-                        placeholder="Enter your text in any supported language...",
-                        lines=5
                     )
                     with gr.Row():
-                        language_selector = gr.Dropdown(
-                            choices=list(config.SUPPORTED_LANGUAGES.values()),
-                            value="Auto Detect",
-                            label="Language"
-                        )
-                        theme_selector = gr.Dropdown(
-                            choices=list(config.THEMES.keys()),
-                            value="default",
-                            label="Theme"
-                        )
-                    with gr.Row():
-                        clean_text_cb = gr.Checkbox(label="Clean Text", value=False)
-                        remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
-                        remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
-                    analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
-                    gr.Examples(
-                        examples=app.examples,
-                        inputs=text_input,
-                        cache_examples=False
-                    )
                 with gr.Column():
-                    result_output = gr.Textbox(label="Analysis Results", lines=8)
             with gr.Row():
-                gauge_plot = gr.Plot(label="Sentiment Gauge")
-                probability_plot = gr.Plot(label="Probability Distribution")
-            with gr.Row():
-                keyword_plot = gr.Plot(label="Key Contributing Words")
         with gr.Tab("Batch Analysis"):
             with gr.Row():
@@ -1481,13 +1280,10 @@ def create_interface():
             with gr.Row():
                 csv_download = gr.File(label="CSV Download", visible=True)
                 json_download = gr.File(label="JSON Download", visible=True)
         # Event Handlers
         analyze_btn.click(
             app.analyze_single,
             inputs=[text_input, language_selector, theme_selector,
@@ -1495,6 +1291,20 @@ def create_interface():
             outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
         )
         load_file_btn.click(
             app.data_handler.process_file,
             inputs=file_upload,
@@ -1508,6 +1318,7 @@ def create_interface():
             outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
         )
         refresh_history_btn.click(
             app.plot_history,
             inputs=history_theme,

 import numpy as np
 from wordcloud import WordCloud
 from collections import Counter, defaultdict
+import re
+import json
+import csv
+import io
+import tempfile
+from datetime import datetime
+import logging
+from functools import lru_cache, wraps
 from dataclasses import dataclass
 from typing import List, Dict, Optional, Tuple, Any, Callable
 from contextlib import contextmanager
 import pandas as pd
 import gc
+# Advanced analysis imports
+import shap
+import lime
+from lime.lime_text import LimeTextExplainer
 # Configuration
+@dataclass
+class Config:
+    MAX_HISTORY_SIZE: int = 1000
+    BATCH_SIZE_LIMIT: int = 50
+    MAX_TEXT_LENGTH: int = 512
+    MIN_WORD_LENGTH: int = 2
     CACHE_SIZE: int = 128
     BATCH_PROCESSING_SIZE: int = 8
         'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
         'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
         'zh': "uer/roberta-base-finetuned-dianping-chinese"
     }
     # Color themes for Plotly
 # Decorators and Context Managers
 def handle_errors(default_return=None):
+    """Build a decorator that logs any exception and returns a fallback value.
+    Args:
+        default_return: Value handed back when the wrapped call raises. When it
+            is ``None`` the fallback is the string ``"Error: <message>"``.
+    Returns:
+        A decorator whose wrapper catches ``Exception`` instead of propagating it.
+    """
+    import copy  # local import: only this helper needs it
+    def decorator(func: Callable) -> Callable:
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except Exception as e:
+                # Same ERROR-level message as before, plus the traceback
+                logger.exception(f"{func.__name__} failed: {e}")
+                if default_return is None:
+                    return f"Error: {str(e)}"
+                # Hand out a copy: the fallback object is shared by every call of the
+                # decorated function, so a caller mutating it (e.g. a dict holding a
+                # 'keywords' list) must not corrupt later fallbacks
+                return copy.deepcopy(default_return)
+        return wrapper
     return decorator
 @contextmanager
 def memory_cleanup():
     """Context manager for memory cleanup"""
     try:
+        # Hand control to the body of the ``with`` statement
         yield
     finally:
+        # Runs whether the body finished normally or raised; an exception
+        # raised by the body still propagates after the collection pass
         gc.collect()
 class ThemeContext:
+    """Theme management context"""
+    def __init__(self, theme: str = 'default'):
+        # The requested name is stored as given, even if it is not a known theme
         self.theme = theme
+        # Unknown theme names fall back to the 'default' entry of config.THEMES
         self.colors = config.THEMES.get(theme, config.THEMES['default'])
 class ModelManager:
     """Multi-language model manager with lazy loading"""
     _instance = None
     def __new__(cls):
         if cls._instance is None:
     def _load_default_models(self):
         """Load default models"""
         try:
             # Load multilingual model as default
             model_name = config.MODELS['multilingual']
     def clear(self) -> int:
+        """Empty the history and return how many entries were removed."""
+        # Capture the size first; after clear() the length is always 0
         count = len(self._history)
         self._history.clear()
+        return count
     def size(self) -> int:
+        """Return the number of entries currently stored in the history."""
         return len(self._history)
             'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en'
         }
+# Core Sentiment Analysis Engine (Modified - removed attention analysis)
 class SentimentEngine:
     """Multi-language sentiment analysis engine"""
     def __init__(self):
         self.model_manager = ModelManager()
     @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'keywords': []})
     def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
+        """Analyze single text with basic features"""
         if not text.strip():
             raise ValueError("Empty text provided")
         # Tokenize and analyze
         inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
                          truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device)
         with torch.no_grad():
             outputs = model(**inputs)
                 'has_neutral': False
             }
+        # Extract basic keywords
+        keywords = TextProcessor.extract_keywords(text, 10)
+        keyword_tuples = [(word, 0.1) for word in keywords]  # Simple keyword extraction
         # Add metadata
         result.update({
             'language': detected_lang,
+            'keywords': keyword_tuples,
             'word_count': len(text.split()),
             'char_count': len(text)
         })
         if len(texts) > config.BATCH_SIZE_LIMIT:
             texts = texts[:config.BATCH_SIZE_LIMIT]
+        results = []
+        batch_size = config.BATCH_PROCESSING_SIZE
+        for i in range(0, len(texts), batch_size):
+            batch = texts[i:i+batch_size]
             if progress_callback:
                 progress_callback((i + len(batch)) / len(texts))
                         'text': text[:100] + '...' if len(text) > 100 else text,
                         'full_text': text
                     })
         return results
+# Advanced Analysis Engine (NEW)
+class AdvancedAnalysisEngine:
+    """Advanced analysis using SHAP and LIME"""
+    def __init__(self):
+        # ModelManager() goes through ModelManager.__new__, which checks the
+        # class-level _instance; presumably this yields a shared singleton,
+        # but the rest of __new__ is not visible here - TODO confirm
+        self.model_manager = ModelManager()
+    def create_prediction_function(self, model, tokenizer, device):
+        """Build a ``texts -> class probabilities`` callable for LIME/SHAP.
+        Args:
+            model: Sequence-classification model whose output exposes ``.logits``.
+            tokenizer: Tokenizer matching ``model``.
+            device: Device the tokenized inputs are moved to before the forward pass.
+        Returns:
+            A function taking an iterable of texts and returning a numpy array with
+            one row of softmax probabilities per text (an empty array for no texts).
+        """
+        # Explainers send many perturbed texts per call; running them in batches
+        # avoids one forward pass per text
+        batch_size = max(1, config.BATCH_PROCESSING_SIZE)
+        def predict_proba(texts):
+            # Explainers may pass a numpy array of strings; the tokenizer's batch
+            # mode expects a plain list of str
+            texts = [str(t) for t in texts]
+            chunks = []
+            for start in range(0, len(texts), batch_size):
+                batch = texts[start:start + batch_size]
+                inputs = tokenizer(batch, return_tensors="pt", padding=True,
+                                 truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
+                with torch.no_grad():
+                    outputs = model(**inputs)
+                    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()
+                chunks.append(probs)
+            if not chunks:
+                # Same result as before for an empty input
+                return np.array([])
+            return np.concatenate(chunks, axis=0)
+        return predict_proba
+    @handle_errors(default_return=("Analysis failed", None, None))
+    def analyze_with_shap(self, text: str, language: str = 'auto') -> Tuple[str, go.Figure, Dict]:
+        """Explain a prediction with per-token SHAP values.
+        Args:
+            text: Text to explain; blank input returns a prompt message instead.
+            language: Language code, or 'auto' to use ModelManager.detect_language.
+        Returns:
+            (summary markdown, bar chart of per-token SHAP values, analysis dict).
+            If the SHAP step raises: (error message, None, {}). Exceptions raised
+            before the SHAP step are handled by the handle_errors decorator.
+        """
+        if not text.strip():
+            return "Please enter text for analysis", None, {}
+        # Detect language and get model
+        if language == 'auto':
+            detected_lang = self.model_manager.detect_language(text)
+        else:
+            detected_lang = language
+        model, tokenizer = self.model_manager.get_model(detected_lang)
+        # Create prediction function
+        predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
+        try:
+            # Initialize SHAP explainer
+            explainer = shap.Explainer(predict_fn, tokenizer)
+            # Get SHAP values
+            shap_values = explainer([text])
+            # Extract token importance
+            tokens = shap_values.data[0]
+            values = shap_values.values[0]
+            # Create visualization data
+            if len(values.shape) > 1:
+                # Multi-class case: last column for 3 classes, column 1 otherwise
+                # NOTE(review): presumably the positive class - confirm against
+                # each model's label order
+                pos_values = values[:, -1] if values.shape[1] == 3 else values[:, 1]
+            else:
+                pos_values = values
+            # Create SHAP plot
+            fig = go.Figure()
+            colors = ['red' if v < 0 else 'green' for v in pos_values]
+            fig.add_trace(go.Bar(
+                x=list(range(len(tokens))),
+                y=pos_values,
+                text=tokens,
+                textposition='outside',
+                marker_color=colors,
+                name='SHAP Values'
+            ))
+            fig.update_layout(
+                title="SHAP Analysis - Token Importance",
+                xaxis_title="Token Index",
+                yaxis_title="SHAP Value",
+                height=500,
+                xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens)
+            )
+            # argsort is ascending, so take the last five and reverse them to list
+            # the token with the largest absolute SHAP value first
+            top_indices = np.argsort(np.abs(pos_values))[-5:][::-1]
+            # Create analysis summary
+            analysis_data = {
+                'method': 'SHAP',
+                'language': detected_lang,
+                'total_tokens': len(tokens),
+                'positive_influence': sum(1 for v in pos_values if v > 0),
+                'negative_influence': sum(1 for v in pos_values if v < 0),
+                'most_important_tokens': [(tokens[i], float(pos_values[i]))
+                                        for i in top_indices]
+            }
+            top_tokens_text = ', '.join(
+                f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']
+            )
+            summary_text = f"""
+**SHAP Analysis Results:**
+- **Language:** {detected_lang.upper()}
+- **Total Tokens:** {analysis_data['total_tokens']}
+- **Positive Influence Tokens:** {analysis_data['positive_influence']}
+- **Negative Influence Tokens:** {analysis_data['negative_influence']}
+- **Most Important Tokens:** {top_tokens_text}
+            """
+            return summary_text, fig, analysis_data
+        except Exception as e:
+            # Keep the failure message for the UI and log the traceback
+            logger.exception(f"SHAP analysis failed: {e}")
+            return f"SHAP analysis failed: {str(e)}", None, {}
+    @handle_errors(default_return=("Analysis failed", None, None))
+    def analyze_with_lime(self, text: str, language: str = 'auto') -> Tuple[str, go.Figure, Dict]:
+        """Perform LIME analysis"""
+        if not text.strip():
+            return "Please enter text for analysis", None, {}
+        # Detect language and get model
+        if language == 'auto':
+            detected_lang = self.model_manager.detect_language(text)
+        else:
+            detected_lang = language
+        model, tokenizer = self.model_manager.get_model(detected_lang)
+        # Create prediction function
+        predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
+        try:
+            # Initialize LIME explainer
+            explainer = LimeTextExplainer(class_names=['Negative', 'Neutral', 'Positive'])
+            # Get LIME explanation
+            exp = explainer.explain_instance(text, predict_fn, num_features=20)
+            # Extract feature importance
+            lime_data = exp.as_list()
+            # Create visualization
+            words = [item[0] for item in lime_data]
+            scores = [item[1] for item in lime_data]
+            fig = go.Figure()
+            colors = ['red' if s < 0 else 'green' for s in scores]
+            fig.add_trace(go.Bar(
+                y=words,
+                x=scores,
+                orientation='h',
+                marker_color=colors,
+                text=[f'{s:.3f}' for s in scores],
+                textposition='auto',
+                name='LIME Importance'
+            ))
+            fig.update_layout(
+                title="LIME Analysis - Feature Importance",
+                xaxis_title="Importance Score",
+                yaxis_title="Words/Phrases",
+                height=500
+            )
+            # Create analysis summary
+            analysis_data = {
+                'method': 'LIME',
+                'language': detected_lang,
+                'features_analyzed': len(lime_data),
+                'positive_features': sum(1 for _, score in lime_data if score > 0),
+                'negative_features': sum(1 for _, score in lime_data if score < 0),
+                'feature_importance': lime_data
+            }
+            summary_text = f"""
+**LIME Analysis Results:**
+- **Language:** {detected_lang.upper()}
+- **Features Analyzed:** {analysis_data['features_analyzed']}
+- **Positive Features:** {analysis_data['positive_features']}
+- **Negative Features:** {analysis_data['negative_features']}
+- **Top Features:** {', '.join([f"{word}({score:.3f})" for word, score in lime_data[:5]])}
+            """
+            return summary_text, fig, analysis_data
+        except Exception as e:
+            logger.error(f"LIME analysis failed: {e}")
+            return f"LIME analysis failed: {str(e)}", None, {}
+# Advanced Plotly Visualization System (Updated - removed attention visualization)
 class PlotlyVisualizer:
     """Enhanced Plotly visualizations"""
     @staticmethod
     @handle_errors(default_return=None)
     def create_keyword_chart(keywords: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> go.Figure:
+        """Create basic keyword chart"""
         if not keywords:
             fig = go.Figure()
             fig.add_annotation(text="No keywords extracted",
                              xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
             fig.update_layout(height=400, title="Keywords")
             return fig
         words = [word for word, score in keywords]
         fig.update_layout(
             title=f"Top Keywords ({sentiment})",
+            xaxis_title="Frequency Score",
             yaxis_title="Keywords",
             height=400,
             showlegend=False
         )
         return fig
     @staticmethod
     @handle_errors(default_return=None)
         if not data:
             return None, "No data to export"
+        temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False,
+                                               suffix=f'.{format_type}', encoding='utf-8')
         if format_type == 'csv':
             writer = csv.writer(temp_file)
                 ])
         elif format_type == 'json':
             json.dump(data, temp_file, indent=2, ensure_ascii=False)
         temp_file.close()
         return temp_file.name, f"Exported {len(data)} entries"
     @staticmethod
     @handle_errors(default_return="")
     def process_file(file) -> str:
         content = file.read().decode('utf-8')
         if file.name.endswith('.csv'):
             csv_file = io.StringIO(content)
             reader = csv.reader(csv_file)
             try:
                 texts = []
                 for line in lines:
                     if line.strip():
+                        text = line.strip().strip('"')
                         if text:
                             texts.append(text)
                 return '\n'.join(texts)
     def __init__(self):
         self.engine = SentimentEngine()
+        self.advanced_engine = AdvancedAnalysisEngine()  # NEW
         self.history = HistoryManager()
         self.data_handler = DataHandler()
             ["Esta película fue increíble, me encantó la cinematografía."],  # Spanish
             ["Ce film était magnifique, j'ai adoré la réalisation."],  # French
         ]
     @handle_errors(default_return=("Please enter text", None, None, None))
     def analyze_single(self, text: str, language: str, theme: str, clean_text: bool,
                        remove_punct: bool, remove_nums: bool):
+        """Single text analysis with basic visualizations (removed attention analysis)"""
         if not text.strip():
             return "Please enter text", None, None, None
             return summary_text, df, summary_fig, confidence_fig
+    # NEW: Advanced analysis methods
+    @handle_errors(default_return=("Please enter text", None))
+    def analyze_with_shap(self, text: str, language: str):
+        """Perform SHAP analysis"""
+        language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
+        language_code = language_map.get(language, 'auto')
+        return self.advanced_engine.analyze_with_shap(text, language_code)
+    @handle_errors(default_return=("Please enter text", None))
+    def analyze_with_lime(self, text: str, language: str):
+        """Perform LIME analysis"""
+        language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
+        language_code = language_map.get(language, 'auto')
+        return self.advanced_engine.analyze_with_lime(text, language_code)
     @handle_errors(default_return=(None, "No history available"))
     def plot_history(self, theme: str = 'default'):
         """Plot comprehensive history analysis"""
         if len(history) < 2:
             return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
         theme_ctx = ThemeContext(theme)
         with memory_cleanup():
             fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
 - **Languages Detected:** {stats['languages_detected']}
         """
+# Gradio Interface (Updated with Advanced Analysis tab)
 def create_interface():
+    """Create comprehensive Gradio interface with Advanced Analysis tab"""
     app = SentimentApp()
     with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
                 probability_plot = gr.Plot(label="Probability Distribution")
             with gr.Row():
+                keyword_plot = gr.Plot(label="Basic Keywords")
+        # NEW: Advanced Analysis Tab
+        with gr.Tab("Advanced Analysis"):
+            gr.Markdown("## 🔬 Explainable AI Analysis")
+            gr.Markdown("Use SHAP and LIME to understand which words and phrases most influence the sentiment prediction.")
             with gr.Row():
                 with gr.Column():
+                    advanced_text_input = gr.Textbox(
+                        label="Enter Text for Advanced Analysis",
+                        placeholder="Enter text to analyze with SHAP and LIME...",
+                        lines=6
                     )
+                    advanced_language = gr.Dropdown(
+                        choices=list(config.SUPPORTED_LANGUAGES.values()),
+                        value="Auto Detect",
+                        label="Language"
                     )
                     with gr.Row():
+                        shap_btn = gr.Button("SHAP Analysis", variant="primary")
+                        lime_btn = gr.Button("LIME Analysis", variant="secondary")
+                    gr.Markdown("""
+                    **Analysis Methods:**
+                    - **SHAP**: Shows token-level importance scores
+                    - **LIME**: Explains predictions by perturbing input features
+                    """)
                 with gr.Column():
+                    advanced_results = gr.Textbox(label="Analysis Summary", lines=10)
             with gr.Row():
+                advanced_plot = gr.Plot(label="Feature Importance Visualization")
         with gr.Tab("Batch Analysis"):
             with gr.Row():
             with gr.Row():
                 csv_download = gr.File(label="CSV Download", visible=True)
                 json_download = gr.File(label="JSON Download", visible=True)
         # Event Handlers
+        # Single Analysis
         analyze_btn.click(
             app.analyze_single,
             inputs=[text_input, language_selector, theme_selector,
             outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
         )
+        # Advanced Analysis (NEW)
+        shap_btn.click(
+            app.analyze_with_shap,
+            inputs=[advanced_text_input, advanced_language],
+            outputs=[advanced_results, advanced_plot]
+        )
+        lime_btn.click(
+            app.analyze_with_lime,
+            inputs=[advanced_text_input, advanced_language],
+            outputs=[advanced_results, advanced_plot]
+        )
+        # Batch Analysis
         load_file_btn.click(
             app.data_handler.process_file,
             inputs=file_upload,
             outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
         )
+        # History & Analytics
         refresh_history_btn.click(
             app.plot_history,
             inputs=history_theme,