entropy25 committed
Commit 6e4a3ac · verified · 1 Parent(s): 0c511f2

Update app.py

Files changed (1)
  1. app.py +1292 -773
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import torch
2
  import gradio as gr
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
4
  import plotly.graph_objects as go
5
  import plotly.express as px
6
  from plotly.subplots import make_subplots
@@ -14,936 +14,1455 @@ import io
14
  import tempfile
15
  from datetime import datetime
16
  import logging
17
- from functools import lru_cache, wraps
18
  from dataclasses import dataclass
19
- from typing import List, Dict, Optional, Tuple, Any, Callable
20
- from contextlib import contextmanager
21
- import gc
22
- import base64
23
 
24
  # Configuration
25
  @dataclass
26
  class Config:
27
- MAX_HISTORY_SIZE: int = 1000
28
- BATCH_SIZE_LIMIT: int = 50
29
  MAX_TEXT_LENGTH: int = 512
30
- MIN_WORD_LENGTH: int = 2
31
- CACHE_SIZE: int = 128
32
- BATCH_PROCESSING_SIZE: int = 8
33
-
34
- # Visualization settings
35
- FIGURE_WIDTH: int = 800
36
- FIGURE_HEIGHT: int = 500
37
- WORDCLOUD_SIZE: Tuple[int, int] = (800, 400)
38
 
39
- THEMES = {
40
- 'default': {'pos': '#4ecdc4', 'neg': '#ff6b6b'},
41
- 'ocean': {'pos': '#0077be', 'neg': '#ff6b35'},
42
- 'forest': {'pos': '#228b22', 'neg': '#dc143c'},
43
- 'sunset': {'pos': '#ff8c00', 'neg': '#8b0000'}
44
  }
45
 
46
- # Multi-language models
47
  MODELS = {
48
- 'multilingual': {
49
- 'name': 'cardiffnlp/twitter-xlm-roberta-base-sentiment',
50
- 'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
51
- },
52
- 'english': {
53
- 'name': 'cardiffnlp/twitter-roberta-base-sentiment-latest',
54
- 'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
55
- },
56
- 'chinese': {
57
- 'name': 'uer/roberta-base-finetuned-chinanews-chinese',
58
- 'labels': ['NEGATIVE', 'POSITIVE']
59
- },
60
- 'spanish': {
61
- 'name': 'finiteautomata/beto-sentiment-analysis',
62
- 'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
63
- },
64
- 'french': {
65
- 'name': 'tblard/tf-allocine',
66
- 'labels': ['NEGATIVE', 'POSITIVE']
67
- }
68
  }
69
 
70
- STOP_WORDS = {
71
- 'en': {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should'},
72
- 'zh': {'和', '就', '不', '人', '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看'},
73
- 'es': {'el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo', 'le', 'da', 'su', 'por', 'son', 'con', 'para', 'al', 'del', 'los', 'las'},
74
- 'fr': {'le', 'la', 'les', 'de', 'un', 'une', 'du', 'des', 'et', 'à', 'ce', 'il', 'que', 'qui', 'ne', 'se', 'pas', 'tout', 'être', 'avoir', 'sur', 'avec', 'par'},
 
75
  }
76
 
77
  config = Config()
78
  logger = logging.getLogger(__name__)
79
 
80
- # Decorators and Context Managers
81
- def handle_errors(default_return=None):
82
- """Centralized error handling decorator"""
83
- def decorator(func: Callable) -> Callable:
84
- @wraps(func)
85
- def wrapper(*args, **kwargs):
86
- try:
87
- return func(*args, **kwargs)
88
- except Exception as e:
89
- logger.error(f"{func.__name__} failed: {e}")
90
- return default_return if default_return is not None else f"Error: {str(e)}"
91
- return wrapper
92
- return decorator
93
-
94
- class ThemeContext:
95
- """Theme management context"""
96
- def __init__(self, theme: str = 'default'):
97
- self.theme = theme
98
- self.colors = config.THEMES.get(theme, config.THEMES['default'])
99
-
100
- # Enhanced Model Manager for Multi-language Support
101
- class ModelManager:
102
- """Multi-language model manager with lazy loading"""
103
- _instance = None
104
- _models = {}
105
- _tokenizers = {}
106
- _pipelines = {}
107
- _device = None
108
-
109
- def __new__(cls):
110
- if cls._instance is None:
111
- cls._instance = super().__new__(cls)
112
- return cls._instance
113
-
114
- @property
115
- def device(self):
116
- if self._device is None:
117
- self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
118
- return self._device
119
-
120
- def get_pipeline(self, model_key: str = 'multilingual'):
121
- """Get or create sentiment analysis pipeline for specified model"""
122
- if model_key not in self._pipelines:
123
- try:
124
- model_config = config.MODELS[model_key]
125
- self._pipelines[model_key] = pipeline(
126
- "sentiment-analysis",
127
- model=model_config['name'],
128
- tokenizer=model_config['name'],
129
- device=0 if torch.cuda.is_available() else -1,
130
- top_k=None
131
- )
132
- logger.info(f"Model {model_key} loaded successfully")
133
- except Exception as e:
134
- logger.error(f"Failed to load model {model_key}: {e}")
135
- # Fallback to multilingual model
136
- if model_key != 'multilingual':
137
- return self.get_pipeline('multilingual')
138
- raise
139
- return self._pipelines[model_key]
140
-
141
- def get_model_and_tokenizer(self, model_key: str = 'multilingual'):
142
- """Get model and tokenizer for attention extraction"""
143
- if model_key not in self._models:
144
- try:
145
- model_config = config.MODELS[model_key]
146
- self._tokenizers[model_key] = AutoTokenizer.from_pretrained(model_config['name'])
147
- self._models[model_key] = AutoModelForSequenceClassification.from_pretrained(model_config['name'])
148
- self._models[model_key].to(self.device)
149
- logger.info(f"Model and tokenizer {model_key} loaded for attention extraction")
150
- except Exception as e:
151
- logger.error(f"Failed to load model/tokenizer {model_key}: {e}")
152
- if model_key != 'multilingual':
153
- return self.get_model_and_tokenizer('multilingual')
154
- raise
155
- return self._models[model_key], self._tokenizers[model_key]
156
 
157
- # Language Detection
158
- class LanguageDetector:
159
- """Simple language detection based on character patterns"""
160
 
161
  @staticmethod
162
  def detect_language(text: str) -> str:
163
- """Detect language based on character patterns"""
164
- # Chinese characters
165
- if re.search(r'[\u4e00-\u9fff]', text):
166
- return 'chinese'
167
- # Spanish patterns
168
- elif re.search(r'[ñáéíóúü]', text.lower()):
169
- return 'spanish'
170
- # French patterns
171
- elif re.search(r'[àâäçéèêëïîôùûüÿ]', text.lower()):
172
- return 'french'
173
- # Default to English/Multilingual
174
- else:
175
- return 'multilingual'
176
 
177
- # Simplified Core Classes
178
- class TextProcessor:
179
- """Optimized text processing with multi-language support"""
180
- @staticmethod
181
- @lru_cache(maxsize=config.CACHE_SIZE)
182
- def clean_text(text: str, language: str = 'en') -> Tuple[str, ...]:
183
- """Single-pass text cleaning with language-specific stop words"""
184
- words = re.findall(r'\b\w{2,}\b', text.lower())
185
- stop_words = config.STOP_WORDS.get(language, config.STOP_WORDS['en'])
186
- return tuple(w for w in words if w not in stop_words and len(w) >= config.MIN_WORD_LENGTH)
187
 
188
  class HistoryManager:
189
- """Simplified history management"""
190
  def __init__(self):
191
  self._history = []
192
 
193
- def add(self, entry: Dict):
194
- self._history.append({**entry, 'timestamp': datetime.now().isoformat()})
195
  if len(self._history) > config.MAX_HISTORY_SIZE:
196
  self._history = self._history[-config.MAX_HISTORY_SIZE:]
197
 
198
- def get_all(self) -> List[Dict]:
199
  return self._history.copy()
200
 
201
  def clear(self) -> int:
202
  count = len(self._history)
203
  self._history.clear()
204
  return count
205
 
206
- def size(self) -> int:
207
- return len(self._history)
208
 
209
- # Core Analysis Engine with Multi-language Support
210
- class SentimentEngine:
211
- """Multi-language sentiment analysis with attention-based keyword extraction"""
212
- def __init__(self):
213
- self.model_manager = ModelManager()
214
- self.language_detector = LanguageDetector()
215
 
216
- def extract_key_words(self, text: str, model_key: str = 'multilingual', top_k: int = 10) -> List[Tuple[str, float]]:
217
- """Extract contributing words using attention weights"""
218
  try:
219
- model, tokenizer = self.model_manager.get_model_and_tokenizer(model_key)
220
-
221
- inputs = tokenizer(
222
- text, return_tensors="pt", padding=True,
223
- truncation=True, max_length=config.MAX_TEXT_LENGTH
224
- ).to(self.model_manager.device)
225
 
226
- # Get model outputs with attention weights
227
  with torch.no_grad():
228
- outputs = model(**inputs, output_attentions=True)
229
- attention = outputs.attentions
230
 
231
- # Use the last layer's attention, average over all heads
232
- last_attention = attention[-1]
233
- avg_attention = last_attention.mean(dim=1)
234
 
235
- # Focus on attention to [CLS] token
236
- cls_attention = avg_attention[0, 0, :]
237
 
238
- # Get tokens and their attention scores
239
- tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
240
- attention_scores = cls_attention.cpu().numpy()
241
 
242
- # Filter out special tokens and combine subword tokens
243
- word_scores = {}
244
- current_word = ""
245
- current_score = 0.0
246
 
247
- for i, (token, score) in enumerate(zip(tokens, attention_scores)):
248
- if token in ['[CLS]', '[SEP]', '[PAD]', '<s>', '</s>', '<pad>']:
249
- continue
250
-
251
- if token.startswith('##') or token.startswith('▁'):
252
- # Subword token
253
- current_word += token[2:] if token.startswith('##') else token[1:]
254
- current_score = max(current_score, score)
255
- else:
256
- # New word, save previous if exists
257
- if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
258
- word_scores[current_word.lower()] = current_score
259
-
260
- current_word = token
261
- current_score = score
262
 
263
- # Don't forget the last word
264
- if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
265
- word_scores[current_word.lower()] = current_score
266
 
267
- # Filter out stop words and sort by attention score
268
- lang_code = 'zh' if model_key == 'chinese' else 'es' if model_key == 'spanish' else 'fr' if model_key == 'french' else 'en'
269
- stop_words = config.STOP_WORDS.get(lang_code, config.STOP_WORDS['en'])
 
270
 
271
- filtered_words = {
272
- word: score for word, score in word_scores.items()
273
- if word not in stop_words and len(word) >= config.MIN_WORD_LENGTH
274
  }
275
 
276
- # Sort by attention score and return top_k
277
- sorted_words = sorted(filtered_words.items(), key=lambda x: x[1], reverse=True)
278
- return sorted_words[:top_k]
279
 
280
  except Exception as e:
281
- logger.error(f"Key word extraction failed: {e}")
282
- return []
283
 
284
- @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'key_words': []})
285
- def analyze_single(self, text: str, model_key: str = None) -> Dict:
286
- """Analyze single text with automatic language detection"""
287
- if not text.strip():
288
- raise ValueError("Empty text")
289
 
290
- # Auto-detect language if not specified
291
- if model_key is None:
292
- detected_lang = self.language_detector.detect_language(text)
293
- model_key = detected_lang if detected_lang in config.MODELS else 'multilingual'
294
 
295
- # Get sentiment analysis pipeline
296
- classifier = self.model_manager.get_pipeline(model_key)
297
- results = classifier(text)
298
 
299
- # Process results based on model output format
300
- if isinstance(results[0], list):
301
- results = results[0]
302
 
303
- # Map results to standardized format
304
- sentiment_map = {'POSITIVE': 'Positive', 'NEGATIVE': 'Negative', 'NEUTRAL': 'Neutral'}
305
 
306
- # Find positive and negative scores
307
- pos_score = 0.0
308
- neg_score = 0.0
309
- neutral_score = 0.0
310
 
311
- for result in results:
312
- label = result['label']
313
- score = result['score']
314
-
315
- if 'POSITIVE' in label:
316
- pos_score = score
317
- elif 'NEGATIVE' in label:
318
- neg_score = score
319
- elif 'NEUTRAL' in label:
320
- neutral_score = score
321
-
322
- # Determine final sentiment
323
- if pos_score > neg_score and pos_score > neutral_score:
324
- sentiment = 'Positive'
325
- confidence = pos_score
326
- elif neg_score > pos_score and neg_score > neutral_score:
327
- sentiment = 'Negative'
328
- confidence = neg_score
329
  else:
330
- sentiment = 'Neutral'
331
- confidence = neutral_score
332
 
333
- # Extract key contributing words
334
- key_words = self.extract_key_words(text, model_key)
335
 
336
- return {
337
- 'sentiment': sentiment,
338
- 'confidence': float(confidence),
339
- 'pos_prob': float(pos_score),
340
- 'neg_prob': float(neg_score),
341
- 'neutral_prob': float(neutral_score),
342
- 'key_words': key_words,
343
- 'language': model_key
344
- }
345
-
346
- @handle_errors(default_return=[])
347
- def analyze_batch(self, texts: List[str], model_key: str = None, progress_callback=None) -> List[Dict]:
348
- """Optimized batch processing with key words"""
349
- if len(texts) > config.BATCH_SIZE_LIMIT:
350
- texts = texts[:config.BATCH_SIZE_LIMIT]
351
 
352
- results = []
353
 
354
- for i, text in enumerate(texts):
355
- if progress_callback:
356
- progress_callback((i + 1) / len(texts))
357
-
358
- result = self.analyze_single(text, model_key)
359
- result['text'] = text[:50] + '...' if len(text) > 50 else text
360
- result['full_text'] = text
361
- results.append(result)
362
 
363
- return results
364
-
365
- # Plotly Visualization System
366
- class PlotFactory:
367
- """Factory for creating Plotly visualizations"""
368
 
369
  @staticmethod
370
- @handle_errors(default_return=None)
371
- def create_sentiment_bars(result: Dict, theme: ThemeContext) -> go.Figure:
372
- """Create sentiment probability bars using Plotly"""
373
- labels = []
374
- values = []
375
- colors = []
376
-
377
- if 'neg_prob' in result and result['neg_prob'] > 0:
378
- labels.append("Negative")
379
- values.append(result['neg_prob'])
380
- colors.append(theme.colors['neg'])
381
-
382
- if 'neutral_prob' in result and result['neutral_prob'] > 0:
383
- labels.append("Neutral")
384
- values.append(result['neutral_prob'])
385
- colors.append('#FFA500') # Orange for neutral
386
-
387
- if 'pos_prob' in result and result['pos_prob'] > 0:
388
- labels.append("Positive")
389
- values.append(result['pos_prob'])
390
- colors.append(theme.colors['pos'])
391
 
392
  fig = go.Figure(data=[
393
- go.Bar(
394
- x=labels,
395
- y=values,
396
- marker_color=colors,
397
- text=[f'{v:.3f}' for v in values],
398
- textposition='auto',
399
- )
400
  ])
401
 
 
402
  fig.update_layout(
403
  title="Sentiment Probabilities",
404
- xaxis_title="Sentiment",
405
  yaxis_title="Probability",
406
- yaxis=dict(range=[0, 1]),
407
- width=config.FIGURE_WIDTH,
408
- height=config.FIGURE_HEIGHT,
409
  showlegend=False
410
  )
411
 
412
  return fig
413
-
414
  @staticmethod
415
- @handle_errors(default_return=None)
416
- def create_confidence_gauge(confidence: float, sentiment: str, theme: ThemeContext) -> go.Figure:
417
- """Create confidence gauge using Plotly"""
418
- color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg'] if sentiment == 'Negative' else '#FFA500'
419
-
420
- fig = go.Figure(go.Indicator(
421
- mode = "gauge+number+delta",
422
- value = confidence,
423
- domain = {'x': [0, 1], 'y': [0, 1]},
424
- title = {'text': f"{sentiment} Confidence"},
425
- delta = {'reference': 0.5},
426
- gauge = {
427
- 'axis': {'range': [None, 1]},
428
- 'bar': {'color': color},
429
- 'steps': [
430
- {'range': [0, 0.5], 'color': "lightgray"},
431
- {'range': [0.5, 1], 'color': "gray"}
432
- ],
433
- 'threshold': {
434
- 'line': {'color': "red", 'width': 4},
435
- 'thickness': 0.75,
436
- 'value': 0.9
437
- }
438
- }
439
- ))
440
 
441
  fig.update_layout(
442
- width=config.FIGURE_WIDTH,
443
- height=config.FIGURE_HEIGHT
444
  )
445
 
446
  return fig
447
-
448
  @staticmethod
449
- @handle_errors(default_return=None)
450
- def create_keyword_chart(key_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[go.Figure]:
451
- """Create horizontal bar chart for key contributing words"""
452
- if not key_words:
453
- return None
454
-
455
- words = [word for word, score in key_words]
456
- scores = [score for word, score in key_words]
457
-
458
- # Choose color based on sentiment
459
- color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg'] if sentiment == 'Negative' else '#FFA500'
460
-
461
- fig = go.Figure(go.Bar(
462
- x=scores,
463
- y=words,
464
- orientation='h',
465
- marker_color=color,
466
- text=[f'{score:.3f}' for score in scores],
467
- textposition='auto',
468
- ))
469
 
470
  fig.update_layout(
471
- title=f'Top Contributing Words ({sentiment})',
472
- xaxis_title='Attention Weight',
473
- yaxis_title='Words',
474
- width=config.FIGURE_WIDTH,
475
- height=config.FIGURE_HEIGHT,
476
- yaxis={'categoryorder': 'total ascending'}
477
  )
478
 
479
  return fig
480
 
481
  @staticmethod
482
- @handle_errors(default_return=None)
483
- def create_wordcloud_plot(text: str, sentiment: str, theme: ThemeContext) -> Optional[go.Figure]:
484
- """Create word cloud visualization"""
485
- if len(text.split()) < 3:
486
- return None
487
 
488
- try:
489
- colormap = 'Greens' if sentiment == 'Positive' else 'Reds' if sentiment == 'Negative' else 'Blues'
490
- wc = WordCloud(
491
- width=config.WORDCLOUD_SIZE[0],
492
- height=config.WORDCLOUD_SIZE[1],
493
- background_color='white',
494
- colormap=colormap,
495
- max_words=30
496
- ).generate(text)
497
-
498
- # Convert to image
499
- img_array = wc.to_array()
500
-
501
- fig = go.Figure()
502
- fig.add_trace(go.Image(z=img_array))
503
- fig.update_layout(
504
- title=f'{sentiment} Word Cloud',
505
- xaxis={'visible': False},
506
- yaxis={'visible': False},
507
- width=config.FIGURE_WIDTH,
508
- height=config.FIGURE_HEIGHT,
509
- margin=dict(l=0, r=0, t=30, b=0)
510
- )
511
-
512
- return fig
513
-
514
- except Exception as e:
515
- logger.error(f"Word cloud generation failed: {e}")
516
- return None
517
-
518
- @staticmethod
519
- @handle_errors(default_return=None)
520
- def create_batch_analysis(results: List[Dict], theme: ThemeContext) -> go.Figure:
521
- """Create comprehensive batch visualization using Plotly subplots"""
522
  fig = make_subplots(
523
  rows=2, cols=2,
524
- subplot_titles=['Sentiment Distribution', 'Confidence Distribution',
525
- 'Sentiment Progression', 'Language Distribution'],
526
- specs=[[{"type": "pie"}, {"type": "histogram"}],
527
- [{"type": "scatter", "colspan": 2}, None]]
528
  )
529
 
530
- # Sentiment distribution (pie chart)
531
- sent_counts = Counter([r['sentiment'] for r in results])
532
- colors_pie = [theme.colors['pos'] if s == 'Positive' else theme.colors['neg'] if s == 'Negative' else '#FFA500' for s in sent_counts.keys()]
533
 
 
 
534
  fig.add_trace(
535
- go.Pie(labels=list(sent_counts.keys()), values=list(sent_counts.values()),
536
- marker_colors=colors_pie, name="Sentiment"),
 
537
  row=1, col=1
538
  )
539
 
540
- # Confidence histogram
541
- confs = [r['confidence'] for r in results]
542
  fig.add_trace(
543
- go.Histogram(x=confs, nbinsx=8, marker_color='skyblue', name="Confidence"),
544
  row=1, col=2
545
  )
546
 
547
- # Sentiment progression
548
- pos_probs = [r.get('pos_prob', 0) for r in results]
549
- indices = list(range(len(results)))
550
- colors_scatter = [theme.colors['pos'] if r['sentiment'] == 'Positive'
551
- else theme.colors['neg'] if r['sentiment'] == 'Negative'
552
- else '#FFA500' for r in results]
553
-
554
  fig.add_trace(
555
- go.Scatter(x=indices, y=pos_probs, mode='markers',
556
- marker=dict(color=colors_scatter, size=8),
557
- name="Sentiment Progression"),
558
  row=2, col=1
559
  )
560
 
561
- # Add horizontal line at 0.5
562
- fig.add_hline(y=0.5, line_dash="dash", line_color="gray", row=2, col=1)
563
-
564
- fig.update_layout(
565
- height=800,
566
- width=1000,
567
- showlegend=False,
568
- title_text="Batch Analysis Results"
569
  )
570
 
 
571
  return fig
572
 
573
- # Unified Data Handler (unchanged)
574
- class DataHandler:
575
- """Handles all data operations"""
576
-
577
- @staticmethod
578
- @handle_errors(default_return=(None, "Export failed"))
579
- def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]:
580
- """Universal data export"""
581
- if not data:
582
- return None, "No data to export"
583
-
584
- temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False,
585
- suffix=f'.{format_type}', encoding='utf-8')
586
-
587
- if format_type == 'csv':
588
- writer = csv.writer(temp_file)
589
- writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Pos_Prob', 'Neg_Prob', 'Neutral_Prob', 'Language', 'Key_Words'])
590
- for entry in data:
591
- writer.writerow([
592
- entry.get('timestamp', ''),
593
- entry.get('text', ''),
594
- entry.get('sentiment', ''),
595
- f"{entry.get('confidence', 0):.4f}",
596
- f"{entry.get('pos_prob', 0):.4f}",
597
- f"{entry.get('neg_prob', 0):.4f}",
598
- f"{entry.get('neutral_prob', 0):.4f}",
599
- entry.get('language', ''),
600
- "|".join([f"{word}:{score:.3f}" for word, score in entry.get('key_words', [])])
601
- ])
602
- elif format_type == 'json':
603
- json.dump(data, temp_file, indent=2, ensure_ascii=False)
604
-
605
- temp_file.close()
606
- return temp_file.name, f"Exported {len(data)} entries"
607
-
608
- @staticmethod
609
- @handle_errors(default_return="")
610
- def process_file(file) -> str:
611
- """Process uploaded file"""
612
- if not file:
613
- return ""
614
-
615
- content = file.read().decode('utf-8')
616
-
617
- if file.name.endswith('.csv'):
618
- import io
619
- csv_file = io.StringIO(content)
620
- reader = csv.reader(csv_file)
621
- try:
622
- next(reader)
623
- texts = []
624
- for row in reader:
625
- if row and row[0].strip():
626
- text = row[0].strip().strip('"')
627
- if text:
628
- texts.append(text)
629
- return '\n'.join(texts)
630
- except Exception as e:
631
- lines = content.strip().split('\n')[1:]
632
- texts = []
633
- for line in lines:
634
- if line.strip():
635
- text = line.strip().strip('"')
636
- if text:
637
- texts.append(text)
638
- return '\n'.join(texts)
639
- return content
640
-
641
- # Main Application with Multi-language Support
642
- class SentimentApp:
643
- """Main application orchestrator with multi-language support"""
644
-
645
- def __init__(self):
646
- self.engine = SentimentEngine()
647
- self.history = HistoryManager()
648
- self.data_handler = DataHandler()
649
-
650
- # Multi-language examples
651
- self.examples = [
652
- ["While the film's visual effects were undeniably impressive, the story lacked emotional weight, and the pacing felt inconsistent throughout."],
653
- ["这部电影的视觉效果令人印象深刻,但故事缺乏情感深度,节奏感也不够连贯。"],
654
- ["Aunque los efectos visuales de la película fueron innegablemente impresionantes, la historia carecía de peso emocional."],
655
- ["Bien que les effets visuels du film soient indéniablement impressionnants, l'histoire manquait de poids émotionnel."],
656
- ["An extraordinary achievement in filmmaking — the direction was masterful, the script was sharp, and every performance added depth and realism."]
657
- ]
658
-
659
- @handle_errors(default_return=("Please enter text", None, None, None, None))
660
- def analyze_single(self, text: str, model_key: str = 'multilingual', theme: str = 'default'):
661
- """Single text analysis with multi-language support"""
662
  if not text.strip():
663
- return "Please enter text", None, None, None, None
664
 
665
- result = self.engine.analyze_single(text, model_key)
666
 
667
  # Add to history
668
- self.history.add({
669
- 'text': text[:100],
670
  'full_text': text,
671
- **result
672
- })
673
 
674
  # Create visualizations
675
- theme_ctx = ThemeContext(theme)
676
-
677
- prob_plot = PlotFactory.create_sentiment_bars(result, theme_ctx)
678
- gauge_plot = PlotFactory.create_confidence_gauge(result['confidence'], result['sentiment'], theme_ctx)
679
- cloud_plot = PlotFactory.create_wordcloud_plot(text, result['sentiment'], theme_ctx)
680
- keyword_plot = PlotFactory.create_keyword_chart(result['key_words'], result['sentiment'], theme_ctx)
681
 
682
- # Format result text with key words
683
- key_words_str = ", ".join([f"{word}({score:.3f})" for word, score in result['key_words'][:5]])
684
- result_text = (f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})\n"
685
- f"Language: {result['language']}\n"
686
- f"Key Words: {key_words_str}")
687
 
688
- return result_text, prob_plot, gauge_plot, cloud_plot, keyword_plot
689
-
690
- @handle_errors(default_return=None)
691
- def analyze_batch(self, reviews: str, model_key: str = 'multilingual', progress=None):
692
- """Batch analysis with multi-language support"""
693
- if not reviews.strip():
694
- return None
695
 
696
- texts = [r.strip() for r in reviews.split('\n') if r.strip()]
697
- if len(texts) < 2:
698
- return None
 
 
699
 
700
- results = self.engine.analyze_batch(texts, model_key, progress)
 
701
 
702
  # Add to history
703
- for result in results:
704
- self.history.add(result)
705
 
706
- # Create visualization
707
- theme_ctx = ThemeContext('default')
708
- return PlotFactory.create_batch_analysis(results, theme_ctx)
709
-
710
- @handle_errors(default_return=(None, "No history available"))
711
- def plot_history(self, theme: str = 'default'):
712
- """Plot analysis history using Plotly"""
713
- history = self.history.get_all()
714
- if len(history) < 2:
715
- return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
716
 
717
- theme_ctx = ThemeContext(theme)
718
 
719
- # Create subplots
720
- fig = make_subplots(
721
- rows=2, cols=1,
722
- subplot_titles=['Sentiment History', 'Confidence Over Time'],
723
- vertical_spacing=0.12
724
- )
725
 
726
- indices = list(range(len(history)))
727
- pos_probs = [item.get('pos_prob', 0) for item in history]
728
- confs = [item['confidence'] for item in history]
729
 
730
- # Sentiment trend
731
- colors = [theme_ctx.colors['pos'] if p > 0.5 else theme_ctx.colors['neg'] for p in pos_probs]
732
 
733
- fig.add_trace(
734
- go.Scatter(
735
- x=indices,
736
- y=pos_probs,
737
- mode='markers+lines',
738
- marker=dict(color=colors, size=8),
739
- line=dict(color='gray', width=2),
740
- name='Sentiment Trend'
741
- ),
742
- row=1, col=1
743
- )
744
 
745
- # Add horizontal line at 0.5
746
- fig.add_hline(y=0.5, line_dash="dash", line_color="gray", row=1, col=1)
 
747
 
748
- # Confidence trend
749
- fig.add_trace(
750
- go.Bar(
751
- x=indices,
752
- y=confs,
753
- marker_color='lightblue',
754
- marker_line_color='navy',
755
- marker_line_width=1,
756
- name='Confidence'
757
- ),
758
- row=2, col=1
759
- )
760
 
761
- fig.update_layout(
762
- height=800,
763
- width=1000,
764
- showlegend=False,
765
- title_text="Analysis History"
766
- )
767
 
768
- fig.update_xaxes(title_text="Analysis Number", row=2, col=1)
769
- fig.update_yaxes(title_text="Positive Probability", row=1, col=1)
770
- fig.update_yaxes(title_text="Confidence", row=2, col=1)
771
 
772
- return fig, f"History: {len(history)} analyses"
773
 
774
- # Gradio Interface Setup with Multi-language Support
775
- def create_interface():
776
- """Create streamlined Gradio interface with multi-language support"""
777
- app = SentimentApp()
 
778
 
779
- with gr.Blocks(theme=gr.themes.Soft(), title="Multi-language Sentiment Analyzer") as demo:
780
- gr.Markdown("# 🌍 AI Multi-language Sentiment Analyzer")
781
- gr.Markdown("Advanced sentiment analysis supporting multiple languages with Plotly visualizations and key word extraction")
782
-
783
- with gr.Tab("Single Analysis"):
784
- with gr.Row():
785
- with gr.Column():
786
- text_input = gr.Textbox(
787
- label="Review Text (Multiple Languages Supported)",
788
- placeholder="Enter your review in any supported language...",
789
- lines=5
790
  )
791
- with gr.Row():
792
- analyze_btn = gr.Button("Analyze", variant="primary")
793
- model_selector = gr.Dropdown(
794
- choices=[
795
- ('Auto-detect', 'multilingual'),
796
- ('Multilingual', 'multilingual'),
797
- ('English', 'english'),
798
- ('Chinese 中文', 'chinese'),
799
- ('Spanish Español', 'spanish'),
800
- ('French Français', 'french')
801
- ],
802
- value="multilingual",
803
- label="Language Model"
804
- )
805
- theme_selector = gr.Dropdown(
806
- choices=list(config.THEMES.keys()),
807
- value="default",
808
- label="Theme"
809
- )
810
-
811
- gr.Examples(
812
- examples=app.examples,
813
- inputs=text_input,
814
- label="Multi-language Examples"
815
  )
816
 
817
- with gr.Column():
818
- result_output = gr.Textbox(label="Analysis Result", lines=4)
819
-
820
- with gr.Row():
821
- prob_plot = gr.Plot(label="Sentiment Probabilities")
822
- gauge_plot = gr.Plot(label="Confidence Gauge")
823
 
824
- with gr.Row():
825
- wordcloud_plot = gr.Plot(label="Word Cloud")
826
- keyword_plot = gr.Plot(label="Key Contributing Words")
827
-
828
- with gr.Tab("Batch Analysis"):
829
- with gr.Row():
830
- with gr.Column():
831
- file_upload = gr.File(label="Upload File", file_types=[".csv", ".txt"])
832
- batch_input = gr.Textbox(
833
- label="Reviews (one per line, mixed languages supported)",
834
- lines=8,
835
- placeholder="Enter multiple reviews, one per line...\nSupports mixed languages in the same batch!"
836
  )
837
 
838
- with gr.Column():
839
- load_btn = gr.Button("Load File")
840
- with gr.Row():
841
- batch_btn = gr.Button("Analyze Batch", variant="primary")
842
- batch_model_selector = gr.Dropdown(
843
- choices=[
844
- ('Auto-detect', 'multilingual'),
845
- ('Multilingual', 'multilingual'),
846
- ('English', 'english'),
847
- ('Chinese 中文', 'chinese'),
848
- ('Spanish Español', 'spanish'),
849
- ('French Français', 'french')
850
- ],
851
- value="multilingual",
852
- label="Batch Model"
853
- )
854
 
855
- batch_plot = gr.Plot(label="Batch Analysis Results")
 
856
 
857
- with gr.Tab("History & Export"):
858
- with gr.Row():
859
- refresh_btn = gr.Button("Refresh History")
860
- clear_btn = gr.Button("Clear History", variant="stop")
861
- status_btn = gr.Button("Show Status")
862
-
863
- with gr.Row():
864
- csv_btn = gr.Button("Export CSV")
865
- json_btn = gr.Button("Export JSON")
866
-
867
- history_status = gr.Textbox(label="Status Information")
868
- history_plot = gr.Plot(label="History Trends")
869
- csv_file = gr.File(label="CSV Download", visible=True)
870
- json_file = gr.File(label="JSON Download", visible=True)
871
-
872
- with gr.Tab("Model Information"):
873
- gr.Markdown("""
874
- ## Supported Languages and Models
875
-
876
- | Language | Model | Description |
877
- |----------|-------|-------------|
878
- | **Multilingual** | XLM-RoBERTa | Supports 100+ languages automatically |
879
- | **English** | RoBERTa-base | Optimized for English text |
880
- | **Chinese 中文** | RoBERTa-Chinese | Specialized for Chinese language |
881
- | **Spanish Español** | BETO | Fine-tuned for Spanish sentiment |
882
- | **French Français** | tf-allocine | Trained on French movie reviews |
883
 
884
- ### Features:
885
- - **Automatic Language Detection**: The system can automatically detect the input language
886
- - **Attention-based Keywords**: Extract words that contribute most to sentiment prediction
887
- - **Interactive Visualizations**: Plotly-powered charts and graphs
888
- - **Batch Processing**: Analyze multiple texts at once
889
- - **Export Capabilities**: Save results in CSV or JSON format
890
- - **Multi-language Support**: Mix different languages in batch analysis
891
- """)
892
-
893
- # Event bindings
894
- analyze_btn.click(
895
- app.analyze_single,
896
- inputs=[text_input, model_selector, theme_selector],
897
- outputs=[result_output, prob_plot, gauge_plot, wordcloud_plot, keyword_plot]
898
- )
899
 
900
- load_btn.click(
901
- app.data_handler.process_file,
902
- inputs=file_upload,
903
- outputs=batch_input
904
- )
905
 
906
- batch_btn.click(
907
- app.analyze_batch,
908
- inputs=[batch_input, batch_model_selector],
909
- outputs=batch_plot
910
- )
911
 
912
- refresh_btn.click(
913
- lambda theme: app.plot_history(theme),
914
- inputs=theme_selector,
915
- outputs=[history_plot, history_status]
916
- )
917
 
918
- clear_btn.click(
919
- lambda: f"Cleared {app.history.clear()} entries",
920
- outputs=history_status
921
- )
922
 
923
- status_btn.click(
924
- lambda: f"History: {app.history.size()} entries | Available Models: {', '.join(config.MODELS.keys())}",
925
- outputs=history_status
926
- )
927
 
928
- csv_btn.click(
929
- lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
930
- outputs=[csv_file, history_status]
931
- )
932
 
933
- json_btn.click(
934
- lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
935
- outputs=[json_file, history_status]
936
- )
937
 
938
- return demo
939
 
940
- # Application Entry Point
941
  if __name__ == "__main__":
942
- logging.basicConfig(level=logging.INFO)
943
- demo = create_interface()
944
- demo.launch(
945
- share=True,
946
- server_name="0.0.0.0",
947
- server_port=7860,
948
- show_error=True
949
- )
 
1
  import torch
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import plotly.graph_objects as go
5
  import plotly.express as px
6
  from plotly.subplots import make_subplots
 
14
  import tempfile
15
  from datetime import datetime
16
  import logging
17
+ from functools import lru_cache
18
  from dataclasses import dataclass
19
+ from typing import List, Dict, Optional, Tuple
20
+ import nltk
21
+ from nltk.corpus import stopwords
22
+ import langdetect
23
+ import pandas as pd
24
+
25
+ # Try to import SHAP and LIME, fall back to basic analysis if not available
26
+ try:
27
+ import shap
28
+ SHAP_AVAILABLE = True
29
+ except ImportError:
30
+ SHAP_AVAILABLE = False
31
+ logger.warning("SHAP not available, using basic analysis")
32
+
33
+ try:
34
+ from lime.lime_text import LimeTextExplainer
35
+ LIME_AVAILABLE = True
36
+ except ImportError:
37
+ LIME_AVAILABLE = False
38
+ logger.warning("LIME not available, using basic analysis")
39
 
40
  # Configuration
41
  @dataclass
42
  class Config:
43
+ MAX_HISTORY_SIZE: int = 500
44
+ BATCH_SIZE_LIMIT: int = 30
45
  MAX_TEXT_LENGTH: int = 512
46
+ CACHE_SIZE: int = 64
47
 
48
+ # Supported languages and models
49
+ SUPPORTED_LANGUAGES = {
50
+ 'auto': 'Auto Detect',
51
+ 'en': 'English',
52
+ 'zh': 'Chinese',
53
+ 'es': 'Spanish',
54
+ 'fr': 'French',
55
+ 'de': 'German',
56
+ 'sv': 'Swedish'
57
  }
58
 
 
59
  MODELS = {
60
+ 'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
61
+ 'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
62
+ 'zh': "uer/roberta-base-finetuned-dianping-chinese"
63
  }
64
 
65
+ # Color themes
66
+ THEMES = {
67
+ 'default': {'pos': '#4CAF50', 'neg': '#F44336', 'neu': '#FF9800'},
68
+ 'ocean': {'pos': '#0077BE', 'neg': '#FF6B35', 'neu': '#00BCD4'},
69
+ 'dark': {'pos': '#66BB6A', 'neg': '#EF5350', 'neu': '#FFA726'},
70
+ 'rainbow': {'pos': '#9C27B0', 'neg': '#E91E63', 'neu': '#FF5722'}
71
  }
72
 
73
  config = Config()
74
+
75
+ # Logging setup
76
+ logging.basicConfig(level=logging.INFO)
77
  logger = logging.getLogger(__name__)
78
 
79
+ # Initialize NLTK
80
+ try:
81
+ nltk.download('stopwords', quiet=True)
82
+ nltk.download('punkt', quiet=True)
83
+ STOP_WORDS = set(stopwords.words('english'))
84
+ except:
85
+ STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
86
 
87
+ class ModelManager:
88
+ """Manages multiple language models"""
89
+ def __init__(self):
90
+ self.models = {}
91
+ self.tokenizers = {}
92
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
93
+ self._load_default_model()
94
+
95
+ def _load_default_model(self):
96
+ """Load the default models"""
97
+ try:
98
+ # Load multilingual model as default
99
+ model_name = config.MODELS['multilingual']
100
+ self.tokenizers['default'] = AutoTokenizer.from_pretrained(model_name)
101
+ self.models['default'] = AutoModelForSequenceClassification.from_pretrained(model_name)
102
+ self.models['default'].to(self.device)
103
+ logger.info(f"Default model loaded: {model_name}")
104
+
105
+ # Load Chinese model
106
+ zh_model_name = config.MODELS['zh']
107
+ self.tokenizers['zh'] = AutoTokenizer.from_pretrained(zh_model_name)
108
+ self.models['zh'] = AutoModelForSequenceClassification.from_pretrained(zh_model_name)
109
+ self.models['zh'].to(self.device)
110
+ logger.info(f"Chinese model loaded: {zh_model_name}")
111
+
112
+ except Exception as e:
113
+ logger.error(f"Failed to load models: {e}")
114
+ raise
115
+
116
+ def get_model(self, language='en'):
117
+ """Get model for specific language"""
118
+ if language == 'zh':
119
+ return self.models['zh'], self.tokenizers['zh']
120
+ elif language in ['en', 'auto'] or language not in config.SUPPORTED_LANGUAGES:
121
+ return self.models['default'], self.tokenizers['default']
122
+ return self.models['default'], self.tokenizers['default'] # Use multilingual for other languages
123
 
124
  @staticmethod
125
  def detect_language(text: str) -> str:
126
+ """Detect text language properly"""
127
+ try:
128
+ # Use langdetect for all languages
129
+ detected = langdetect.detect(text)
130
+ # Map some common langdetect codes to our supported languages
131
+ language_mapping = {
132
+ 'zh-cn': 'zh',
133
+ 'zh-tw': 'zh'
134
+ }
135
+ detected = language_mapping.get(detected, detected)
136
+ return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
137
+ except:
138
+ return 'en'
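+ # e.g. detect_language("C'était un très bon film") -> 'fr'; anything langdetect
+ # cannot map to a supported language code falls back to 'en'.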
139
 
140
+ model_manager = ModelManager()
141
 
142
  class HistoryManager:
143
+ """Enhanced history manager with more features"""
144
  def __init__(self):
145
  self._history = []
146
 
147
+ def add_entry(self, entry: Dict):
148
+ self._history.append(entry)
149
  if len(self._history) > config.MAX_HISTORY_SIZE:
150
  self._history = self._history[-config.MAX_HISTORY_SIZE:]
151
 
152
+ def add_batch_entries(self, entries: List[Dict]):
153
+ """Add multiple entries at once"""
154
+ for entry in entries:
155
+ self.add_entry(entry)
156
+
157
+ def get_history(self) -> List[Dict]:
158
  return self._history.copy()
159
 
160
+ def get_recent_history(self, n: int = 10) -> List[Dict]:
161
+ """Get n most recent entries"""
162
+ return self._history[-n:] if self._history else []
163
+
164
+ def filter_history(self, sentiment: str = None, language: str = None,
165
+ min_confidence: float = None) -> List[Dict]:
166
+ """Filter history by criteria"""
167
+ filtered = self._history
168
+
169
+ if sentiment:
170
+ filtered = [h for h in filtered if h['sentiment'] == sentiment]
171
+ if language:
172
+ filtered = [h for h in filtered if h.get('language', 'en') == language]
173
+ if min_confidence:
174
+ filtered = [h for h in filtered if h['confidence'] >= min_confidence]
175
+
176
+ return filtered
177
+
178
  def clear(self) -> int:
179
  count = len(self._history)
180
  self._history.clear()
181
  return count
182
 
183
+ def get_stats(self) -> Dict:
184
+ if not self._history:
185
+ return {}
186
+
187
+ sentiments = [item['sentiment'] for item in self._history]
188
+ confidences = [item['confidence'] for item in self._history]
189
+ languages = [item.get('language', 'en') for item in self._history]
190
+
191
+ return {
192
+ 'total_analyses': len(self._history),
193
+ 'positive_count': sentiments.count('Positive'),
194
+ 'negative_count': sentiments.count('Negative'),
195
+ 'neutral_count': sentiments.count('Neutral'),
196
+ 'avg_confidence': np.mean(confidences),
197
+ 'max_confidence': np.max(confidences),
198
+ 'min_confidence': np.min(confidences),
199
+ 'languages_detected': len(set(languages)),
200
+ 'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
201
+ 'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history])
202
+ }
203
 
204
+ history_manager = HistoryManager()
205
+
206
+ class TextProcessor:
207
+ """Enhanced text processing"""
 
 
208
 
209
+ @staticmethod
210
+ @lru_cache(maxsize=config.CACHE_SIZE)
211
+ def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
212
+ """Clean text with options"""
213
+ text = text.lower().strip()
214
+
215
+ if remove_numbers:
216
+ text = re.sub(r'\d+', '', text)
217
+
218
+ if remove_punctuation:
219
+ text = re.sub(r'[^\w\s]', '', text)
220
+
221
+ words = text.split()
222
+ cleaned_words = [w for w in words if w not in STOP_WORDS and len(w) > 2]
223
+ return ' '.join(cleaned_words)
224
+
225
+ @staticmethod
226
+ def extract_keywords(text: str, top_k: int = 5) -> List[str]:
227
+ """Extract key words from text"""
228
+ # For Chinese text, extract characters
229
+ if re.search(r'[\u4e00-\u9fff]', text):
230
+ words = re.findall(r'[\u4e00-\u9fff]+', text)
231
+ all_chars = ''.join(words)
232
+ char_freq = Counter(all_chars)
233
+ return [char for char, _ in char_freq.most_common(top_k)]
234
+ else:
235
+ # For other languages, use word-based extraction
236
+ cleaned = TextProcessor.clean_text(text)
237
+ words = cleaned.split()
238
+ word_freq = Counter(words)
239
+ return [word for word, _ in word_freq.most_common(top_k)]
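+ # e.g. extract_keywords("Great movie, great acting") -> ['great', 'movie', 'acting'];
+ # for Chinese input the most frequent characters are returned instead.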
240
+
241
+ @staticmethod
242
+ def parse_batch_input(text: str) -> List[str]:
243
+ """Parse batch input from textarea"""
244
+ lines = text.strip().split('\n')
245
+ return [line.strip() for line in lines if line.strip()]
246
+
247
+ class SentimentAnalyzer:
248
+ """Enhanced sentiment analysis"""
249
+
250
+ @staticmethod
251
+ def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
252
+ """Analyze single text with language support"""
253
+ if not text.strip():
254
+ raise ValueError("Empty text provided")
255
+
256
+ # Detect language if auto
257
+ if language == 'auto':
258
+ detected_lang = model_manager.detect_language(text)
259
+ else:
260
+ detected_lang = language
261
+
262
+ # Get appropriate model
263
+ model, tokenizer = model_manager.get_model(detected_lang)
264
+
265
+ # Preprocessing options - don't clean Chinese text
266
+ options = preprocessing_options or {}
267
+ processed_text = text
268
+ if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
269
+ processed_text = TextProcessor.clean_text(
270
+ text,
271
+ options.get('remove_punctuation', True),
272
+ options.get('remove_numbers', False)
273
+ )
274
+
275
  try:
276
+ # Tokenize and analyze
277
+ inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
278
+ truncation=True, max_length=config.MAX_TEXT_LENGTH).to(model_manager.device)
279
 
 
280
  with torch.no_grad():
281
+ outputs = model(**inputs)
282
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
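+ # The 3-class branch below assumes the label order [negative, neutral, positive],
+ # which matches the cardiffnlp sentiment models configured above; the 2-class
+ # branch assumes [negative, positive].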
283
 
284
+ # Handle different model outputs
285
+ if len(probs) == 3: # negative, neutral, positive
286
+ sentiment_idx = np.argmax(probs)
287
+ sentiment_labels = ['Negative', 'Neutral', 'Positive']
288
+ sentiment = sentiment_labels[sentiment_idx]
289
+ confidence = float(probs[sentiment_idx])
290
 
291
+ result = {
292
+ 'sentiment': sentiment,
293
+ 'confidence': confidence,
294
+ 'neg_prob': float(probs[0]),
295
+ 'neu_prob': float(probs[1]),
296
+ 'pos_prob': float(probs[2]),
297
+ 'has_neutral': True
298
+ }
299
+ else: # negative, positive
300
+ pred = np.argmax(probs)
301
+ sentiment = "Positive" if pred == 1 else "Negative"
302
+ confidence = float(probs[pred])
303
 
304
+ result = {
305
+ 'sentiment': sentiment,
306
+ 'confidence': confidence,
307
+ 'neg_prob': float(probs[0]),
308
+ 'pos_prob': float(probs[1]),
309
+ 'neu_prob': 0.0,
310
+ 'has_neutral': False
311
+ }
312
+
313
+ # Add metadata
314
+ result.update({
315
+ 'language': detected_lang,
316
+ 'keywords': TextProcessor.extract_keywords(text),
317
+ 'word_count': len(text.split()),
318
+ 'char_count': len(text)
319
+ })
320
+
321
+ return result
322
 
323
+ except Exception as e:
324
+ logger.error(f"Analysis failed: {e}")
325
+ raise
326
+
327
+ @staticmethod
328
+ def analyze_batch(texts: List[str], language: str = 'auto',
329
+ preprocessing_options: Dict = None) -> List[Dict]:
330
+ """Analyze multiple texts"""
331
+ results = []
332
+ for i, text in enumerate(texts):
333
+ try:
334
+ result = SentimentAnalyzer.analyze_text(text, language, preprocessing_options)
335
+ result['batch_index'] = i
336
+ results.append(result)
337
+ except Exception as e:
338
+ # Add error result
339
+ results.append({
340
+ 'sentiment': 'Error',
341
+ 'confidence': 0.0,
342
+ 'error': str(e),
343
+ 'batch_index': i,
344
+ 'text': text
345
+ })
346
+ return results
347
+
348
+ class ExplainabilityAnalyzer:
349
+ """SHAP and LIME explainability analysis with fallbacks"""
350
+
351
+ @staticmethod
352
+ def create_prediction_function(model, tokenizer, device):
353
+ """Create prediction function for LIME"""
354
+ def predict_proba(texts):
355
+ if isinstance(texts, str):
356
+ texts = [texts]
357
 
358
+ results = []
359
+ for text in texts:
360
+ try:
361
+ inputs = tokenizer(text, return_tensors="pt", padding=True,
362
+ truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
363
+ with torch.no_grad():
364
+ outputs = model(**inputs)
365
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
366
+ results.append(probs)
367
+ except Exception as e:
368
+ # Return neutral probabilities on error
369
+ if len(results) > 0:
370
+ results.append(results[0]) # Use previous result
371
+ else:
372
+ results.append(np.array([0.33, 0.33, 0.34])) # Neutral fallback
373
 
374
+ return np.array(results)
375
+ return predict_proba
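+ # LIME's explain_instance expects a classifier_fn that maps a list of raw
+ # strings to an (n_samples, n_classes) probability array, which is exactly
+ # what predict_proba returns.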
376
+
377
+ @staticmethod
378
+ def analyze_with_lime(text: str, model, tokenizer, device, num_features: int = 10) -> Dict:
379
+ """Analyze text with LIME"""
380
+ if not LIME_AVAILABLE:
381
+ return {'method': 'LIME', 'error': 'LIME library not available'}
382
+
383
+ try:
384
+ # Create prediction function
385
+ predict_fn = ExplainabilityAnalyzer.create_prediction_function(model, tokenizer, device)
386
 
387
+ # Test prediction function first
388
+ test_probs = predict_fn([text])
389
+ if len(test_probs) == 0:
390
+ return {'method': 'LIME', 'error': 'Prediction function failed'}
391
 
392
+ # Determine class names based on model output
393
+ num_classes = len(test_probs[0])
394
+ if num_classes == 3:
395
+ class_names = ['Negative', 'Neutral', 'Positive']
396
+ else:
397
+ class_names = ['Negative', 'Positive']
398
+
399
+ # Initialize LIME explainer
400
+ explainer = LimeTextExplainer(
401
+ class_names=class_names,
402
+ feature_selection='auto',
403
+ split_expression=r'\W+',
404
+ bow=False
405
+ )
406
+
407
+ # Generate explanation
408
+ explanation = explainer.explain_instance(
409
+ text,
410
+ predict_fn,
411
+ num_features=min(num_features, len(text.split())),
412
+ num_samples=50 # Reduced for faster processing
413
+ )
414
+
415
+ # Extract feature importance
416
+ feature_importance = explanation.as_list()
417
+
418
+ return {
419
+ 'method': 'LIME',
420
+ 'feature_importance': feature_importance,
421
+ 'class_names': class_names
422
  }
423
 
424
+ except Exception as e:
425
+ logger.error(f"LIME analysis failed: {e}")
426
+ return {'method': 'LIME', 'error': str(e)}
427
+
428
+ @staticmethod
429
+ def analyze_with_attention(text: str, model, tokenizer, device) -> Dict:
430
+ """Analyze text with attention weights - simplified version"""
431
+ try:
432
+ # Tokenize input
433
+ inputs = tokenizer(text, return_tensors="pt", padding=True,
434
+ truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
435
+
436
+ # Get tokens for display
437
+ tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
438
+
439
+ # Simple attention simulation based on input importance
440
+ # This is a fallback when model doesn't support attention output
441
+ try:
442
+ with torch.no_grad():
443
+ outputs = model(**inputs, output_attentions=True)
444
+ if hasattr(outputs, 'attentions') and outputs.attentions is not None:
445
+ attentions = outputs.attentions
446
+ # Average attention across layers and heads
447
+ avg_attention = torch.mean(torch.stack(attentions), dim=(0, 1, 2)).cpu().numpy()
448
+ else:
449
+ raise AttributeError("No attention outputs")
450
+ except:
451
+ # Fallback: simulate attention based on token position and type
452
+ avg_attention = np.random.uniform(0.1, 1.0, len(tokens))
453
+ # Give higher attention to non-special tokens
454
+ for i, token in enumerate(tokens):
455
+ if token in ['[CLS]', '[SEP]', '<s>', '</s>', '<pad>']:
456
+ avg_attention[i] *= 0.3
457
+
458
+ # Create attention weights for each token
459
+ attention_weights = []
460
+ for i, token in enumerate(tokens):
461
+ if i < len(avg_attention):
462
+ # Clean token for display
463
+ clean_token = token.replace('Ġ', '').replace('##', '')
464
+ if clean_token.strip():
465
+ attention_weights.append((clean_token, float(avg_attention[i])))
466
+
467
+ return {
468
+ 'method': 'Attention',
469
+ 'tokens': [t[0] for t in attention_weights],
470
+ 'attention_weights': attention_weights
471
+ }
472
 
473
  except Exception as e:
474
+ logger.error(f"Attention analysis failed: {e}")
475
+ return {'method': 'Attention', 'error': str(e)}
476
+
477
+ class AdvancedVisualizer:
478
+ """Visualizations for explainability analysis"""
479
 
480
+ @staticmethod
481
+ def create_lime_plot(lime_result: Dict, theme: str = 'default') -> go.Figure:
482
+ """Create LIME feature importance plot"""
483
+ if 'error' in lime_result:
484
+ fig = go.Figure()
485
+ fig.add_annotation(text=f"LIME Error: {lime_result['error']}",
486
+ x=0.5, y=0.5, showarrow=False)
487
+ return fig
488
 
489
+ features, scores = zip(*lime_result['feature_importance'])
490
+ colors = ['red' if score < 0 else 'green' for score in scores]
 
 
491
 
492
+ fig = go.Figure(data=[
493
+ go.Bar(
494
+ y=features,
495
+ x=scores,
496
+ orientation='h',
497
+ marker_color=colors,
498
+ text=[f'{score:.3f}' for score in scores],
499
+ textposition='auto'
500
+ )
501
+ ])
502
 
503
+ fig.update_layout(
504
+ title="LIME Feature Importance",
505
+ xaxis_title="Importance Score",
506
+ yaxis_title="Features",
507
+ height=400,
508
+ showlegend=False
509
+ )
510
 
511
+ return fig
512
+
513
+ @staticmethod
514
+ def create_attention_plot(attention_result: Dict, theme: str = 'default') -> go.Figure:
515
+ """Create attention weights visualization"""
516
+ if 'error' in attention_result:
517
+ fig = go.Figure()
518
+ fig.add_annotation(
519
+ text=f"Attention Error: {attention_result['error']}",
520
+ x=0.5, y=0.5,
521
+ xref="paper", yref="paper",
522
+ showarrow=False,
523
+ font=dict(size=14)
524
+ )
525
+ fig.update_layout(height=400, title="Attention Analysis Error")
526
+ return fig
527
 
528
+ if not attention_result.get('attention_weights'):
529
+ fig = go.Figure()
530
+ fig.add_annotation(
531
+ text="No attention weights available",
532
+ x=0.5, y=0.5,
533
+ xref="paper", yref="paper",
534
+ showarrow=False
535
+ )
536
+ fig.update_layout(height=400, title="No Attention Data")
537
+ return fig
538
 
539
+ tokens, weights = zip(*attention_result['attention_weights'])
540
+
541
+ # Normalize weights for better visualization
542
+ weights = np.array(weights)
543
+ if weights.max() > weights.min():
544
+ normalized_weights = (weights - weights.min()) / (weights.max() - weights.min())
 
545
  else:
546
+ normalized_weights = weights
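+ # Min-max scaling above keeps the plotted bar heights in [0, 1] regardless of
+ # the raw attention scale; identical weights are left untouched to avoid a
+ # division by zero.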
 
547
 
548
+ # Limit display to top 15 tokens for readability
549
+ if len(tokens) > 15:
550
+ # Get top 15 by attention weight
551
+ top_indices = np.argsort(weights)[-15:]
552
+ tokens = [tokens[i] for i in top_indices]
553
+ normalized_weights = normalized_weights[top_indices]
554
 
555
+ fig = go.Figure(data=[
556
+ go.Bar(
557
+ x=list(range(len(tokens))),
558
+ y=normalized_weights,
559
+ text=tokens,
560
+ textposition='outside',
561
+ marker_color=normalized_weights,
562
+ colorscale='Viridis',
563
+ hovertemplate='<b>%{text}</b><br>Weight: %{y:.3f}<extra></extra>'
564
+ )
565
+ ])
 
 
 
 
566
 
567
+ fig.update_layout(
568
+ title="Attention Weights (Top Tokens)",
569
+ xaxis_title="Token Position",
570
+ yaxis_title="Attention Weight (Normalized)",
571
+ height=400,
572
+ showlegend=False,
573
+ xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens)
574
+ )
575
 
576
+ return fig
577
+ """Enhanced visualizations with Plotly"""
578
+
579
+ @staticmethod
580
+ def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
581
+ """Create an animated sentiment gauge"""
582
+ colors = config.THEMES[theme]
583
+
584
+ if result['has_neutral']:
585
+ # Three-way gauge
586
+ fig = go.Figure(go.Indicator(
587
+ mode = "gauge+number+delta",
588
+ value = result['pos_prob'] * 100,
589
+ domain = {'x': [0, 1], 'y': [0, 1]},
590
+ title = {'text': f"Sentiment: {result['sentiment']}"},
591
+ delta = {'reference': 50},
592
+ gauge = {
593
+ 'axis': {'range': [None, 100]},
594
+ 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
595
+ 'steps': [
596
+ {'range': [0, 33], 'color': colors['neg']},
597
+ {'range': [33, 67], 'color': colors['neu']},
598
+ {'range': [67, 100], 'color': colors['pos']}
599
+ ],
600
+ 'threshold': {
601
+ 'line': {'color': "red", 'width': 4},
602
+ 'thickness': 0.75,
603
+ 'value': 90
604
+ }
605
+ }
606
+ ))
607
+ else:
608
+ # Two-way gauge
609
+ fig = go.Figure(go.Indicator(
610
+ mode = "gauge+number",
611
+ value = result['confidence'] * 100,
612
+ domain = {'x': [0, 1], 'y': [0, 1]},
613
+ title = {'text': f"Confidence: {result['sentiment']}"},
614
+ gauge = {
615
+ 'axis': {'range': [None, 100]},
616
+ 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
617
+ 'steps': [
618
+ {'range': [0, 50], 'color': "lightgray"},
619
+ {'range': [50, 100], 'color': "gray"}
620
+ ]
621
+ }
622
+ ))
623
 
624
+ fig.update_layout(height=400, font={'size': 16})
625
+ return fig
 
 
 
626
 
627
  @staticmethod
628
+ def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
629
+ """Create probability bar chart"""
630
+ colors = config.THEMES[theme]
631
+
632
+ if result['has_neutral']:
633
+ labels = ['Negative', 'Neutral', 'Positive']
634
+ values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
635
+ bar_colors = [colors['neg'], colors['neu'], colors['pos']]
636
+ else:
637
+ labels = ['Negative', 'Positive']
638
+ values = [result['neg_prob'], result['pos_prob']]
639
+ bar_colors = [colors['neg'], colors['pos']]
640
 
641
  fig = go.Figure(data=[
642
+ go.Bar(x=labels, y=values, marker_color=bar_colors, text=[f'{v:.3f}' for v in values])
643
  ])
644
 
645
+ fig.update_traces(texttemplate='%{text}', textposition='outside')
646
  fig.update_layout(
647
  title="Sentiment Probabilities",
 
648
  yaxis_title="Probability",
649
+ height=400,
 
 
650
  showlegend=False
651
  )
652
 
653
  return fig
654
+
655
  @staticmethod
656
+ def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
657
+ """Create batch analysis summary"""
658
+ colors = config.THEMES[theme]
659
+
660
+ # Count sentiments
661
+ sentiments = [r['sentiment'] for r in results if 'sentiment' in r]
662
+ sentiment_counts = Counter(sentiments)
663
+
664
+ # Create pie chart
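+ # Theme colors are looked up by the first three letters of each sentiment label ('pos', 'neg', 'neu'), defaulting to gray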
665
+ fig = go.Figure(data=[go.Pie(
666
+ labels=list(sentiment_counts.keys()),
667
+ values=list(sentiment_counts.values()),
668
+ marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
669
+ textinfo='label+percent',
670
+ hole=0.3
671
+ )])
672
 
673
  fig.update_layout(
674
+ title=f"Batch Analysis Summary ({len(results)} texts)",
675
+ height=400
676
  )
677
 
678
  return fig
679
+
680
  @staticmethod
681
+ def create_confidence_distribution(results: List[Dict]) -> go.Figure:
682
+ """Create confidence distribution plot"""
683
+ confidences = [r['confidence'] for r in results if 'confidence' in r and r.get('sentiment') != 'Error']
684
+
685
+ if not confidences:
686
+ return go.Figure()
687
+
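+ # Histogram of per-text confidence scores across the batch (20 bins)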
688
+ fig = go.Figure(data=[go.Histogram(
689
+ x=confidences,
690
+ nbinsx=20,
691
+ marker_color='skyblue',
692
+ opacity=0.7
693
+ )])
694
 
695
  fig.update_layout(
696
+ title="Confidence Distribution",
697
+ xaxis_title="Confidence Score",
698
+ yaxis_title="Frequency",
699
+ height=400
 
 
700
  )
701
 
702
  return fig
703
 
704
  @staticmethod
705
+ def create_history_dashboard(history: List[Dict]) -> go.Figure:
706
+ """Create comprehensive history dashboard"""
707
+ if len(history) < 2:
708
+ return go.Figure()
 
709
 
710
+ # Create subplots
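+ # 2x2 grid: sentiment timeline (scatter), confidence histogram, language pie, and sentiment count bars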
711
  fig = make_subplots(
712
  rows=2, cols=2,
713
+ subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
714
+ 'Language Distribution', 'Sentiment Summary'],
715
+ specs=[[{"secondary_y": False}, {"secondary_y": False}],
716
+ [{"type": "pie"}, {"type": "bar"}]]
717
  )
718
 
719
+ # Extract data
720
+ indices = list(range(len(history)))
721
+ pos_probs = [item['pos_prob'] for item in history]
722
+ confidences = [item['confidence'] for item in history]
723
+ sentiments = [item['sentiment'] for item in history]
724
+ languages = [item.get('language', 'en') for item in history]
725
 
726
+ # Sentiment timeline
727
+ colors = ['#4CAF50' if s == 'Positive' else '#F44336' if s == 'Negative' else '#9E9E9E' for s in sentiments]
728
  fig.add_trace(
729
+ go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
730
+ marker=dict(color=colors, size=8),
731
+ name='Positive Probability'),
732
  row=1, col=1
733
  )
734
 
735
+ # Confidence distribution
 
736
  fig.add_trace(
737
+ go.Histogram(x=confidences, nbinsx=10, name='Confidence'),
738
  row=1, col=2
739
  )
740
 
741
+ # Language distribution
742
+ lang_counts = Counter(languages)
743
  fig.add_trace(
744
+ go.Pie(labels=list(lang_counts.keys()), values=list(lang_counts.values()),
745
+ name="Languages"),
 
746
  row=2, col=1
747
  )
748
 
749
+ # Sentiment summary
750
+ sent_counts = Counter(sentiments)
751
+ fig.add_trace(
752
+ go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
753
+ marker_color=['#4CAF50' if k == 'Positive' else '#F44336' if k == 'Negative' else '#9E9E9E' for k in sent_counts.keys()]),
754
+ row=2, col=2
 
 
755
  )
756
 
757
+ fig.update_layout(height=800, showlegend=False)
758
  return fig
759
 
760
+ # Main application functions
761
+ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
762
+ remove_punct: bool, remove_nums: bool):
763
+ """Enhanced single text analysis"""
764
+ try:
765
  if not text.strip():
766
+ return "Please enter text", None, None
767
+
768
+ # Map display names back to language codes
769
+ language_map = {
770
+ 'Auto Detect': 'auto',
771
+ 'English': 'en',
772
+ 'Chinese': 'zh',
773
+ 'Spanish': 'es',
774
+ 'French': 'fr',
775
+ 'German': 'de',
776
+ 'Swedish': 'sv'
777
+ }
778
+ language_code = language_map.get(language, 'auto')
779
+
780
+ preprocessing_options = {
781
+ 'clean_text': clean_text,
782
+ 'remove_punctuation': remove_punct,
783
+ 'remove_numbers': remove_nums
784
+ }
785
 
786
+ result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)
787
 
788
  # Add to history
789
+ history_entry = {
790
+ 'text': text[:100] + '...' if len(text) > 100 else text,
791
  'full_text': text,
792
+ 'sentiment': result['sentiment'],
793
+ 'confidence': result['confidence'],
794
+ 'pos_prob': result['pos_prob'],
795
+ 'neg_prob': result['neg_prob'],
796
+ 'neu_prob': result.get('neu_prob', 0),
797
+ 'language': result['language'],
798
+ 'timestamp': datetime.now().isoformat(),
799
+ 'analysis_type': 'single'
800
+ }
801
+ history_manager.add_entry(history_entry)
802
 
803
  # Create visualizations
804
+ gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
805
+ bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)
806
+
807
+ # Create info text
808
+ info_text = f"""
809
+ **Analysis Results:**
810
+ - **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
811
+ - **Language:** {result['language'].upper()}
812
+ - **Keywords:** {', '.join(result['keywords'])}
813
+ - **Stats:** {result['word_count']} words, {result['char_count']} characters
814
+ """
815
+
816
+ return info_text, gauge_fig, bars_fig
817
+
818
+ except Exception as e:
819
+ logger.error(f"Analysis failed: {e}")
820
+ return f"Error: {str(e)}", None, None
821
+
822
+ def analyze_batch_texts(batch_text: str, language: str, theme: str,
823
+ clean_text: bool, remove_punct: bool, remove_nums: bool):
824
+ """Batch text analysis"""
825
+ try:
826
+ if not batch_text.strip():
827
+ return "Please enter texts (one per line)", None, None, None
828
 
829
+ # Parse batch input
830
+ texts = TextProcessor.parse_batch_input(batch_text)
 
 
 
831
 
832
+ if len(texts) > config.BATCH_SIZE_LIMIT:
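+ # config.BATCH_SIZE_LIMIT (50 by default) caps how many texts are analyzed in a single request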
833
+ return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
834
+
835
+ if not texts:
836
+ return "No valid texts found", None, None, None
837
+
838
+ # Map display names back to language codes
839
+ language_map = {
840
+ 'Auto Detect': 'auto',
841
+ 'English': 'en',
842
+ 'Chinese': 'zh',
843
+ 'Spanish': 'es',
844
+ 'French': 'fr',
845
+ 'German': 'de',
846
+ 'Swedish': 'sv'
847
+ }
848
+ language_code = language_map.get(language, 'auto')
849
 
850
+ preprocessing_options = {
851
+ 'clean_text': clean_text,
852
+ 'remove_punctuation': remove_punct,
853
+ 'remove_numbers': remove_nums
854
+ }
855
 
856
+ # Analyze all texts
857
+ results = SentimentAnalyzer.analyze_batch(texts, language_code, preprocessing_options)
858
 
859
  # Add to history
860
+ batch_entries = []
861
+ for i, (text, result) in enumerate(zip(texts, results)):
862
+ if 'error' not in result:
863
+ entry = {
864
+ 'text': text[:100] + '...' if len(text) > 100 else text,
865
+ 'full_text': text,
866
+ 'sentiment': result['sentiment'],
867
+ 'confidence': result['confidence'],
868
+ 'pos_prob': result['pos_prob'],
869
+ 'neg_prob': result['neg_prob'],
870
+ 'neu_prob': result.get('neu_prob', 0),
871
+ 'language': result['language'],
872
+ 'timestamp': datetime.now().isoformat(),
873
+ 'analysis_type': 'batch',
874
+ 'batch_index': i
875
+ }
876
+ batch_entries.append(entry)
877
 
878
+ history_manager.add_batch_entries(batch_entries)
879
 
880
+ # Create visualizations
881
+ summary_fig = PlotlyVisualizer.create_batch_summary(results, theme)
882
+ confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
883
+
884
+ # Create results table
885
+ df_data = []
886
+ for i, (text, result) in enumerate(zip(texts, results)):
887
+ if 'error' in result:
888
+ df_data.append({
889
+ 'Index': i+1,
890
+ 'Text': text[:50] + '...' if len(text) > 50 else text,
891
+ 'Sentiment': 'Error',
892
+ 'Confidence': 0.0,
893
+ 'Language': 'Unknown',
894
+ 'Error': result['error']
895
+ })
896
+ else:
897
+ df_data.append({
898
+ 'Index': i+1,
899
+ 'Text': text[:50] + '...' if len(text) > 50 else text,
900
+ 'Sentiment': result['sentiment'],
901
+ 'Confidence': f"{result['confidence']:.3f}",
902
+ 'Language': result['language'].upper(),
903
+ 'Keywords': ', '.join(result['keywords'][:3])
904
+ })
905
+
906
+ df = pd.DataFrame(df_data)
907
+
908
+ # Summary info
909
+ successful_results = [r for r in results if 'error' not in r]
910
+ error_count = len(results) - len(successful_results)
911
+
912
+ if successful_results:
913
+ sentiment_counts = Counter([r['sentiment'] for r in successful_results])
914
+ avg_confidence = np.mean([r['confidence'] for r in successful_results])
915
+
916
+ summary_text = f"""
917
+ **Batch Analysis Summary:**
918
+ - **Total Texts:** {len(texts)}
919
+ - **Successful:** {len(successful_results)}
920
+ - **Errors:** {error_count}
921
+ - **Average Confidence:** {avg_confidence:.3f}
922
+ - **Sentiments:** {dict(sentiment_counts)}
923
+ """
924
+ else:
925
+ summary_text = f"All {len(texts)} texts failed to analyze."
926
 
927
+ return summary_text, df, summary_fig, confidence_fig
928
 
929
+ except Exception as e:
930
+ logger.error(f"Batch analysis failed: {e}")
931
+ return f"Error: {str(e)}", None, None, None
932
+
933
+ def analyze_advanced_text(text: str, language: str, theme: str, use_lime: bool,
934
+ use_attention: bool, lime_features: int):
935
+ """Advanced analysis with SHAP and LIME explainability"""
936
+ try:
937
+ if not text.strip():
938
+ return "Please enter text", None, None, None, None
939
 
940
+ # Map display names back to language codes
941
+ language_map = {
942
+ 'Auto Detect': 'auto',
943
+ 'English': 'en',
944
+ 'Chinese': 'zh',
945
+ 'Spanish': 'es',
946
+ 'French': 'fr',
947
+ 'German': 'de',
948
+ 'Swedish': 'sv'
949
+ }
950
+ language_code = language_map.get(language, 'auto')
951
 
952
+ # Basic sentiment analysis first
953
+ result = SentimentAnalyzer.analyze_text(text, language_code)
954
 
955
+ # Create basic visualizations first
956
+ gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
957
+ bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)
958
 
959
+ # Initialize explainability results
960
+ lime_result = None
961
+ attention_result = None
962
+ lime_plot = None
963
+ attention_plot = None
964
 
965
+ # Get model for explainability analysis
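+ # Explainability is best-effort: any failure here is caught below and replaced with annotated error plots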
966
+ try:
967
+ model, tokenizer = model_manager.get_model(language_code)
968
+
969
+ # LIME Analysis
970
+ if use_lime:
971
+ lime_result = ExplainabilityAnalyzer.analyze_with_lime(
972
+ text, model, tokenizer, model_manager.device, lime_features
973
+ )
974
+ lime_plot = AdvancedVisualizer.create_lime_plot(lime_result, theme)
975
+ else:
976
+ # Create empty plot
977
+ lime_plot = go.Figure()
978
+ lime_plot.add_annotation(text="LIME analysis disabled", x=0.5, y=0.5,
979
+ xref="paper", yref="paper", showarrow=False)
980
+ lime_plot.update_layout(height=400, title="LIME Analysis (Disabled)")
981
+
982
+ # Attention Analysis
983
+ if use_attention:
984
+ attention_result = ExplainabilityAnalyzer.analyze_with_attention(
985
+ text, model, tokenizer, model_manager.device
986
+ )
987
+ attention_plot = AdvancedVisualizer.create_attention_plot(attention_result, theme)
988
+ else:
989
+ # Create empty plot
990
+ attention_plot = go.Figure()
991
+ attention_plot.add_annotation(text="Attention analysis disabled", x=0.5, y=0.5,
992
+ xref="paper", yref="paper", showarrow=False)
993
+ attention_plot.update_layout(height=400, title="Attention Analysis (Disabled)")
994
+
995
+ except Exception as e:
996
+ logger.error(f"Explainability analysis failed: {e}")
997
+ # Create error plots
998
+ lime_plot = go.Figure()
999
+ lime_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5,
1000
+ xref="paper", yref="paper", showarrow=False)
1001
+ lime_plot.update_layout(height=400, title="Analysis Error")
1002
+
1003
+ attention_plot = go.Figure()
1004
+ attention_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5,
1005
+ xref="paper", yref="paper", showarrow=False)
1006
+ attention_plot.update_layout(height=400, title="Analysis Error")
1007
+
1008
+ # Add to history
1009
+ history_entry = {
1010
+ 'text': text[:100] + '...' if len(text) > 100 else text,
1011
+ 'full_text': text,
1012
+ 'sentiment': result['sentiment'],
1013
+ 'confidence': result['confidence'],
1014
+ 'pos_prob': result['pos_prob'],
1015
+ 'neg_prob': result['neg_prob'],
1016
+ 'neu_prob': result.get('neu_prob', 0),
1017
+ 'language': result['language'],
1018
+ 'timestamp': datetime.now().isoformat(),
1019
+ 'analysis_type': 'advanced',
1020
+ 'explainability_used': use_lime or use_attention
1021
+ }
1022
+ history_manager.add_entry(history_entry)
1023
+
1024
+ # Create detailed info text
1025
+ info_text = f"""
1026
+ **Advanced Analysis Results:**
1027
+ - **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
1028
+ - **Language:** {result['language'].upper()}
1029
+ - **Text Statistics:**
1030
+ - Words: {result['word_count']}
1031
+ - Characters: {result['char_count']}
1032
+ - Average word length: {result['char_count']/max(result['word_count'], 1):.1f}
1033
+ - **Keywords:** {', '.join(result['keywords'])}
1034
+
1035
+ **Explainability Analysis:**
1036
+ """
1037
+
1038
+ if use_lime:
1039
+ if lime_result and 'error' not in lime_result:
1040
+ info_text += f"\n- **LIME:** ✅ Analyzed top {lime_features} features"
1041
+ else:
1042
+ error_msg = lime_result.get('error', 'Unknown error') if lime_result else 'Not available'
1043
+ info_text += f"\n- **LIME:** ❌ {error_msg}"
1044
+ else:
1045
+ info_text += f"\n- **LIME:** ⏸️ Disabled"
1046
+
1047
+ if use_attention:
1048
+ if attention_result and 'error' not in attention_result:
1049
+ info_text += f"\n- **Attention:** ✅ Token-level attention weights computed"
1050
+ else:
1051
+ error_msg = attention_result.get('error', 'Unknown error') if attention_result else 'Not available'
1052
+ info_text += f"\n- **Attention:** ❌ {error_msg}"
1053
+ else:
1054
+ info_text += f"\n- **Attention:** ⏸️ Disabled"
1055
 
1056
+ return info_text, gauge_fig, bars_fig, lime_plot, attention_plot
 
 
1057
 
1058
+ except Exception as e:
1059
+ logger.error(f"Advanced analysis failed: {e}")
1060
+ # Return basic empty plots on complete failure
1061
+ empty_fig = go.Figure()
1062
+ empty_fig.add_annotation(text=f"Analysis failed: {str(e)}", x=0.5, y=0.5,
1063
+ xref="paper", yref="paper", showarrow=False)
1064
+ empty_fig.update_layout(height=400)
1065
+
1066
+ return f"Error: {str(e)}", empty_fig, empty_fig, empty_fig, empty_fig
1067
 
1068
+ def get_history_stats():
1069
+ """Get enhanced history statistics"""
1070
+ stats = history_manager.get_stats()
1071
+ if not stats:
1072
+ return "No analysis history available"
1073
 
1074
+ return f"""
1075
+ **Comprehensive History Statistics:**
1076
+
1077
+ **Analysis Counts:**
1078
+ - Total Analyses: {stats['total_analyses']}
1079
+ - Positive: {stats['positive_count']}
1080
+ - Negative: {stats['negative_count']}
1081
+ - Neutral: {stats['neutral_count']}
1082
+
1083
+ **Confidence Metrics:**
1084
+ - Average Confidence: {stats['avg_confidence']:.3f}
1085
+ - Highest Confidence: {stats['max_confidence']:.3f}
1086
+ - Lowest Confidence: {stats['min_confidence']:.3f}
1087
+
1088
+ **Language Statistics:**
1089
+ - Languages Detected: {stats['languages_detected']}
1090
+ - Most Common Language: {stats['most_common_language'].upper()}
1091
+
1092
+ **Text Statistics:**
1093
+ - Average Text Length: {stats['avg_text_length']:.1f} characters
1094
+ """
1095
+
1096
+ def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
1097
+ """Display filtered history"""
1098
+ # Convert filters
1099
+ sentiment = sentiment_filter if sentiment_filter != "All" else None
1100
+ language = language_filter.lower() if language_filter != "All" else None
1101
+
1102
+ filtered_history = history_manager.filter_history(
1103
+ sentiment=sentiment,
1104
+ language=language,
1105
+ min_confidence=min_confidence if min_confidence > 0 else None
1106
+ )
1107
+
1108
+ if not filtered_history:
1109
+ return "No entries match the filter criteria", None
1110
+
1111
+ # Create DataFrame for display
1112
+ df_data = []
1113
+ for entry in filtered_history[-20:]: # Show last 20 entries
1114
+ df_data.append({
1115
+ 'Timestamp': entry['timestamp'][:16].replace('T', ' '), # YYYY-MM-DD HH:MM
1116
+ 'Text': entry['text'],
1117
+ 'Sentiment': entry['sentiment'],
1118
+ 'Confidence': f"{entry['confidence']:.3f}",
1119
+ 'Language': entry['language'].upper(),
1120
+ 'Type': entry.get('analysis_type', 'single')
1121
+ })
1122
+
1123
+ df = pd.DataFrame(df_data)
1124
+
1125
+ summary = f"""
1126
+ **Filtered Results:**
1127
+ - Found {len(filtered_history)} entries matching criteria
1128
+ - Showing most recent {min(20, len(filtered_history))} entries
1129
+ """
1130
+
1131
+ return summary, df
1132
+
1133
+ def plot_history_dashboard():
1134
+ """Create history dashboard"""
1135
+ history = history_manager.get_history()
1136
+ if len(history) < 2:
1137
+ return None, "Need at least 2 analyses for dashboard"
1138
+
1139
+ fig = PlotlyVisualizer.create_history_dashboard(history)
1140
+ return fig, f"Dashboard showing {len(history)} analyses"
1141
+
1142
+ def export_history_csv():
1143
+ """Export history to CSV"""
1144
+ history = history_manager.get_history()
1145
+ if not history:
1146
+ return None, "No history to export"
1147
+
1148
+ try:
1149
+ df = pd.DataFrame(history)
1150
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w')
1151
+ df.to_csv(temp_file.name, index=False)
1152
+ return temp_file.name, f"Exported {len(history)} entries to CSV"
1153
+ except Exception as e:
1154
+ return None, f"Export failed: {str(e)}"
1155
+
1156
+ def export_history_excel():
1157
+ """Export history to Excel"""
1158
+ history = history_manager.get_history()
1159
+ if not history:
1160
+ return None, "No history to export"
1161
+
1162
+ try:
1163
+ df = pd.DataFrame(history)
1164
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
1165
+ df.to_excel(temp_file.name, index=False)
1166
+ return temp_file.name, f"Exported {len(history)} entries to Excel"
1167
+ except Exception as e:
1168
+ return None, f"Export failed: {str(e)}"
1169
+
1170
+ def clear_all_history():
1171
+ """Clear analysis history"""
1172
+ count = history_manager.clear()
1173
+ return f"Cleared {count} entries from history"
1174
+
1175
+ def get_recent_analyses():
1176
+ """Get recent analysis summary"""
1177
+ recent = history_manager.get_recent_history(10)
1178
+ if not recent:
1179
+ return "No recent analyses available"
1180
+
1181
+ summary_text = "**Recent Analyses (Last 10):**\n\n"
1182
+ for i, entry in enumerate(recent, 1):
1183
+ summary_text += f"{i}. **{entry['sentiment']}** ({entry['confidence']:.3f}) - {entry['text']}\n"
1184
+
1185
+ return summary_text
1186
+
1187
+ # Sample data
1188
+ SAMPLE_TEXTS = [
1189
+ # Auto Detect
1190
+ ["The film had its moments, but overall it felt a bit too long and lacked emotional depth."],
1191
+
1192
+ # English
1193
+ ["I was completely blown away by the movie — the performances were raw and powerful, and the story stayed with me long after the credits rolled."],
1194
+
1195
+ # Chinese
1196
+ ["这部电影节奏拖沓,剧情老套,完全没有让我产生任何共鸣,是一次失望的观影体验。"],
1197
+
1198
+ # Spanish
1199
+ ["Una obra maestra del cine contemporáneo, con actuaciones sobresalientes, un guion bien escrito y una dirección impecable."],
1200
+
1201
+ # French
1202
+ ["Je m'attendais à beaucoup mieux. Le scénario était confus, les dialogues ennuyeux, et je me suis presque endormi au milieu du film."],
1203
+
1204
+ # German
1205
+ ["Der Film war ein emotionales Erlebnis mit großartigen Bildern, einem mitreißenden Soundtrack und einer Geschichte, die zum Nachdenken anregt."],
1206
+
1207
+ # Swedish
1208
+ ["Filmen var en besvikelse – tråkig handling, överdrivet skådespeleri och ett slut som inte gav något avslut alls."]
1209
+ ]
1210
+
1211
+ BATCH_SAMPLE = """I love this product! It works perfectly.
1212
+ The service was terrible and slow.
1213
+ Not sure if I like it or not.
1214
+ Amazing quality and fast delivery!
1215
+ Could be better, but it's okay."""
1216
+
1217
+ # Gradio Interface
1218
+ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment Analyzer") as demo:
1219
+ gr.Markdown("# 🎭 Advanced Multilingual Sentiment Analyzer")
1220
+ gr.Markdown("Comprehensive sentiment analysis with batch processing, advanced analytics, and multilingual support")
1221
+
1222
+ with gr.Tab("📝 Single Analysis"):
1223
+ with gr.Row():
1224
+ with gr.Column(scale=2):
1225
+ text_input = gr.Textbox(
1226
+ label="Text to Analyze",
1227
+ placeholder="Enter your text here... (supports multiple languages)",
1228
+ lines=4
1229
+ )
1230
+
1231
+ with gr.Row():
1232
+ language_select = gr.Dropdown(
1233
+ choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
1234
+ value='Auto Detect',
1235
+ label="Language"
1236
  )
1237
+ theme_select = gr.Dropdown(
1238
+ choices=list(config.THEMES.keys()),
1239
+ value='default',
1240
+ label="Theme"
1241
  )
1242
 
1243
+ with gr.Row():
1244
+ clean_text = gr.Checkbox(label="Clean Text", value=False)
1245
+ remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
1246
+ remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
1247
+
1248
+ analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")
1249
+
1250
+ gr.Examples(
1251
+ examples=SAMPLE_TEXTS,
1252
+ inputs=text_input,
1253
+ label="Sample Texts (Multiple Languages)"
1254
+ )
1255
 
1256
+ with gr.Column(scale=1):
1257
+ result_info = gr.Markdown("Enter text and click Analyze")
1258
+
1259
+ with gr.Row():
1260
+ gauge_plot = gr.Plot(label="Sentiment Gauge")
1261
+ bars_plot = gr.Plot(label="Probability Distribution")
1262
+
1263
+ with gr.Tab("🔬 Advanced Analysis"):
1264
+ with gr.Row():
1265
+ with gr.Column(scale=2):
1266
+ advanced_input = gr.Textbox(
1267
+ label="Text for Advanced Analysis",
1268
+ placeholder="Enter text for explainability analysis...",
1269
+ lines=4
1270
+ )
1271
+
1272
+ with gr.Row():
1273
+ advanced_language = gr.Dropdown(
1274
+ choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
1275
+ value='Auto Detect',
1276
+ label="Language"
1277
+ )
1278
+ advanced_theme = gr.Dropdown(
1279
+ choices=list(config.THEMES.keys()),
1280
+ value='default',
1281
+ label="Theme"
1282
  )
1283
 
1284
+ gr.Markdown("### 🔍 Explainability Options")
1285
+ with gr.Row():
1286
+ use_lime = gr.Checkbox(label="Use LIME Analysis", value=True)
1287
+ use_attention = gr.Checkbox(label="Use Attention Weights", value=True)
1288
+
1289
+ lime_features = gr.Slider(
1290
+ minimum=5,
1291
+ maximum=20,
1292
+ value=10,
1293
+ step=1,
1294
+ label="LIME Features Count"
1295
+ )
1296
+
1297
+ advanced_analyze_btn = gr.Button("🔬 Advanced Analyze", variant="primary", size="lg")
 
 
1298
 
1299
+ with gr.Column(scale=1):
1300
+ advanced_result_info = gr.Markdown("Configure explainability settings and click Advanced Analyze")
1301
 
1302
+ with gr.Row():
1303
+ advanced_gauge_plot = gr.Plot(label="Sentiment Gauge")
1304
+ advanced_bars_plot = gr.Plot(label="Probability Distribution")
1305
+
1306
+ with gr.Row():
1307
+ lime_plot = gr.Plot(label="LIME Feature Importance")
1308
+ attention_plot = gr.Plot(label="Attention Weights")
1309
+
1310
+ with gr.Tab("📊 Batch Analysis"):
1311
+ with gr.Row():
1312
+ with gr.Column(scale=2):
1313
+ batch_input = gr.Textbox(
1314
+ label="Batch Text Input (One text per line)",
1315
+ placeholder="Enter multiple texts, one per line...",
1316
+ lines=8
1317
+ )
1318
+
1319
+ with gr.Row():
1320
+ batch_language = gr.Dropdown(
1321
+ choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
1322
+ value='Auto Detect',
1323
+ label="Language"
1324
+ )
1325
+ batch_theme = gr.Dropdown(
1326
+ choices=list(config.THEMES.keys()),
1327
+ value='default',
1328
+ label="Theme"
1329
+ )
1330
+
1331
+ with gr.Row():
1332
+ batch_clean = gr.Checkbox(label="Clean Text", value=False)
1333
+ batch_remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
1334
+ batch_remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
1335
+
1336
+ batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
1337
+
1338
+ gr.Examples(
1339
+ examples=[[BATCH_SAMPLE]],
1340
+ inputs=batch_input,
1341
+ label="Sample Batch Input"
1342
+ )
1343
 
1344
+ with gr.Column(scale=1):
1345
+ batch_summary = gr.Markdown("Enter texts and click Analyze Batch")
1346
 
1347
+ with gr.Row():
1348
+ batch_results_table = gr.DataFrame(
1349
+ label="Detailed Results",
1350
+ interactive=False
1351
+ )
1352
 
1353
+ with gr.Row():
1354
+ batch_summary_plot = gr.Plot(label="Sentiment Summary")
1355
+ batch_confidence_plot = gr.Plot(label="Confidence Distribution")
1356
+
1357
+ with gr.Tab("📈 History & Analytics"):
1358
+ with gr.Row():
1359
+ with gr.Column():
1360
+ gr.Markdown("### 📊 Statistics")
1361
+ stats_btn = gr.Button("📈 Get Statistics")
1362
+ recent_btn = gr.Button("🕒 Recent Analyses")
1363
+ stats_output = gr.Markdown("Click 'Get Statistics' to view analysis history")
1364
+
1365
+ with gr.Column():
1366
+ gr.Markdown("### 🔍 Filter History")
1367
+ with gr.Row():
1368
+ sentiment_filter = gr.Dropdown(
1369
+ choices=["All", "Positive", "Negative", "Neutral"],
1370
+ value="All",
1371
+ label="Filter by Sentiment"
1372
+ )
1373
+ language_filter = gr.Dropdown(
1374
+ choices=["All", "English", "Chinese", "Spanish", "French", "German", "Swedish"],
1375
+ value="All",
1376
+ label="Filter by Language"
1377
+ )
1378
+
1379
+ confidence_filter = gr.Slider(
1380
+ minimum=0.0,
1381
+ maximum=1.0,
1382
+ value=0.0,
1383
+ step=0.1,
1384
+ label="Minimum Confidence"
1385
+ )
1386
+
1387
+ filter_btn = gr.Button("🔍 Filter History")
1388
 
1389
+ with gr.Row():
1390
+ dashboard_btn = gr.Button("📊 View Dashboard")
1391
+ clear_btn = gr.Button("🗑️ Clear History", variant="stop")
 
 
1392
 
1393
+ with gr.Row():
1394
+ export_csv_btn = gr.Button("📄 Export CSV")
1395
+ export_excel_btn = gr.Button("📊 Export Excel")
 
1396
 
1397
+ dashboard_plot = gr.Plot(label="Analytics Dashboard")
 
 
 
1398
 
1399
+ with gr.Row():
1400
+ filtered_results = gr.Markdown("Use filters to view specific entries")
1401
+ filtered_table = gr.DataFrame(label="Filtered History", interactive=False)
 
1402
 
1403
+ csv_file = gr.File(label="Download CSV Report")
1404
+ excel_file = gr.File(label="Download Excel Report")
1405
+ history_status = gr.Textbox(label="Status", interactive=False)
1406
+
1407
+ # Event handlers
1408
+
1409
+ # Single Analysis
1410
+ analyze_btn.click(
1411
+ analyze_single_text,
1412
+ inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
1413
+ outputs=[result_info, gauge_plot, bars_plot]
1414
+ )
1415
+
1416
+ # Batch Analysis
1417
+ batch_analyze_btn.click(
1418
+ analyze_batch_texts,
1419
+ inputs=[batch_input, batch_language, batch_theme, batch_clean, batch_remove_punct, batch_remove_nums],
1420
+ outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
1421
+ )
1422
+
1423
+ # Advanced Analysis
1424
+ advanced_analyze_btn.click(
1425
+ analyze_advanced_text,
1426
+ inputs=[advanced_input, advanced_language, advanced_theme, use_lime, use_attention, lime_features],
1427
+ outputs=[advanced_result_info, advanced_gauge_plot, advanced_bars_plot, lime_plot, attention_plot]
1428
+ )
1429
+
1430
+ # History & Analytics
1431
+ stats_btn.click(
1432
+ get_history_stats,
1433
+ outputs=stats_output
1434
+ )
1435
+
1436
+ recent_btn.click(
1437
+ get_recent_analyses,
1438
+ outputs=stats_output
1439
+ )
1440
+
1441
+ filter_btn.click(
1442
+ filter_history_display,
1443
+ inputs=[sentiment_filter, language_filter, confidence_filter],
1444
+ outputs=[filtered_results, filtered_table]
1445
+ )
1446
+
1447
+ dashboard_btn.click(
1448
+ plot_history_dashboard,
1449
+ outputs=[dashboard_plot, history_status]
1450
+ )
1451
+
1452
+ export_csv_btn.click(
1453
+ export_history_csv,
1454
+ outputs=[csv_file, history_status]
1455
+ )
1456
+
1457
+ export_excel_btn.click(
1458
+ export_history_excel,
1459
+ outputs=[excel_file, history_status]
1460
+ )
1461
 
1462
+ clear_btn.click(
1463
+ clear_all_history,
1464
+ outputs=history_status
1465
+ )
1466
 
 
1467
  if __name__ == "__main__":
1468
+ demo.launch(share=True)