entropy25 commited on
Commit
d3eb8f6
·
verified ·
1 Parent(s): be7d5b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +443 -859
app.py CHANGED
@@ -5,6 +5,7 @@ import plotly.graph_objects as go
5
  import plotly.express as px
6
  from plotly.subplots import make_subplots
7
  import numpy as np
 
8
  from collections import Counter, defaultdict
9
  import re
10
  import json
@@ -16,54 +17,11 @@ import logging
16
  from functools import lru_cache
17
  from dataclasses import dataclass
18
  from typing import List, Dict, Optional, Tuple
 
 
 
19
  import pandas as pd
20
 
21
- # 设置日志 - 提前初始化
22
- logging.basicConfig(level=logging.INFO)
23
- logger = logging.getLogger(__name__)
24
-
25
- # 尝试导入可选依赖
26
- try:
27
- from wordcloud import WordCloud
28
- WORDCLOUD_AVAILABLE = True
29
- except ImportError:
30
- WORDCLOUD_AVAILABLE = False
31
- logger.warning("WordCloud not available")
32
-
33
- try:
34
- import nltk
35
- from nltk.corpus import stopwords
36
- nltk.download('stopwords', quiet=True)
37
- nltk.download('punkt', quiet=True)
38
- STOP_WORDS = set(stopwords.words('english'))
39
- NLTK_AVAILABLE = True
40
- except:
41
- NLTK_AVAILABLE = False
42
- STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
43
- logger.warning("NLTK not available, using basic stopwords")
44
-
45
- try:
46
- import langdetect
47
- LANGDETECT_AVAILABLE = True
48
- except ImportError:
49
- LANGDETECT_AVAILABLE = False
50
- logger.warning("langdetect not available, using fallback language detection")
51
-
52
- # 尝试导入SHAP和LIME
53
- try:
54
- import shap
55
- SHAP_AVAILABLE = True
56
- except ImportError:
57
- SHAP_AVAILABLE = False
58
- logger.warning("SHAP not available, using basic analysis")
59
-
60
- try:
61
- from lime.lime_text import LimeTextExplainer
62
- LIME_AVAILABLE = True
63
- except ImportError:
64
- LIME_AVAILABLE = False
65
- logger.warning("LIME not available, using basic analysis")
66
-
67
  # Configuration
68
  @dataclass
69
  class Config:
@@ -83,13 +41,10 @@ class Config:
83
  'sv': 'Swedish'
84
  }
85
 
86
- # 使用更稳定的模型
87
  MODELS = {
88
  'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
89
  'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
90
- 'zh': "uer/roberta-base-finetuned-dianping-chinese",
91
- # 备用模型
92
- 'fallback': "distilbert-base-uncased-finetuned-sst-2-english"
93
  }
94
 
95
  # Color themes
@@ -102,80 +57,62 @@ class Config:
102
 
103
  config = Config()
104
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  class ModelManager:
106
- """Manages multiple language models with better error handling"""
107
  def __init__(self):
108
  self.models = {}
109
  self.tokenizers = {}
110
- self.device = self._get_device()
111
- self.model_loaded = False
112
- self._load_models()
113
 
114
- def _get_device(self):
115
- """安全的设备选择"""
116
  try:
117
- if torch.cuda.is_available():
118
- return torch.device("cuda")
119
- elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
120
- return torch.device("mps")
121
- else:
122
- return torch.device("cpu")
123
- except:
124
- return torch.device("cpu")
125
-
126
- def _load_models(self):
127
- """Load models with error handling"""
128
- try:
129
- # 首先尝试加载多语言模型
130
  model_name = config.MODELS['multilingual']
131
- logger.info(f"Loading model: {model_name}")
132
-
133
  self.tokenizers['default'] = AutoTokenizer.from_pretrained(model_name)
134
  self.models['default'] = AutoModelForSequenceClassification.from_pretrained(model_name)
135
  self.models['default'].to(self.device)
 
136
 
137
- logger.info(f"Successfully loaded default model: {model_name}")
138
- self.model_loaded = True
 
 
 
 
139
 
140
  except Exception as e:
141
- logger.error(f"Failed to load multilingual model: {e}")
142
- # 尝试备用模型
143
- try:
144
- fallback_model = config.MODELS['fallback']
145
- logger.info(f"Trying fallback model: {fallback_model}")
146
-
147
- self.tokenizers['default'] = AutoTokenizer.from_pretrained(fallback_model)
148
- self.models['default'] = AutoModelForSequenceClassification.from_pretrained(fallback_model)
149
- self.models['default'].to(self.device)
150
-
151
- logger.info(f"Successfully loaded fallback model: {fallback_model}")
152
- self.model_loaded = True
153
-
154
- except Exception as e2:
155
- logger.error(f"Failed to load fallback model: {e2}")
156
- self.model_loaded = False
157
- raise RuntimeError("Failed to load any sentiment analysis model")
158
 
159
  def get_model(self, language='en'):
160
  """Get model for specific language"""
161
- if not self.model_loaded:
162
- raise RuntimeError("No models are loaded")
163
-
164
- # 简化:所有语言都使用默认模型
165
- return self.models['default'], self.tokenizers['default']
166
 
167
  @staticmethod
168
  def detect_language(text: str) -> str:
169
- """Detect text language with fallback"""
170
- if not LANGDETECT_AVAILABLE:
171
- # 简单的语言检测
172
- if re.search(r'[\u4e00-\u9fff]', text):
173
- return 'zh'
174
- else:
175
- return 'en'
176
-
177
  try:
 
178
  detected = langdetect.detect(text)
 
179
  language_mapping = {
180
  'zh-cn': 'zh',
181
  'zh-tw': 'zh'
@@ -185,23 +122,10 @@ class ModelManager:
185
  except:
186
  return 'en'
187
 
188
- # 全局模型管理器
189
- model_manager = None
190
-
191
- def initialize_models():
192
- """延迟初始化模型"""
193
- global model_manager
194
- if model_manager is None:
195
- try:
196
- model_manager = ModelManager()
197
- return True
198
- except Exception as e:
199
- logger.error(f"Model initialization failed: {e}")
200
- return False
201
- return True
202
 
203
  class HistoryManager:
204
- """Enhanced history manager"""
205
  def __init__(self):
206
  self._history = []
207
 
@@ -211,6 +135,7 @@ class HistoryManager:
211
  self._history = self._history[-config.MAX_HISTORY_SIZE:]
212
 
213
  def add_batch_entries(self, entries: List[Dict]):
 
214
  for entry in entries:
215
  self.add_entry(entry)
216
 
@@ -218,10 +143,12 @@ class HistoryManager:
218
  return self._history.copy()
219
 
220
  def get_recent_history(self, n: int = 10) -> List[Dict]:
 
221
  return self._history[-n:] if self._history else []
222
 
223
  def filter_history(self, sentiment: str = None, language: str = None,
224
  min_confidence: float = None) -> List[Dict]:
 
225
  filtered = self._history
226
 
227
  if sentiment:
@@ -251,12 +178,12 @@ class HistoryManager:
251
  'positive_count': sentiments.count('Positive'),
252
  'negative_count': sentiments.count('Negative'),
253
  'neutral_count': sentiments.count('Neutral'),
254
- 'avg_confidence': np.mean(confidences) if confidences else 0,
255
- 'max_confidence': np.max(confidences) if confidences else 0,
256
- 'min_confidence': np.min(confidences) if confidences else 0,
257
  'languages_detected': len(set(languages)),
258
  'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
259
- 'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history]) if self._history else 0
260
  }
261
 
262
  history_manager = HistoryManager()
@@ -283,12 +210,14 @@ class TextProcessor:
283
  @staticmethod
284
  def extract_keywords(text: str, top_k: int = 5) -> List[str]:
285
  """Extract key words from text"""
 
286
  if re.search(r'[\u4e00-\u9fff]', text):
287
  words = re.findall(r'[\u4e00-\u9fff]+', text)
288
  all_chars = ''.join(words)
289
  char_freq = Counter(all_chars)
290
  return [char for char, _ in char_freq.most_common(top_k)]
291
  else:
 
292
  cleaned = TextProcessor.clean_text(text)
293
  words = cleaned.split()
294
  word_freq = Counter(words)
@@ -301,7 +230,7 @@ class TextProcessor:
301
  return [line.strip() for line in lines if line.strip()]
302
 
303
  class SentimentAnalyzer:
304
- """Enhanced sentiment analysis with better error handling"""
305
 
306
  @staticmethod
307
  def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
@@ -309,10 +238,6 @@ class SentimentAnalyzer:
309
  if not text.strip():
310
  raise ValueError("Empty text provided")
311
 
312
- # 确保模型已加载
313
- if not initialize_models():
314
- raise RuntimeError("Failed to initialize sentiment analysis models")
315
-
316
  # Detect language if auto
317
  if language == 'auto':
318
  detected_lang = model_manager.detect_language(text)
@@ -320,13 +245,9 @@ class SentimentAnalyzer:
320
  detected_lang = language
321
 
322
  # Get appropriate model
323
- try:
324
- model, tokenizer = model_manager.get_model(detected_lang)
325
- except Exception as e:
326
- logger.error(f"Failed to get model: {e}")
327
- raise RuntimeError(f"Model loading failed: {e}")
328
 
329
- # Preprocessing
330
  options = preprocessing_options or {}
331
  processed_text = text
332
  if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
@@ -338,13 +259,8 @@ class SentimentAnalyzer:
338
 
339
  try:
340
  # Tokenize and analyze
341
- inputs = tokenizer(
342
- processed_text,
343
- return_tensors="pt",
344
- padding=True,
345
- truncation=True,
346
- max_length=config.MAX_TEXT_LENGTH
347
- ).to(model_manager.device)
348
 
349
  with torch.no_grad():
350
  outputs = model(**inputs)
@@ -391,7 +307,7 @@ class SentimentAnalyzer:
391
 
392
  except Exception as e:
393
  logger.error(f"Analysis failed: {e}")
394
- raise RuntimeError(f"Sentiment analysis failed: {e}")
395
 
396
  @staticmethod
397
  def analyze_batch(texts: List[str], language: str = 'auto',
@@ -404,461 +320,207 @@ class SentimentAnalyzer:
404
  result['batch_index'] = i
405
  results.append(result)
406
  except Exception as e:
 
407
  results.append({
408
  'sentiment': 'Error',
409
  'confidence': 0.0,
410
  'error': str(e),
411
  'batch_index': i,
412
- 'text': text[:50] + '...' if len(text) > 50 else text
413
  })
414
  return results
415
 
416
- class ExplainabilityAnalyzer:
417
- """SHAP and LIME explainability analysis with fallbacks"""
418
 
419
  @staticmethod
420
- def create_prediction_function(model, tokenizer, device):
421
- """Create prediction function for LIME"""
422
- def predict_proba(texts):
423
- if isinstance(texts, str):
424
- texts = [texts]
425
-
426
- results = []
427
- for text in texts:
428
- try:
429
- inputs = tokenizer(text, return_tensors="pt", padding=True,
430
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
431
- with torch.no_grad():
432
- outputs = model(**inputs)
433
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
434
- results.append(probs)
435
- except Exception as e:
436
- # Return neutral probabilities on error
437
- if len(results) > 0:
438
- results.append(results[0]) # Use previous result
439
- else:
440
- results.append(np.array([0.33, 0.33, 0.34])) # Neutral fallback
441
-
442
- return np.array(results)
443
- return predict_proba
444
-
445
- @staticmethod
446
- def analyze_with_lime(text: str, model, tokenizer, device, num_features: int = 10) -> Dict:
447
- """Analyze text with LIME"""
448
- if not LIME_AVAILABLE:
449
- return {'method': 'LIME', 'error': 'LIME library not available. Install with: pip install lime'}
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
- try:
452
- # Create prediction function
453
- predict_fn = ExplainabilityAnalyzer.create_prediction_function(model, tokenizer, device)
454
-
455
- # Test prediction function first
456
- test_probs = predict_fn([text])
457
- if len(test_probs) == 0:
458
- return {'method': 'LIME', 'error': 'Prediction function failed'}
459
-
460
- # Determine class names based on model output
461
- num_classes = len(test_probs[0])
462
- if num_classes == 3:
463
- class_names = ['Negative', 'Neutral', 'Positive']
464
- else:
465
- class_names = ['Negative', 'Positive']
466
-
467
- # Initialize LIME explainer
468
- explainer = LimeTextExplainer(
469
- class_names=class_names,
470
- feature_selection='auto',
471
- split_expression=r'\W+',
472
- bow=False
473
- )
474
-
475
- # Generate explanation
476
- explanation = explainer.explain_instance(
477
- text,
478
- predict_fn,
479
- num_features=min(num_features, len(text.split())),
480
- num_samples=50 # Reduced for faster processing
481
- )
482
-
483
- # Extract feature importance
484
- feature_importance = explanation.as_list()
485
-
486
- return {
487
- 'method': 'LIME',
488
- 'feature_importance': feature_importance,
489
- 'class_names': class_names,
490
- 'success': True
491
- }
492
-
493
- except Exception as e:
494
- logger.error(f"LIME analysis failed: {e}")
495
- return {'method': 'LIME', 'error': str(e)}
496
-
497
- @staticmethod
498
- def analyze_with_attention(text: str, model, tokenizer, device) -> Dict:
499
- """Analyze text with attention weights - simplified version"""
500
- try:
501
- # Tokenize input
502
- inputs = tokenizer(text, return_tensors="pt", padding=True,
503
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
504
-
505
- # Get tokens for display
506
- tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
507
-
508
- # Simple attention simulation based on input importance
509
- try:
510
- with torch.no_grad():
511
- outputs = model(**inputs, output_attentions=True)
512
- if hasattr(outputs, 'attentions') and outputs.attentions is not None:
513
- attentions = outputs.attentions
514
- # Average attention across layers and heads
515
- avg_attention = torch.mean(torch.stack(attentions), dim=(0, 1, 2)).cpu().numpy()
516
- else:
517
- raise AttributeError("No attention outputs")
518
- except:
519
- # Fallback: simulate attention based on token position and type
520
- avg_attention = np.random.uniform(0.1, 1.0, len(tokens))
521
- # Give higher attention to non-special tokens
522
- for i, token in enumerate(tokens):
523
- if token in ['[CLS]', '[SEP]', '<s>', '</s>', '<pad>']:
524
- avg_attention[i] *= 0.3
525
-
526
- # Create attention weights for each token
527
- attention_weights = []
528
- for i, token in enumerate(tokens):
529
- if i < len(avg_attention):
530
- # Clean token for display
531
- clean_token = token.replace('Ġ', '').replace('##', '')
532
- if clean_token.strip():
533
- attention_weights.append((clean_token, float(avg_attention[i])))
534
-
535
- return {
536
- 'method': 'Attention',
537
- 'tokens': [t[0] for t in attention_weights],
538
- 'attention_weights': attention_weights,
539
- 'success': True
540
- }
541
-
542
- except Exception as e:
543
- logger.error(f"Attention analysis failed: {e}")
544
- return {'method': 'Attention', 'error': str(e)}
545
-
546
- class AdvancedVisualizer:
547
- """Enhanced visualizations with Plotly - 修复了类名"""
548
 
549
  @staticmethod
550
- def create_lime_plot(lime_result: Dict, theme: str = 'default') -> go.Figure:
551
- """Create LIME feature importance plot"""
552
- if 'error' in lime_result:
553
- fig = go.Figure()
554
- fig.add_annotation(
555
- text=f"LIME Error: {lime_result['error']}",
556
- x=0.5, y=0.5,
557
- xref="paper", yref="paper",
558
- showarrow=False,
559
- font=dict(size=14)
560
- )
561
- fig.update_layout(height=400, title="LIME Analysis Error")
562
- return fig
563
-
564
- if not lime_result.get('feature_importance'):
565
- fig = go.Figure()
566
- fig.add_annotation(
567
- text="No LIME features available",
568
- x=0.5, y=0.5,
569
- xref="paper", yref="paper",
570
- showarrow=False
571
- )
572
- fig.update_layout(height=400, title="No LIME Data")
573
- return fig
574
 
575
- features, scores = zip(*lime_result['feature_importance'])
576
- colors = ['red' if score < 0 else 'green' for score in scores]
 
 
 
 
 
 
577
 
578
  fig = go.Figure(data=[
579
- go.Bar(
580
- y=features,
581
- x=scores,
582
- orientation='h',
583
- marker_color=colors,
584
- text=[f'{score:.3f}' for score in scores],
585
- textposition='auto',
586
- hovertemplate='<b>%{y}</b><br>Importance: %{x:.3f}<extra></extra>'
587
- )
588
  ])
589
 
 
590
  fig.update_layout(
591
- title="LIME Feature Importance Analysis",
592
- xaxis_title="Importance Score (Negative ← → Positive)",
593
- yaxis_title="Features",
594
  height=400,
595
  showlegend=False
596
  )
597
 
598
  return fig
599
-
600
  @staticmethod
601
- def create_attention_plot(attention_result: Dict, theme: str = 'default') -> go.Figure:
602
- """Create attention weights visualization"""
603
- if 'error' in attention_result:
604
- fig = go.Figure()
605
- fig.add_annotation(
606
- text=f"Attention Error: {attention_result['error']}",
607
- x=0.5, y=0.5,
608
- xref="paper", yref="paper",
609
- showarrow=False,
610
- font=dict(size=14)
611
- )
612
- fig.update_layout(height=400, title="Attention Analysis Error")
613
- return fig
614
-
615
- if not attention_result.get('attention_weights'):
616
- fig = go.Figure()
617
- fig.add_annotation(
618
- text="No attention weights available",
619
- x=0.5, y=0.5,
620
- xref="paper", yref="paper",
621
- showarrow=False
622
- )
623
- fig.update_layout(height=400, title="No Attention Data")
624
- return fig
625
 
626
- tokens, weights = zip(*attention_result['attention_weights'])
 
 
627
 
628
- # Normalize weights for better visualization
629
- weights = np.array(weights)
630
- if weights.max() > weights.min():
631
- normalized_weights = (weights - weights.min()) / (weights.max() - weights.min())
632
- else:
633
- normalized_weights = weights
 
 
634
 
635
- # Limit display to top 15 tokens for readability
636
- if len(tokens) > 15:
637
- # Get top 15 by attention weight
638
- top_indices = np.argsort(weights)[-15:]
639
- tokens = [tokens[i] for i in top_indices]
640
- normalized_weights = normalized_weights[top_indices]
641
- weights = weights[top_indices]
642
 
643
- fig = go.Figure(data=[
644
- go.Bar(
645
- x=list(range(len(tokens))),
646
- y=normalized_weights,
647
- text=tokens,
648
- textposition='outside',
649
- marker_color=normalized_weights,
650
- colorscale='Viridis',
651
- hovertemplate='<b>%{text}</b><br>Attention Weight: %{customdata:.3f}<extra></extra>',
652
- customdata=weights
653
- )
654
- ])
 
 
 
 
655
 
656
  fig.update_layout(
657
- title="Attention Weights Analysis (Top Tokens)",
658
- xaxis_title="Token Position",
659
- yaxis_title="Attention Weight (Normalized)",
660
- height=400,
661
- showlegend=False,
662
- xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens, tickangle=45)
663
  )
664
 
665
  return fig
666
 
667
  @staticmethod
668
- def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
669
- """Create an animated sentiment gauge"""
670
- colors = config.THEMES.get(theme, config.THEMES['default'])
 
671
 
672
- try:
673
- if result.get('has_neutral', False):
674
- # Three-way gauge
675
- fig = go.Figure(go.Indicator(
676
- mode="gauge+number+delta",
677
- value=result['pos_prob'] * 100,
678
- domain={'x': [0, 1], 'y': [0, 1]},
679
- title={'text': f"Sentiment: {result['sentiment']}"},
680
- delta={'reference': 50},
681
- gauge={
682
- 'axis': {'range': [None, 100]},
683
- 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
684
- 'steps': [
685
- {'range': [0, 33], 'color': colors['neg']},
686
- {'range': [33, 67], 'color': colors['neu']},
687
- {'range': [67, 100], 'color': colors['pos']}
688
- ],
689
- 'threshold': {
690
- 'line': {'color': "red", 'width': 4},
691
- 'thickness': 0.75,
692
- 'value': 90
693
- }
694
- }
695
- ))
696
- else:
697
- # Two-way gauge
698
- fig = go.Figure(go.Indicator(
699
- mode="gauge+number",
700
- value=result['confidence'] * 100,
701
- domain={'x': [0, 1], 'y': [0, 1]},
702
- title={'text': f"Confidence: {result['sentiment']}"},
703
- gauge={
704
- 'axis': {'range': [None, 100]},
705
- 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
706
- 'steps': [
707
- {'range': [0, 50], 'color': "lightgray"},
708
- {'range': [50, 100], 'color': "gray"}
709
- ]
710
- }
711
- ))
712
-
713
- fig.update_layout(height=400, font={'size': 16})
714
- return fig
715
-
716
- except Exception as e:
717
- logger.error(f"Failed to create gauge: {e}")
718
- # 返回错误图表
719
- fig = go.Figure()
720
- fig.add_annotation(
721
- text=f"Visualization Error: {str(e)}",
722
- x=0.5, y=0.5,
723
- xref="paper", yref="paper",
724
- showarrow=False,
725
- font=dict(size=14)
726
- )
727
- fig.update_layout(height=400)
728
- return fig
729
-
730
- @staticmethod
731
- def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
732
- """Create probability bar chart"""
733
- colors = config.THEMES.get(theme, config.THEMES['default'])
734
 
735
- try:
736
- if result.get('has_neutral', False):
737
- labels = ['Negative', 'Neutral', 'Positive']
738
- values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
739
- bar_colors = [colors['neg'], colors['neu'], colors['pos']]
740
- else:
741
- labels = ['Negative', 'Positive']
742
- values = [result['neg_prob'], result['pos_prob']]
743
- bar_colors = [colors['neg'], colors['pos']]
744
-
745
- fig = go.Figure(data=[
746
- go.Bar(x=labels, y=values, marker_color=bar_colors,
747
- text=[f'{v:.3f}' for v in values])
748
- ])
749
-
750
- fig.update_traces(texttemplate='%{text}', textposition='outside')
751
- fig.update_layout(
752
- title="Sentiment Probabilities",
753
- yaxis_title="Probability",
754
- height=400,
755
- showlegend=False
756
- )
757
-
758
- return fig
759
-
760
- except Exception as e:
761
- logger.error(f"Failed to create bars: {e}")
762
- fig = go.Figure()
763
- fig.add_annotation(
764
- text=f"Visualization Error: {str(e)}",
765
- x=0.5, y=0.5,
766
- xref="paper", yref="paper",
767
- showarrow=False
768
- )
769
- fig.update_layout(height=400)
770
- return fig
771
-
772
- @staticmethod
773
- def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
774
- """Create batch analysis summary"""
775
- colors = config.THEMES.get(theme, config.THEMES['default'])
776
 
777
- try:
778
- # Count sentiments
779
- sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
780
- if not sentiments:
781
- fig = go.Figure()
782
- fig.add_annotation(text="No valid results to display", x=0.5, y=0.5,
783
- xref="paper", yref="paper", showarrow=False)
784
- fig.update_layout(height=400, title="Batch Summary")
785
- return fig
786
-
787
- sentiment_counts = Counter(sentiments)
788
-
789
- # Create pie chart
790
- fig = go.Figure(data=[go.Pie(
791
- labels=list(sentiment_counts.keys()),
792
- values=list(sentiment_counts.values()),
793
- marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
794
- textinfo='label+percent',
795
- hole=0.3
796
- )])
797
-
798
- fig.update_layout(
799
- title=f"Batch Analysis Summary ({len(results)} texts)",
800
- height=400
801
- )
802
-
803
- return fig
804
-
805
- except Exception as e:
806
- logger.error(f"Failed to create batch summary: {e}")
807
- fig = go.Figure()
808
- fig.add_annotation(text=f"Error: {str(e)}", x=0.5, y=0.5,
809
- xref="paper", yref="paper", showarrow=False)
810
- fig.update_layout(height=400)
811
- return fig
812
-
813
- @staticmethod
814
- def create_confidence_distribution(results: List[Dict]) -> go.Figure:
815
- """Create confidence distribution plot"""
816
- try:
817
- confidences = [r['confidence'] for r in results
818
- if 'confidence' in r and r['sentiment'] != 'Error']
819
-
820
- if not confidences:
821
- fig = go.Figure()
822
- fig.add_annotation(text="No confidence data to display", x=0.5, y=0.5,
823
- xref="paper", yref="paper", showarrow=False)
824
- fig.update_layout(height=400, title="Confidence Distribution")
825
- return fig
826
-
827
- fig = go.Figure(data=[go.Histogram(
828
- x=confidences,
829
- nbinsx=20,
830
- marker_color='skyblue',
831
- opacity=0.7
832
- )])
833
-
834
- fig.update_layout(
835
- title="Confidence Distribution",
836
- xaxis_title="Confidence Score",
837
- yaxis_title="Frequency",
838
- height=400
839
- )
840
-
841
- return fig
842
-
843
- except Exception as e:
844
- logger.error(f"Failed to create confidence distribution: {e}")
845
- fig = go.Figure()
846
- fig.add_annotation(text=f"Error: {str(e)}", x=0.5, y=0.5,
847
- xref="paper", yref="paper", showarrow=False)
848
- fig.update_layout(height=400)
849
- return fig
850
 
851
- # Main application functions with better error handling
852
  def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
853
  remove_punct: bool, remove_nums: bool):
854
- """Enhanced single text analysis with error handling"""
855
  try:
856
  if not text.strip():
857
- return "Please enter text to analyze", None, None
858
-
859
- # 初始化检查
860
- if not initialize_models():
861
- return "❌ Failed to load sentiment analysis models. Please check your internet connection and try again.", None, None
862
 
863
  # Map display names back to language codes
864
  language_map = {
@@ -878,7 +540,6 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
878
  'remove_numbers': remove_nums
879
  }
880
 
881
- # 分析文本
882
  result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)
883
 
884
  # Add to history
@@ -897,49 +558,39 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
897
  history_manager.add_entry(history_entry)
898
 
899
  # Create visualizations
900
- gauge_fig = AdvancedVisualizer.create_sentiment_gauge(result, theme)
901
- bars_fig = AdvancedVisualizer.create_probability_bars(result, theme)
902
 
903
  # Create info text
904
  info_text = f"""
905
- **Analysis Results:**
906
- - **Sentiment:** {result['sentiment']} (Confidence: {result['confidence']:.3f})
907
  - **Language:** {result['language'].upper()}
908
- - **Keywords:** {', '.join(result['keywords']) if result['keywords'] else 'None'}
909
- - **Text Stats:** {result['word_count']} words, {result['char_count']} characters
910
-
911
- 📊 **Probability Scores:**
912
- - Positive: {result['pos_prob']:.3f}
913
- - Negative: {result['neg_prob']:.3f}
914
- - Neutral: {result.get('neu_prob', 0):.3f}
915
  """
916
 
917
  return info_text, gauge_fig, bars_fig
918
 
919
  except Exception as e:
920
- logger.error(f"Single text analysis failed: {e}")
921
- error_msg = f" **Analysis Failed:** {str(e)}\n\nPlease check your input and try again."
922
- return error_msg, None, None
923
 
924
  def analyze_batch_texts(batch_text: str, language: str, theme: str,
925
  clean_text: bool, remove_punct: bool, remove_nums: bool):
926
- """Batch text analysis with improved error handling"""
927
  try:
928
  if not batch_text.strip():
929
- return "Please enter texts to analyze (one per line)", None, None, None
930
-
931
- # 初始化检查
932
- if not initialize_models():
933
- return "❌ Failed to load sentiment analysis models", None, None, None
934
 
935
  # Parse batch input
936
  texts = TextProcessor.parse_batch_input(batch_text)
937
 
938
  if len(texts) > config.BATCH_SIZE_LIMIT:
939
- return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
940
 
941
  if not texts:
942
- return "No valid texts found", None, None, None
943
 
944
  # Map display names back to language codes
945
  language_map = {
@@ -984,8 +635,8 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
984
  history_manager.add_batch_entries(batch_entries)
985
 
986
  # Create visualizations
987
- summary_fig = AdvancedVisualizer.create_batch_summary(results, theme)
988
- confidence_fig = AdvancedVisualizer.create_confidence_distribution(results)
989
 
990
  # Create results table
991
  df_data = []
@@ -995,9 +646,9 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
995
  'Index': i+1,
996
  'Text': text[:50] + '...' if len(text) > 50 else text,
997
  'Sentiment': 'Error',
998
- 'Confidence': '0.000',
999
  'Language': 'Unknown',
1000
- 'Error': result.get('error', 'Unknown error')
1001
  })
1002
  else:
1003
  df_data.append({
@@ -1006,7 +657,7 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
1006
  'Sentiment': result['sentiment'],
1007
  'Confidence': f"{result['confidence']:.3f}",
1008
  'Language': result['language'].upper(),
1009
- 'Keywords': ', '.join(result.get('keywords', [])[:3])
1010
  })
1011
 
1012
  df = pd.DataFrame(df_data)
@@ -1020,235 +671,214 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
1020
  avg_confidence = np.mean([r['confidence'] for r in successful_results])
1021
 
1022
  summary_text = f"""
1023
- **Batch Analysis Summary:**
1024
  - **Total Texts:** {len(texts)}
1025
  - **Successful:** {len(successful_results)}
1026
  - **Errors:** {error_count}
1027
  - **Average Confidence:** {avg_confidence:.3f}
1028
- - **Sentiment Distribution:**
1029
- - Positive: {sentiment_counts.get('Positive', 0)}
1030
- - Negative: {sentiment_counts.get('Negative', 0)}
1031
- - Neutral: {sentiment_counts.get('Neutral', 0)}
1032
  """
1033
  else:
1034
- summary_text = f"All {len(texts)} texts failed to analyze."
1035
 
1036
  return summary_text, df, summary_fig, confidence_fig
1037
 
1038
  except Exception as e:
1039
  logger.error(f"Batch analysis failed: {e}")
1040
- return f"Error: {str(e)}", None, None, None
1041
-
1042
- def get_history_stats():
1043
 
1044
- 💡 **Understanding the Results:**
1045
- - **LIME** shows which words push the sentiment positive/negative
1046
- - **Attention** shows which tokens the model focuses on most
1047
- - Higher confidence scores indicate more certain predictions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1048
  """
1049
 
1050
- return info_text, gauge_fig, bars_fig, lime_plot, attention_plot
 
 
 
 
 
 
1051
 
1052
  except Exception as e:
1053
  logger.error(f"Advanced analysis failed: {e}")
1054
- # Return basic empty plots on complete failure
1055
- empty_fig = go.Figure()
1056
- empty_fig.add_annotation(text=f"Analysis failed: {str(e)}", x=0.5, y=0.5,
1057
- xref="paper", yref="paper", showarrow=False)
1058
- empty_fig.update_layout(height=400)
1059
-
1060
- return f"❌ Error: {str(e)}", empty_fig, empty_fig, empty_fig, empty_fig
1061
  """Get enhanced history statistics"""
1062
- try:
1063
- stats = history_manager.get_stats()
1064
- if not stats:
1065
- return "📊 No analysis history available yet. Analyze some texts to see statistics!"
1066
-
1067
- return f"""
1068
- 📊 **Comprehensive History Statistics:**
1069
 
1070
- **📈 Analysis Counts:**
1071
  - Total Analyses: {stats['total_analyses']}
1072
- - 😊 Positive: {stats['positive_count']}
1073
- - 😞 Negative: {stats['negative_count']}
1074
- - 😐 Neutral: {stats['neutral_count']}
1075
 
1076
- **🎯 Confidence Metrics:**
1077
  - Average Confidence: {stats['avg_confidence']:.3f}
1078
  - Highest Confidence: {stats['max_confidence']:.3f}
1079
  - Lowest Confidence: {stats['min_confidence']:.3f}
1080
 
1081
- **🌍 Language Statistics:**
1082
  - Languages Detected: {stats['languages_detected']}
1083
  - Most Common Language: {stats['most_common_language'].upper()}
1084
 
1085
- **📝 Text Statistics:**
1086
  - Average Text Length: {stats['avg_text_length']:.1f} characters
1087
- """
1088
- except Exception as e:
1089
- logger.error(f"Failed to get history stats: {e}")
1090
- return f"❌ Error getting statistics: {str(e)}"
1091
 
1092
  def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
1093
  """Display filtered history"""
1094
- try:
1095
- # Convert filters
1096
- sentiment = sentiment_filter if sentiment_filter != "All" else None
1097
- language = language_filter.lower() if language_filter != "All" else None
1098
-
1099
- filtered_history = history_manager.filter_history(
1100
- sentiment=sentiment,
1101
- language=language,
1102
- min_confidence=min_confidence if min_confidence > 0 else None
1103
- )
1104
-
1105
- if not filtered_history:
1106
- return "🔍 No entries match the filter criteria", None
1107
-
1108
- # Create DataFrame for display
1109
- df_data = []
1110
- for entry in filtered_history[-20:]: # Show last 20 entries
1111
- df_data.append({
1112
- 'Timestamp': entry['timestamp'][:16], # YYYY-MM-DD HH:MM
1113
- 'Text': entry['text'],
1114
- 'Sentiment': entry['sentiment'],
1115
- 'Confidence': f"{entry['confidence']:.3f}",
1116
- 'Language': entry['language'].upper(),
1117
- 'Type': entry.get('analysis_type', 'single')
1118
- })
1119
-
1120
- df = pd.DataFrame(df_data)
1121
-
1122
- summary = f"""
1123
- 🔍 **Filtered Results:**
1124
  - Found {len(filtered_history)} entries matching criteria
1125
  - Showing most recent {min(20, len(filtered_history))} entries
1126
- """
1127
-
1128
- return summary, df
1129
-
1130
- except Exception as e:
1131
- logger.error(f"Failed to filter history: {e}")
1132
- return f"❌ Error filtering history: {str(e)}", None
1133
 
1134
def plot_history_dashboard():
    """Build the 2x2 history dashboard.

    Returns:
        A ``(figure, message)`` tuple. ``figure`` is ``None`` when fewer than
        two analyses are stored or when building the figure fails; ``message``
        is the status text shown next to the plot.
    """
    try:
        entries = history_manager.get_history()
        if len(entries) < 2:
            return None, "📊 Need at least 2 analyses to create dashboard. Analyze more texts!"

        # One palette shared by the timeline markers and the summary bars.
        palette = {'Positive': '#4CAF50', 'Negative': '#F44336', 'Neutral': '#FF9800'}

        dashboard = make_subplots(
            rows=2, cols=2,
            subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
                            'Language Distribution', 'Sentiment Summary'],
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"type": "pie"}, {"type": "bar"}]]
        )

        # Series extracted from the stored entries.
        positions = list(range(len(entries)))
        positive_scores = [entry.get('pos_prob', 0) for entry in entries]
        confidence_scores = [entry['confidence'] for entry in entries]
        labels = [entry['sentiment'] for entry in entries]
        language_codes = [entry.get('language', 'en') for entry in entries]

        # Top-left: positive probability over time, markers coloured by label.
        # Any label other than Positive/Negative is drawn in the neutral colour.
        marker_colors = [palette.get(label, '#FF9800') for label in labels]
        timeline = go.Scatter(
            x=positions, y=positive_scores, mode='lines+markers',
            marker=dict(color=marker_colors, size=6),
            name='Positive Probability'
        )
        dashboard.add_trace(timeline, row=1, col=1)

        # Top-right: histogram of confidence scores.
        dashboard.add_trace(
            go.Histogram(x=confidence_scores, nbinsx=10, name='Confidence'),
            row=1, col=2
        )

        # Bottom-left: share of each language.
        per_language = Counter(language_codes)
        dashboard.add_trace(
            go.Pie(labels=list(per_language.keys()), values=list(per_language.values()),
                   name="Languages"),
            row=2, col=1
        )

        # Bottom-right: count of each sentiment label; unknown labels are grey.
        per_sentiment = Counter(labels)
        bar_colors = [palette.get(label, '#999999') for label in per_sentiment.keys()]
        dashboard.add_trace(
            go.Bar(x=list(per_sentiment.keys()), y=list(per_sentiment.values()),
                   marker_color=bar_colors),
            row=2, col=2
        )

        dashboard.update_layout(height=800, showlegend=False, title_text="Analysis Dashboard")
        return dashboard, f"📊 Dashboard showing {len(entries)} analyses"

    except Exception as e:
        logger.error(f"Failed to create dashboard: {e}")
        return None, f"❌ Error creating dashboard: {str(e)}"
1196
 
1197
def export_history_csv():
    """Export the analysis history to a temporary CSV file.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the CSV file name, or ``None``
        when the history is empty or the export fails; ``message`` is the
        status text for the UI.
    """
    try:
        history = history_manager.get_history()
        if not history:
            return None, "📊 No history to export"

        df = pd.DataFrame(history)
        # Only a unique path is needed from NamedTemporaryFile. Close the handle
        # before pandas opens the same path: leaving it open leaks a file
        # descriptor and blocks the second open on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w') as temp_file:
            csv_path = temp_file.name
        df.to_csv(csv_path, index=False)
        return csv_path, f"Exported {len(history)} entries to CSV"
    except Exception as e:
        logger.error(f"CSV export failed: {e}")
        return None, f"❌ Export failed: {str(e)}"
1211
 
1212
def export_history_excel():
    """Export the analysis history to a temporary Excel (.xlsx) file.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the workbook file name, or
        ``None`` when the history is empty or the export fails; ``message`` is
        the status text for the UI.
    """
    try:
        history = history_manager.get_history()
        if not history:
            return None, "📊 No history to export"

        df = pd.DataFrame(history)
        # Only a unique path is needed from NamedTemporaryFile. Close the handle
        # before pandas writes to the same path: leaving it open leaks a file
        # descriptor and blocks the second open on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
            xlsx_path = temp_file.name
        df.to_excel(xlsx_path, index=False)
        return xlsx_path, f"Exported {len(history)} entries to Excel"
    except Exception as e:
        logger.error(f"Excel export failed: {e}")
        return None, f"❌ Export failed: {str(e)}"
1226
 
1227
def clear_all_history():
    """Empty the analysis history and return a status message for the UI."""
    try:
        removed = history_manager.clear()
    except Exception as e:
        logger.error(f"Failed to clear history: {e}")
        return f"❌ Error clearing history: {str(e)}"
    return f"🗑️ Cleared {removed} entries from history"
1235
 
1236
def get_recent_analyses():
    """Return a Markdown list of the ten most recent analyses.

    Each line shows rank, an emoji for the sentiment, the sentiment label, the
    confidence to three decimals and the stored text. When there is no history
    a hint message is returned; on failure an error message is returned.
    """
    # Any label other than Positive/Negative gets the neutral face.
    emoji_for = {'Positive': "😊", 'Negative': "😞"}
    try:
        latest = history_manager.get_recent_history(10)
        if not latest:
            return "📊 No recent analyses available. Start analyzing some texts!"

        rows = [
            f"{rank}. {emoji_for.get(item['sentiment'], '😐')} **{item['sentiment']}** "
            f"({item['confidence']:.3f}) - {item['text']}\n"
            for rank, item in enumerate(latest, 1)
        ]
        return "🕒 **Recent Analyses (Last 10):**\n\n" + "".join(rows)
    except Exception as e:
        logger.error(f"Failed to get recent analyses: {e}")
        return f"❌ Error getting recent analyses: {str(e)}"
1252
 
1253
  # Sample data
1254
  SAMPLE_TEXTS = [
@@ -1280,10 +910,10 @@ Not sure if I like it or not.
1280
  Amazing quality and fast delivery!
1281
  Could be better, but it's okay."""
1282
 
1283
- # Gradio Interface - simplified version focused on core functionality
1284
- with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
1285
- gr.Markdown("# 🎭 Multilingual Sentiment Analyzer")
1286
- gr.Markdown("Comprehensive sentiment analysis with batch processing and multilingual support")
1287
 
1288
  with gr.Tab("📝 Single Analysis"):
1289
  with gr.Row():
@@ -1320,140 +950,106 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer")
1320
  )
1321
 
1322
  with gr.Column(scale=1):
1323
- result_info = gr.Markdown("Enter text and click Analyze to see results")
1324
 
1325
  with gr.Row():
1326
  gauge_plot = gr.Plot(label="Sentiment Gauge")
1327
  bars_plot = gr.Plot(label="Probability Distribution")
1328
-
1329
- with gr.Tab("🔬 Advanced Analysis"):
1330
  with gr.Row():
1331
  with gr.Column(scale=2):
1332
- advanced_input = gr.Textbox(
1333
- label="Text for Advanced Analysis",
1334
- placeholder="Enter text for explainability analysis...",
1335
- lines=4
1336
  )
1337
 
1338
  with gr.Row():
1339
- advanced_language = gr.Dropdown(
1340
  choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
1341
  value='Auto Detect',
1342
  label="Language"
1343
  )
1344
- advanced_theme = gr.Dropdown(
1345
  choices=list(config.THEMES.keys()),
1346
  value='default',
1347
  label="Theme"
1348
  )
1349
 
1350
- gr.Markdown("### 🔍 Explainability Options")
1351
- gr.Markdown("**LIME** shows which words influence sentiment most. **Attention** shows which tokens the model focuses on.")
1352
-
1353
  with gr.Row():
1354
- use_lime = gr.Checkbox(
1355
- label="🔍 Use LIME Analysis",
1356
- value=True,
1357
- info="Explains feature importance (requires: pip install lime)"
1358
- )
1359
- use_attention = gr.Checkbox(
1360
- label="👁️ Use Attention Weights",
1361
- value=True,
1362
- info="Shows token-level attention patterns"
1363
- )
1364
-
1365
- lime_features = gr.Slider(
1366
- minimum=5,
1367
- maximum=20,
1368
- value=10,
1369
- step=1,
1370
- label="LIME Features Count",
1371
- info="Number of top features to analyze"
1372
- )
1373
 
1374
- advanced_analyze_btn = gr.Button("🔬 Advanced Analyze", variant="primary", size="lg")
1375
 
1376
  gr.Examples(
1377
- examples=[
1378
- ["This movie is absolutely fantastic! The acting is superb and the plot is engaging."],
1379
- ["I'm not sure how I feel about this product. It has some good features but also some issues."],
1380
- ["The service was terrible and the staff was very rude. I will never come back here again."]
1381
- ],
1382
- inputs=advanced_input,
1383
- label="Sample Texts for Advanced Analysis"
1384
  )
1385
 
1386
  with gr.Column(scale=1):
1387
- advanced_result_info = gr.Markdown("""
1388
- **Advanced Analysis Features:**
1389
-
1390
- 🔍 **LIME (Local Interpretable Model-agnostic Explanations)**
1391
- - Shows which words contribute most to the sentiment prediction
1392
- - Red bars = pushes toward negative sentiment
1393
- - Green bars = pushes toward positive sentiment
1394
-
1395
- 👁️ **Attention Weights**
1396
- - Visualizes which tokens the model pays attention to
1397
- - Darker/higher bars = more attention from the model
1398
- - Helps understand model focus patterns
1399
-
1400
- Configure explainability settings and click **Advanced Analyze** to start.
1401
- """)
1402
 
1403
  with gr.Row():
1404
- advanced_gauge_plot = gr.Plot(label="Sentiment Gauge")
1405
- advanced_bars_plot = gr.Plot(label="Probability Distribution")
 
 
1406
 
1407
  with gr.Row():
1408
- lime_plot = gr.Plot(label="🔍 LIME Feature Importance")
1409
- attention_plot = gr.Plot(label="👁️ Attention Weights")
1410
-
1411
- with gr.Tab("📊 Batch Analysis"):
1412
  with gr.Row():
1413
  with gr.Column(scale=2):
1414
- batch_input = gr.Textbox(
1415
- label="Batch Text Input (One text per line)",
1416
- placeholder="Enter multiple texts, one per line...",
1417
- lines=8
1418
  )
1419
 
1420
  with gr.Row():
1421
- batch_language = gr.Dropdown(
1422
  choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
1423
  value='Auto Detect',
1424
  label="Language"
1425
  )
1426
- batch_theme = gr.Dropdown(
1427
  choices=list(config.THEMES.keys()),
1428
  value='default',
1429
  label="Theme"
1430
  )
1431
 
1432
  with gr.Row():
1433
- batch_clean = gr.Checkbox(label="Clean Text", value=False)
1434
- batch_remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
1435
- batch_remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
1436
-
1437
- batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
 
 
 
1438
 
1439
- gr.Examples(
1440
- examples=[[BATCH_SAMPLE]],
1441
- inputs=batch_input,
1442
- label="Sample Batch Input"
 
 
1443
  )
 
 
1444
 
1445
  with gr.Column(scale=1):
1446
- batch_summary = gr.Markdown("Enter texts and click Analyze Batch to see results")
1447
 
1448
  with gr.Row():
1449
- batch_results_table = gr.DataFrame(
1450
- label="Detailed Results",
1451
- interactive=False
1452
- )
1453
-
1454
- with gr.Row():
1455
- batch_summary_plot = gr.Plot(label="Sentiment Summary")
1456
- batch_confidence_plot = gr.Plot(label="Confidence Distribution")
1457
 
1458
  with gr.Tab("📈 History & Analytics"):
1459
  with gr.Row():
@@ -1514,13 +1110,6 @@ Configure explainability settings and click **Advanced Analyze** to start.
1514
  outputs=[result_info, gauge_plot, bars_plot]
1515
  )
1516
 
1517
- # Advanced Analysis
1518
- advanced_analyze_btn.click(
1519
- analyze_advanced_text,
1520
- inputs=[advanced_input, advanced_language, advanced_theme, use_lime, use_attention, lime_features],
1521
- outputs=[advanced_result_info, advanced_gauge_plot, advanced_bars_plot, lime_plot, attention_plot]
1522
- )
1523
-
1524
  # Batch Analysis
1525
  batch_analyze_btn.click(
1526
  analyze_batch_texts,
@@ -1528,6 +1117,13 @@ Configure explainability settings and click **Advanced Analyze** to start.
1528
  outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
1529
  )
1530
 
 
 
 
 
 
 
 
1531
  # History & Analytics
1532
  stats_btn.click(
1533
  get_history_stats,
@@ -1565,17 +1161,5 @@ Configure explainability settings and click **Advanced Analyze** to start.
1565
  outputs=history_status
1566
  )
1567
 
1568
- # Launch the application
1569
  if __name__ == "__main__":
1570
- try:
1571
- logger.info("Starting Multilingual Sentiment Analyzer...")
1572
- demo.launch(
1573
- share=True,
1574
- server_name="0.0.0.0",
1575
- server_port=7860,
1576
- show_error=True
1577
- )
1578
- except Exception as e:
1579
- logger.error(f"Failed to launch application: {e}")
1580
- print(f"❌ Application failed to start: {e}")
1581
- print("Please check your dependencies and try again.")
 
5
  import plotly.express as px
6
  from plotly.subplots import make_subplots
7
  import numpy as np
8
+ from wordcloud import WordCloud
9
  from collections import Counter, defaultdict
10
  import re
11
  import json
 
17
  from functools import lru_cache
18
  from dataclasses import dataclass
19
  from typing import List, Dict, Optional, Tuple
20
+ import nltk
21
+ from nltk.corpus import stopwords
22
+ import langdetect
23
  import pandas as pd
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # Configuration
26
  @dataclass
27
  class Config:
 
41
  'sv': 'Swedish'
42
  }
43
 
 
44
  MODELS = {
45
  'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
46
  'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
47
+ 'zh': "uer/roberta-base-finetuned-dianping-chinese"
 
 
48
  }
49
 
50
  # Color themes
 
57
 
58
  config = Config()
59
 
60
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize NLTK. Downloading the corpora or reading the stopword list can
# fail; in that case fall back to a small built-in English stopword set so the
# app still starts.
try:
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt', quiet=True)
    STOP_WORDS = set(stopwords.words('english'))
except Exception as e:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit still propagate; log the reason instead of hiding it.
    logger.warning(f"NLTK stopwords unavailable, using basic stopwords: {e}")
    STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
71
+
72
  class ModelManager:
73
+ """Manages multiple language models"""
74
  def __init__(self):
75
  self.models = {}
76
  self.tokenizers = {}
77
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
78
+ self._load_default_model()
 
79
 
80
+ def _load_default_model(self):
81
+ """Load the default models"""
82
  try:
83
+ # Load multilingual model as default
 
 
 
 
 
 
 
 
 
 
 
 
84
  model_name = config.MODELS['multilingual']
 
 
85
  self.tokenizers['default'] = AutoTokenizer.from_pretrained(model_name)
86
  self.models['default'] = AutoModelForSequenceClassification.from_pretrained(model_name)
87
  self.models['default'].to(self.device)
88
+ logger.info(f"Default model loaded: {model_name}")
89
 
90
+ # Load Chinese model
91
+ zh_model_name = config.MODELS['zh']
92
+ self.tokenizers['zh'] = AutoTokenizer.from_pretrained(zh_model_name)
93
+ self.models['zh'] = AutoModelForSequenceClassification.from_pretrained(zh_model_name)
94
+ self.models['zh'].to(self.device)
95
+ logger.info(f"Chinese model loaded: {zh_model_name}")
96
 
97
  except Exception as e:
98
+ logger.error(f"Failed to load models: {e}")
99
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  def get_model(self, language='en'):
102
  """Get model for specific language"""
103
+ if language == 'zh':
104
+ return self.models['zh'], self.tokenizers['zh']
105
+ elif language in ['en', 'auto'] or language not in config.SUPPORTED_LANGUAGES:
106
+ return self.models['default'], self.tokenizers['default']
107
+ return self.models['default'], self.tokenizers['default'] # Use multilingual for other languages
108
 
109
  @staticmethod
110
  def detect_language(text: str) -> str:
111
+ """Detect text language properly"""
 
 
 
 
 
 
 
112
  try:
113
+ # Use langdetect for all languages
114
  detected = langdetect.detect(text)
115
+ # Map some common langdetect codes to our supported languages
116
  language_mapping = {
117
  'zh-cn': 'zh',
118
  'zh-tw': 'zh'
 
122
  except:
123
  return 'en'
124
 
125
+ model_manager = ModelManager()
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  class HistoryManager:
128
+ """Enhanced history manager with more features"""
129
  def __init__(self):
130
  self._history = []
131
 
 
135
  self._history = self._history[-config.MAX_HISTORY_SIZE:]
136
 
137
  def add_batch_entries(self, entries: List[Dict]):
138
+ """Add multiple entries at once"""
139
  for entry in entries:
140
  self.add_entry(entry)
141
 
 
143
  return self._history.copy()
144
 
145
  def get_recent_history(self, n: int = 10) -> List[Dict]:
146
+ """Get n most recent entries"""
147
  return self._history[-n:] if self._history else []
148
 
149
  def filter_history(self, sentiment: str = None, language: str = None,
150
  min_confidence: float = None) -> List[Dict]:
151
+ """Filter history by criteria"""
152
  filtered = self._history
153
 
154
  if sentiment:
 
178
  'positive_count': sentiments.count('Positive'),
179
  'negative_count': sentiments.count('Negative'),
180
  'neutral_count': sentiments.count('Neutral'),
181
+ 'avg_confidence': np.mean(confidences),
182
+ 'max_confidence': np.max(confidences),
183
+ 'min_confidence': np.min(confidences),
184
  'languages_detected': len(set(languages)),
185
  'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
186
+ 'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history])
187
  }
188
 
189
  history_manager = HistoryManager()
 
210
  @staticmethod
211
  def extract_keywords(text: str, top_k: int = 5) -> List[str]:
212
  """Extract key words from text"""
213
+ # For Chinese text, extract characters
214
  if re.search(r'[\u4e00-\u9fff]', text):
215
  words = re.findall(r'[\u4e00-\u9fff]+', text)
216
  all_chars = ''.join(words)
217
  char_freq = Counter(all_chars)
218
  return [char for char, _ in char_freq.most_common(top_k)]
219
  else:
220
+ # For other languages, use word-based extraction
221
  cleaned = TextProcessor.clean_text(text)
222
  words = cleaned.split()
223
  word_freq = Counter(words)
 
230
  return [line.strip() for line in lines if line.strip()]
231
 
232
  class SentimentAnalyzer:
233
+ """Enhanced sentiment analysis"""
234
 
235
  @staticmethod
236
  def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
 
238
  if not text.strip():
239
  raise ValueError("Empty text provided")
240
 
 
 
 
 
241
  # Detect language if auto
242
  if language == 'auto':
243
  detected_lang = model_manager.detect_language(text)
 
245
  detected_lang = language
246
 
247
  # Get appropriate model
248
+ model, tokenizer = model_manager.get_model(detected_lang)
 
 
 
 
249
 
250
+ # Preprocessing options - don't clean Chinese text
251
  options = preprocessing_options or {}
252
  processed_text = text
253
  if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
 
259
 
260
  try:
261
  # Tokenize and analyze
262
+ inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
263
+ truncation=True, max_length=config.MAX_TEXT_LENGTH).to(model_manager.device)
 
 
 
 
 
264
 
265
  with torch.no_grad():
266
  outputs = model(**inputs)
 
307
 
308
  except Exception as e:
309
  logger.error(f"Analysis failed: {e}")
310
+ raise
311
 
312
  @staticmethod
313
  def analyze_batch(texts: List[str], language: str = 'auto',
 
320
  result['batch_index'] = i
321
  results.append(result)
322
  except Exception as e:
323
+ # Add error result
324
  results.append({
325
  'sentiment': 'Error',
326
  'confidence': 0.0,
327
  'error': str(e),
328
  'batch_index': i,
329
+ 'text': text
330
  })
331
  return results
332
 
333
class PlotlyVisualizer:
    """Plotly figures for single results, batch results and the history.

    All methods are static. Theme colours come from ``config.THEMES`` and are
    looked up with the keys ``'pos'``, ``'neg'`` and ``'neu'``.
    """

    @staticmethod
    def _theme_colors(theme: str) -> Dict:
        """Return the colour dict for *theme*, or the 'default' theme if unknown."""
        themes = config.THEMES
        if theme in themes:
            return themes[theme]
        return themes['default']

    @staticmethod
    def _sentiment_color(colors: Dict, sentiment: str) -> str:
        """Map a sentiment label to its theme colour.

        'Positive', 'Negative' and 'Neutral' map to the 'pos', 'neg' and 'neu'
        entries through their first three letters. Any other label keeps the
        previous behaviour of using the negative colour.
        """
        return colors.get(sentiment.lower()[:3], colors['neg'])

    @staticmethod
    def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
        """Create a gauge for a single analysis result.

        Args:
            result: Analysis result; reads 'has_neutral', 'sentiment' and
                either 'pos_prob' (three-way models) or 'confidence'.
            theme: Key into ``config.THEMES``; unknown keys use 'default'.

        Returns:
            A gauge figure 400 px high.
        """
        colors = PlotlyVisualizer._theme_colors(theme)
        # A Neutral result gets the neutral colour rather than the negative one.
        bar_color = PlotlyVisualizer._sentiment_color(colors, result['sentiment'])

        if result['has_neutral']:
            # Three-way gauge: the needle shows the positive probability.
            fig = go.Figure(go.Indicator(
                mode="gauge+number+delta",
                value=result['pos_prob'] * 100,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': f"Sentiment: {result['sentiment']}"},
                delta={'reference': 50},
                gauge={
                    'axis': {'range': [None, 100]},
                    'bar': {'color': bar_color},
                    'steps': [
                        {'range': [0, 33], 'color': colors['neg']},
                        {'range': [33, 67], 'color': colors['neu']},
                        {'range': [67, 100], 'color': colors['pos']}
                    ],
                    'threshold': {
                        'line': {'color': "red", 'width': 4},
                        'thickness': 0.75,
                        'value': 90
                    }
                }
            ))
        else:
            # Two-way gauge: the needle shows the prediction confidence.
            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=result['confidence'] * 100,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': f"Confidence: {result['sentiment']}"},
                gauge={
                    'axis': {'range': [None, 100]},
                    'bar': {'color': bar_color},
                    'steps': [
                        {'range': [0, 50], 'color': "lightgray"},
                        {'range': [50, 100], 'color': "gray"}
                    ]
                }
            ))

        fig.update_layout(height=400, font={'size': 16})
        return fig

    @staticmethod
    def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
        """Create a bar chart of the class probabilities of one result.

        Args:
            result: Analysis result; reads 'has_neutral', 'neg_prob',
                'pos_prob' and, for three-way models, 'neu_prob'.
            theme: Key into ``config.THEMES``; unknown keys use 'default'.
        """
        colors = PlotlyVisualizer._theme_colors(theme)

        if result['has_neutral']:
            labels = ['Negative', 'Neutral', 'Positive']
            values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
            bar_colors = [colors['neg'], colors['neu'], colors['pos']]
        else:
            labels = ['Negative', 'Positive']
            values = [result['neg_prob'], result['pos_prob']]
            bar_colors = [colors['neg'], colors['pos']]

        fig = go.Figure(data=[
            go.Bar(x=labels, y=values, marker_color=bar_colors, text=[f'{v:.3f}' for v in values])
        ])

        fig.update_traces(texttemplate='%{text}', textposition='outside')
        fig.update_layout(
            title="Sentiment Probabilities",
            yaxis_title="Probability",
            height=400,
            showlegend=False
        )

        return fig

    @staticmethod
    def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
        """Create a donut chart of sentiment counts for a batch.

        Results without a 'sentiment' key are ignored. Labels that do not map
        to a theme colour (for example 'Error') are drawn in grey.
        """
        colors = PlotlyVisualizer._theme_colors(theme)

        # Count sentiments
        sentiments = [r['sentiment'] for r in results if 'sentiment' in r]
        sentiment_counts = Counter(sentiments)

        # Create pie chart
        fig = go.Figure(data=[go.Pie(
            labels=list(sentiment_counts.keys()),
            values=list(sentiment_counts.values()),
            marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
            textinfo='label+percent',
            hole=0.3
        )])

        fig.update_layout(
            title=f"Batch Analysis Summary ({len(results)} texts)",
            height=400
        )

        return fig

    @staticmethod
    def create_confidence_distribution(results: List[Dict]) -> go.Figure:
        """Create a histogram of confidence scores for the successful results.

        Results marked with sentiment 'Error' or lacking a 'confidence' key
        are skipped. An empty figure is returned when nothing is left.
        """
        # .get() so a result without a 'sentiment' key does not raise KeyError.
        confidences = [r['confidence'] for r in results
                       if 'confidence' in r and r.get('sentiment') != 'Error']

        if not confidences:
            return go.Figure()

        fig = go.Figure(data=[go.Histogram(
            x=confidences,
            nbinsx=20,
            marker_color='skyblue',
            opacity=0.7
        )])

        fig.update_layout(
            title="Confidence Distribution",
            xaxis_title="Confidence Score",
            yaxis_title="Frequency",
            height=400
        )

        return fig

    @staticmethod
    def create_history_dashboard(history: List[Dict]) -> go.Figure:
        """Create a 2x2 dashboard from the analysis history.

        Panels: positive-probability timeline, confidence histogram, language
        pie chart and sentiment bar chart. An empty figure is returned when
        fewer than two entries are available.
        """
        if len(history) < 2:
            return go.Figure()

        # Create subplots
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
                            'Language Distribution', 'Sentiment Summary'],
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"type": "pie"}, {"type": "bar"}]]
        )

        # Extract data. Entries without 'pos_prob' are plotted at 0 instead of
        # raising KeyError.
        indices = list(range(len(history)))
        pos_probs = [item.get('pos_prob', 0) for item in history]
        confidences = [item['confidence'] for item in history]
        sentiments = [item['sentiment'] for item in history]
        languages = [item.get('language', 'en') for item in history]

        # Neutral gets its own colour so it is not shown as negative; any other
        # label is grey.
        sentiment_colors = {'Positive': '#4CAF50', 'Negative': '#F44336', 'Neutral': '#FF9800'}

        # Sentiment timeline
        colors = [sentiment_colors.get(s, '#999999') for s in sentiments]
        fig.add_trace(
            go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
                       marker=dict(color=colors, size=8),
                       name='Positive Probability'),
            row=1, col=1
        )

        # Confidence distribution
        fig.add_trace(
            go.Histogram(x=confidences, nbinsx=10, name='Confidence'),
            row=1, col=2
        )

        # Language distribution
        lang_counts = Counter(languages)
        fig.add_trace(
            go.Pie(labels=list(lang_counts.keys()), values=list(lang_counts.values()),
                   name="Languages"),
            row=2, col=1
        )

        # Sentiment summary
        sent_counts = Counter(sentiments)
        fig.add_trace(
            go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
                   marker_color=[sentiment_colors.get(k, '#999999') for k in sent_counts.keys()]),
            row=2, col=2
        )

        fig.update_layout(height=800, showlegend=False)
        return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
 
517
+ # Main application functions
518
  def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
519
  remove_punct: bool, remove_nums: bool):
520
+ """Enhanced single text analysis"""
521
  try:
522
  if not text.strip():
523
+ return "Please enter text", None, None
 
 
 
 
524
 
525
  # Map display names back to language codes
526
  language_map = {
 
540
  'remove_numbers': remove_nums
541
  }
542
 
 
543
  result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)
544
 
545
  # Add to history
 
558
  history_manager.add_entry(history_entry)
559
 
560
  # Create visualizations
561
+ gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
562
+ bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)
563
 
564
  # Create info text
565
  info_text = f"""
566
+ **Analysis Results:**
567
+ - **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
568
  - **Language:** {result['language'].upper()}
569
+ - **Keywords:** {', '.join(result['keywords'])}
570
+ - **Stats:** {result['word_count']} words, {result['char_count']} characters
 
 
 
 
 
571
  """
572
 
573
  return info_text, gauge_fig, bars_fig
574
 
575
  except Exception as e:
576
+ logger.error(f"Analysis failed: {e}")
577
+ return f"Error: {str(e)}", None, None
 
578
 
579
  def analyze_batch_texts(batch_text: str, language: str, theme: str,
580
  clean_text: bool, remove_punct: bool, remove_nums: bool):
581
+ """Batch text analysis"""
582
  try:
583
  if not batch_text.strip():
584
+ return "Please enter texts (one per line)", None, None, None
 
 
 
 
585
 
586
  # Parse batch input
587
  texts = TextProcessor.parse_batch_input(batch_text)
588
 
589
  if len(texts) > config.BATCH_SIZE_LIMIT:
590
+ return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
591
 
592
  if not texts:
593
+ return "No valid texts found", None, None, None
594
 
595
  # Map display names back to language codes
596
  language_map = {
 
635
  history_manager.add_batch_entries(batch_entries)
636
 
637
  # Create visualizations
638
+ summary_fig = PlotlyVisualizer.create_batch_summary(results, theme)
639
+ confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
640
 
641
  # Create results table
642
  df_data = []
 
646
  'Index': i+1,
647
  'Text': text[:50] + '...' if len(text) > 50 else text,
648
  'Sentiment': 'Error',
649
+ 'Confidence': 0.0,
650
  'Language': 'Unknown',
651
+ 'Error': result['error']
652
  })
653
  else:
654
  df_data.append({
 
657
  'Sentiment': result['sentiment'],
658
  'Confidence': f"{result['confidence']:.3f}",
659
  'Language': result['language'].upper(),
660
+ 'Keywords': ', '.join(result['keywords'][:3])
661
  })
662
 
663
  df = pd.DataFrame(df_data)
 
671
  avg_confidence = np.mean([r['confidence'] for r in successful_results])
672
 
673
  summary_text = f"""
674
+ **Batch Analysis Summary:**
675
  - **Total Texts:** {len(texts)}
676
  - **Successful:** {len(successful_results)}
677
  - **Errors:** {error_count}
678
  - **Average Confidence:** {avg_confidence:.3f}
679
+ - **Sentiments:** {dict(sentiment_counts)}
 
 
 
680
  """
681
  else:
682
+ summary_text = f"All {len(texts)} texts failed to analyze."
683
 
684
  return summary_text, df, summary_fig, confidence_fig
685
 
686
  except Exception as e:
687
  logger.error(f"Batch analysis failed: {e}")
688
+ return f"Error: {str(e)}", None, None, None
 
 
689
 
def analyze_advanced_text(text: str, language: str, theme: str, include_keywords: bool,
                          keyword_count: int, min_confidence: float):
    """Analyze one text and report sentiment, text statistics and a confidence check.

    Args:
        text: Text to analyze. Blank or whitespace-only input returns a prompt
            message without running the analysis.
        language: Display name chosen in the UI (e.g. 'English'); names that
            are not in the local mapping fall back to 'auto'.
        theme: Theme name forwarded to the PlotlyVisualizer chart builders.
        include_keywords: When true, result['keywords'] is replaced with the
            output of TextProcessor.extract_keywords and listed in the report.
        keyword_count: Forwarded to TextProcessor.extract_keywords.
        min_confidence: Threshold compared against result['confidence'].

    Returns:
        A (markdown_text, gauge_figure, bars_figure) tuple. For blank input or
        when any exception is raised, both figures are None and the first
        element carries the message.
    """
    try:
        if not text.strip():
            return "Please enter text", None, None

        # Translate the dropdown's display name into the analyzer's language code.
        display_to_code = {
            'Auto Detect': 'auto',
            'English': 'en',
            'Chinese': 'zh',
            'Spanish': 'es',
            'French': 'fr',
            'German': 'de',
            'Swedish': 'sv',
        }
        language_code = display_to_code.get(language, 'auto')

        result = SentimentAnalyzer.analyze_text(text, language_code)

        # Optionally override the analyzer's keywords with a custom-sized list.
        if include_keywords:
            result['keywords'] = TextProcessor.extract_keywords(text, keyword_count)

        # A low-confidence result is still returned and recorded, only flagged.
        meets_confidence = result['confidence'] >= min_confidence

        # Record the analysis; the stored preview is capped at 100 characters.
        preview = text[:100] + '...' if len(text) > 100 else text
        history_manager.add_entry({
            'text': preview,
            'full_text': text,
            'sentiment': result['sentiment'],
            'confidence': result['confidence'],
            'pos_prob': result['pos_prob'],
            'neg_prob': result['neg_prob'],
            'neu_prob': result.get('neu_prob', 0),
            'language': result['language'],
            'timestamp': datetime.now().isoformat(),
            'analysis_type': 'advanced',
            'meets_confidence_threshold': meets_confidence,
        })

        gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
        bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)

        if meets_confidence:
            confidence_status = "✅ High Confidence"
        else:
            confidence_status = "⚠️ Low Confidence"

        # max(..., 1) guards the division when the analyzer reports zero words.
        avg_word_length = result['char_count'] / max(result['word_count'], 1)

        report_lines = [
            "",
            "**Advanced Analysis Results:**",
            f"- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)",
            f"- **Confidence Status:** {confidence_status}",
            f"- **Language:** {result['language'].upper()}",
            "- **Text Statistics:**",
            f"- Words: {result['word_count']}",
            f"- Characters: {result['char_count']}",
            f"- Average word length: {avg_word_length:.1f}",
            "",
        ]
        info_text = "\n".join(report_lines)

        if include_keywords:
            info_text += f"\n- **Top Keywords:** {', '.join(result['keywords'])}"

        if not meets_confidence:
            info_text += f"\n\n⚠️ **Note:** Confidence ({result['confidence']:.3f}) is below threshold ({min_confidence})"

        return info_text, gauge_fig, bars_fig

    except Exception as e:
        logger.error(f"Advanced analysis failed: {e}")
        return f"Error: {str(e)}", None, None
763
+
def get_history_stats():
    """Format the statistics reported by history_manager.get_stats() as Markdown.

    Returns:
        A Markdown string with counts, confidence, language and text-length
        sections, or a short notice when get_stats() returns a falsy value.
    """
    stats = history_manager.get_stats()
    if not stats:
        return "No analysis history available"

    # One list item per output line; empty strings become blank lines.
    report_lines = [
        "",
        "**Comprehensive History Statistics:**",
        "",
        "**Analysis Counts:**",
        f"- Total Analyses: {stats['total_analyses']}",
        f"- Positive: {stats['positive_count']}",
        f"- Negative: {stats['negative_count']}",
        f"- Neutral: {stats['neutral_count']}",
        "",
        "**Confidence Metrics:**",
        f"- Average Confidence: {stats['avg_confidence']:.3f}",
        f"- Highest Confidence: {stats['max_confidence']:.3f}",
        f"- Lowest Confidence: {stats['min_confidence']:.3f}",
        "",
        "**Language Statistics:**",
        f"- Languages Detected: {stats['languages_detected']}",
        f"- Most Common Language: {stats['most_common_language'].upper()}",
        "",
        "**Text Statistics:**",
        f"- Average Text Length: {stats['avg_text_length']:.1f} characters",
        "",
    ]
    return "\n".join(report_lines)
 
 
 
791
 
def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
    """Filter the stored history and build a summary plus a table of matches.

    Args:
        sentiment_filter: Sentiment to keep, or "All" for no sentiment filter.
        language_filter: Language to keep (lower-cased before filtering), or
            "All" for no language filter.
        min_confidence: Minimum confidence; values that are not above 0
            disable the confidence filter.

    Returns:
        (summary_markdown, dataframe). The DataFrame holds at most the last
        20 matching entries; when nothing matches it is None and the summary
        is a short notice.
    """
    # "All" and a non-positive threshold both mean "do not filter on this".
    sentiment = None if sentiment_filter == "All" else sentiment_filter
    language = None if language_filter == "All" else language_filter.lower()
    confidence_floor = min_confidence if min_confidence > 0 else None

    filtered_history = history_manager.filter_history(
        sentiment=sentiment,
        language=language,
        min_confidence=confidence_floor,
    )

    if not filtered_history:
        return "No entries match the filter criteria", None

    # Only the last 20 matches are tabulated.
    rows = [
        {
            # First 16 characters of the stored timestamp; for the ISO strings
            # written by analyze_advanced_text that is date through minutes.
            'Timestamp': entry['timestamp'][:16],
            'Text': entry['text'],
            'Sentiment': entry['sentiment'],
            'Confidence': f"{entry['confidence']:.3f}",
            'Language': entry['language'].upper(),
            'Type': entry.get('analysis_type', 'single'),
        }
        for entry in filtered_history[-20:]
    ]
    df = pd.DataFrame(rows)

    total = len(filtered_history)
    summary = "\n".join([
        "",
        "**Filtered Results:**",
        f"- Found {total} entries matching criteria",
        f"- Showing most recent {min(20, total)} entries",
        "",
    ])

    return summary, df
 
 
 
 
828
 
def plot_history_dashboard():
    """Build the history dashboard figure from the stored history.

    Returns:
        (figure, status_message). The figure is None when fewer than two
        analyses are stored.
    """
    entries = history_manager.get_history()
    entry_count = len(entries)

    if entry_count >= 2:
        dashboard = PlotlyVisualizer.create_history_dashboard(entries)
        return dashboard, f"Dashboard showing {entry_count} analyses"

    return None, "Need at least 2 analyses for dashboard"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
 
def export_history_csv():
    """Export the stored analysis history to a temporary CSV file.

    Returns:
        (file_path, status_message). file_path is None when there is no
        history or when the export fails. The file is created with
        delete=False, so it stays on disk for the caller to serve.
    """
    history = history_manager.get_history()
    if not history:
        return None, "No history to export"

    try:
        df = pd.DataFrame(history)
        # Only reserve a unique path here and close the handle immediately:
        # leaving it open leaks a file descriptor and prevents pandas from
        # re-opening the same path on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w') as temp_file:
            csv_path = temp_file.name
        df.to_csv(csv_path, index=False)
        return csv_path, f"Exported {len(history)} entries to CSV"
    except Exception as e:
        # Log like the other handlers in this file so failures are traceable.
        logger.error(f"CSV export failed: {e}")
        return None, f"Export failed: {str(e)}"
 
851
 
def export_history_excel():
    """Export the stored analysis history to a temporary Excel (.xlsx) file.

    Returns:
        (file_path, status_message). file_path is None when there is no
        history or when the export fails. The file is created with
        delete=False, so it stays on disk for the caller to serve.
    """
    history = history_manager.get_history()
    if not history:
        return None, "No history to export"

    try:
        df = pd.DataFrame(history)
        # Only reserve a unique path here and close the handle immediately:
        # leaving it open leaks a file descriptor and prevents pandas from
        # re-opening the same path on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
            excel_path = temp_file.name
        df.to_excel(excel_path, index=False)
        return excel_path, f"Exported {len(history)} entries to Excel"
    except Exception as e:
        # Log like the other handlers in this file so failures are traceable.
        logger.error(f"Excel export failed: {e}")
        return None, f"Export failed: {str(e)}"
 
865
 
def clear_all_history():
    """Clear the stored history and return a message with the value clear() reports."""
    removed = history_manager.clear()
    return "Cleared {} entries from history".format(removed)
 
 
 
 
870
 
def get_recent_analyses():
    """Summarize the entries returned by history_manager.get_recent_history(10).

    Returns:
        A Markdown numbered list with one line per entry (sentiment,
        confidence and stored text), or a short notice when nothing is
        returned.
    """
    recent = history_manager.get_recent_history(10)
    if not recent:
        return "No recent analyses available"

    header = "**Recent Analyses (Last 10):**\n\n"
    # Each item keeps its own trailing newline, so the pieces join with "".
    items = (
        f"{position}. **{entry['sentiment']}** ({entry['confidence']:.3f}) - {entry['text']}\n"
        for position, entry in enumerate(recent, 1)
    )
    return header + "".join(items)
 
 
 
 
 
882
 
883
  # Sample data
884
  SAMPLE_TEXTS = [
 
910
  Amazing quality and fast delivery!
911
  Could be better, but it's okay."""
912
 
913
+ # Gradio Interface
914
+ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment Analyzer") as demo:
915
+ gr.Markdown("# 🎭 Advanced Multilingual Sentiment Analyzer")
916
+ gr.Markdown("Comprehensive sentiment analysis with batch processing, advanced analytics, and multilingual support")
917
 
918
  with gr.Tab("📝 Single Analysis"):
919
  with gr.Row():
 
950
  )
951
 
952
  with gr.Column(scale=1):
953
+ result_info = gr.Markdown("Enter text and click Analyze")
954
 
955
  with gr.Row():
956
  gauge_plot = gr.Plot(label="Sentiment Gauge")
957
  bars_plot = gr.Plot(label="Probability Distribution")
958
+
959
+ with gr.Tab("📊 Batch Analysis"):
960
  with gr.Row():
961
  with gr.Column(scale=2):
962
+ batch_input = gr.Textbox(
963
+ label="Batch Text Input (One text per line)",
964
+ placeholder="Enter multiple texts, one per line...",
965
+ lines=8
966
  )
967
 
968
  with gr.Row():
969
+ batch_language = gr.Dropdown(
970
  choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
971
  value='Auto Detect',
972
  label="Language"
973
  )
974
+ batch_theme = gr.Dropdown(
975
  choices=list(config.THEMES.keys()),
976
  value='default',
977
  label="Theme"
978
  )
979
 
 
 
 
980
  with gr.Row():
981
+ batch_clean = gr.Checkbox(label="Clean Text", value=False)
982
+ batch_remove_punct = gr.Checkbox(label="Remove Punctuation", value=True)
983
+ batch_remove_nums = gr.Checkbox(label="Remove Numbers", value=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984
 
985
+ batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
986
 
987
  gr.Examples(
988
+ examples=[[BATCH_SAMPLE]],
989
+ inputs=batch_input,
990
+ label="Sample Batch Input"
 
 
 
 
991
  )
992
 
993
  with gr.Column(scale=1):
994
+ batch_summary = gr.Markdown("Enter texts and click Analyze Batch")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
995
 
996
  with gr.Row():
997
+ batch_results_table = gr.DataFrame(
998
+ label="Detailed Results",
999
+ interactive=False
1000
+ )
1001
 
1002
  with gr.Row():
1003
+ batch_summary_plot = gr.Plot(label="Sentiment Summary")
1004
+ batch_confidence_plot = gr.Plot(label="Confidence Distribution")
1005
+
1006
+ with gr.Tab("🔬 Advanced Analysis"):
1007
  with gr.Row():
1008
  with gr.Column(scale=2):
1009
+ advanced_input = gr.Textbox(
1010
+ label="Text for Advanced Analysis",
1011
+ placeholder="Enter text for detailed analysis...",
1012
+ lines=4
1013
  )
1014
 
1015
  with gr.Row():
1016
+ advanced_language = gr.Dropdown(
1017
  choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
1018
  value='Auto Detect',
1019
  label="Language"
1020
  )
1021
+ advanced_theme = gr.Dropdown(
1022
  choices=list(config.THEMES.keys()),
1023
  value='default',
1024
  label="Theme"
1025
  )
1026
 
1027
  with gr.Row():
1028
+ include_keywords = gr.Checkbox(label="Extract Keywords", value=True)
1029
+ keyword_count = gr.Slider(
1030
+ minimum=3,
1031
+ maximum=10,
1032
+ value=5,
1033
+ step=1,
1034
+ label="Number of Keywords"
1035
+ )
1036
 
1037
+ min_confidence_slider = gr.Slider(
1038
+ minimum=0.0,
1039
+ maximum=1.0,
1040
+ value=0.7,
1041
+ step=0.1,
1042
+ label="Minimum Confidence Threshold"
1043
  )
1044
+
1045
+ advanced_analyze_btn = gr.Button("🔬 Advanced Analyze", variant="primary", size="lg")
1046
 
1047
  with gr.Column(scale=1):
1048
+ advanced_result_info = gr.Markdown("Configure settings and click Advanced Analyze")
1049
 
1050
  with gr.Row():
1051
+ advanced_gauge_plot = gr.Plot(label="Sentiment Gauge")
1052
+ advanced_bars_plot = gr.Plot(label="Probability Distribution")
 
 
 
 
 
 
1053
 
1054
  with gr.Tab("📈 History & Analytics"):
1055
  with gr.Row():
 
1110
  outputs=[result_info, gauge_plot, bars_plot]
1111
  )
1112
 
 
 
 
 
 
 
 
1113
  # Batch Analysis
1114
  batch_analyze_btn.click(
1115
  analyze_batch_texts,
 
1117
  outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
1118
  )
1119
 
1120
+ # Advanced Analysis
1121
+ advanced_analyze_btn.click(
1122
+ analyze_advanced_text,
1123
+ inputs=[advanced_input, advanced_language, advanced_theme, include_keywords, keyword_count, min_confidence_slider],
1124
+ outputs=[advanced_result_info, advanced_gauge_plot, advanced_bars_plot]
1125
+ )
1126
+
1127
  # History & Analytics
1128
  stats_btn.click(
1129
  get_history_stats,
 
1161
  outputs=history_status
1162
  )
1163
 
 
if __name__ == "__main__":
    # Start the Gradio app only when this file is run directly;
    # share=True additionally asks Gradio for a public share link.
    demo.launch(share=True)