entropy25 committed on
Commit
5eb9344
·
verified ·
1 Parent(s): d70aba4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +494 -736
app.py CHANGED
@@ -5,7 +5,6 @@ import plotly.graph_objects as go
5
  import plotly.express as px
6
  from plotly.subplots import make_subplots
7
  import numpy as np
8
- from wordcloud import WordCloud
9
  from collections import Counter, defaultdict
10
  import re
11
  import json
@@ -17,12 +16,40 @@ import logging
17
  from functools import lru_cache
18
  from dataclasses import dataclass
19
  from typing import List, Dict, Optional, Tuple
20
- import nltk
21
- from nltk.corpus import stopwords
22
- import langdetect
23
  import pandas as pd
24
 
25
- # Try to import SHAP and LIME, fall back to basic analysis if not available
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  try:
27
  import shap
28
  SHAP_AVAILABLE = True
@@ -56,10 +83,13 @@ class Config:
56
  'sv': 'Swedish'
57
  }
58
 
 
59
  MODELS = {
60
  'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
61
  'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
62
- 'zh': "uer/roberta-base-finetuned-dianping-chinese"
 
 
63
  }
64
 
65
  # Color themes
@@ -72,62 +102,80 @@ class Config:
72
 
73
  config = Config()
74
 
75
- # Logging setup
76
- logging.basicConfig(level=logging.INFO)
77
- logger = logging.getLogger(__name__)
78
-
79
- # Initialize NLTK
80
- try:
81
- nltk.download('stopwords', quiet=True)
82
- nltk.download('punkt', quiet=True)
83
- STOP_WORDS = set(stopwords.words('english'))
84
- except:
85
- STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
86
-
87
  class ModelManager:
88
- """Manages multiple language models"""
89
  def __init__(self):
90
  self.models = {}
91
  self.tokenizers = {}
92
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
93
- self._load_default_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- def _load_default_model(self):
96
- """Load the default models"""
97
  try:
98
- # Load multilingual model as default
99
  model_name = config.MODELS['multilingual']
 
 
100
  self.tokenizers['default'] = AutoTokenizer.from_pretrained(model_name)
101
  self.models['default'] = AutoModelForSequenceClassification.from_pretrained(model_name)
102
  self.models['default'].to(self.device)
103
- logger.info(f"Default model loaded: {model_name}")
104
 
105
- # Load Chinese model
106
- zh_model_name = config.MODELS['zh']
107
- self.tokenizers['zh'] = AutoTokenizer.from_pretrained(zh_model_name)
108
- self.models['zh'] = AutoModelForSequenceClassification.from_pretrained(zh_model_name)
109
- self.models['zh'].to(self.device)
110
- logger.info(f"Chinese model loaded: {zh_model_name}")
111
 
112
  except Exception as e:
113
- logger.error(f"Failed to load models: {e}")
114
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  def get_model(self, language='en'):
117
  """Get model for specific language"""
118
- if language == 'zh':
119
- return self.models['zh'], self.tokenizers['zh']
120
- elif language in ['en', 'auto'] or language not in config.SUPPORTED_LANGUAGES:
121
- return self.models['default'], self.tokenizers['default']
122
- return self.models['default'], self.tokenizers['default'] # Use multilingual for other languages
123
 
124
  @staticmethod
125
  def detect_language(text: str) -> str:
126
- """Detect text language properly"""
 
 
 
 
 
 
 
127
  try:
128
- # Use langdetect for all languages
129
  detected = langdetect.detect(text)
130
- # Map some common langdetect codes to our supported languages
131
  language_mapping = {
132
  'zh-cn': 'zh',
133
  'zh-tw': 'zh'
@@ -137,10 +185,23 @@ class ModelManager:
137
  except:
138
  return 'en'
139
 
140
- model_manager = ModelManager()
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  class HistoryManager:
143
- """Enhanced history manager with more features"""
144
  def __init__(self):
145
  self._history = []
146
 
@@ -150,7 +211,6 @@ class HistoryManager:
150
  self._history = self._history[-config.MAX_HISTORY_SIZE:]
151
 
152
  def add_batch_entries(self, entries: List[Dict]):
153
- """Add multiple entries at once"""
154
  for entry in entries:
155
  self.add_entry(entry)
156
 
@@ -158,12 +218,10 @@ class HistoryManager:
158
  return self._history.copy()
159
 
160
  def get_recent_history(self, n: int = 10) -> List[Dict]:
161
- """Get n most recent entries"""
162
  return self._history[-n:] if self._history else []
163
 
164
  def filter_history(self, sentiment: str = None, language: str = None,
165
  min_confidence: float = None) -> List[Dict]:
166
- """Filter history by criteria"""
167
  filtered = self._history
168
 
169
  if sentiment:
@@ -193,12 +251,12 @@ class HistoryManager:
193
  'positive_count': sentiments.count('Positive'),
194
  'negative_count': sentiments.count('Negative'),
195
  'neutral_count': sentiments.count('Neutral'),
196
- 'avg_confidence': np.mean(confidences),
197
- 'max_confidence': np.max(confidences),
198
- 'min_confidence': np.min(confidences),
199
  'languages_detected': len(set(languages)),
200
  'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
201
- 'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history])
202
  }
203
 
204
  history_manager = HistoryManager()
@@ -225,14 +283,12 @@ class TextProcessor:
225
  @staticmethod
226
  def extract_keywords(text: str, top_k: int = 5) -> List[str]:
227
  """Extract key words from text"""
228
- # For Chinese text, extract characters
229
  if re.search(r'[\u4e00-\u9fff]', text):
230
  words = re.findall(r'[\u4e00-\u9fff]+', text)
231
  all_chars = ''.join(words)
232
  char_freq = Counter(all_chars)
233
  return [char for char, _ in char_freq.most_common(top_k)]
234
  else:
235
- # For other languages, use word-based extraction
236
  cleaned = TextProcessor.clean_text(text)
237
  words = cleaned.split()
238
  word_freq = Counter(words)
@@ -245,7 +301,7 @@ class TextProcessor:
245
  return [line.strip() for line in lines if line.strip()]
246
 
247
  class SentimentAnalyzer:
248
- """Enhanced sentiment analysis"""
249
 
250
  @staticmethod
251
  def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
@@ -253,6 +309,10 @@ class SentimentAnalyzer:
253
  if not text.strip():
254
  raise ValueError("Empty text provided")
255
 
 
 
 
 
256
  # Detect language if auto
257
  if language == 'auto':
258
  detected_lang = model_manager.detect_language(text)
@@ -260,9 +320,13 @@ class SentimentAnalyzer:
260
  detected_lang = language
261
 
262
  # Get appropriate model
263
- model, tokenizer = model_manager.get_model(detected_lang)
 
 
 
 
264
 
265
- # Preprocessing options - don't clean Chinese text
266
  options = preprocessing_options or {}
267
  processed_text = text
268
  if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
@@ -274,8 +338,13 @@ class SentimentAnalyzer:
274
 
275
  try:
276
  # Tokenize and analyze
277
- inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
278
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(model_manager.device)
 
 
 
 
 
279
 
280
  with torch.no_grad():
281
  outputs = model(**inputs)
@@ -322,7 +391,7 @@ class SentimentAnalyzer:
322
 
323
  except Exception as e:
324
  logger.error(f"Analysis failed: {e}")
325
- raise
326
 
327
  @staticmethod
328
  def analyze_batch(texts: List[str], language: str = 'auto',
@@ -335,435 +404,213 @@ class SentimentAnalyzer:
335
  result['batch_index'] = i
336
  results.append(result)
337
  except Exception as e:
338
- # Add error result
339
  results.append({
340
  'sentiment': 'Error',
341
  'confidence': 0.0,
342
  'error': str(e),
343
  'batch_index': i,
344
- 'text': text
345
  })
346
  return results
347
 
348
- class ExplainabilityAnalyzer:
349
- """SHAP and LIME explainability analysis with fallbacks"""
350
-
351
- @staticmethod
352
- def create_prediction_function(model, tokenizer, device):
353
- """Create prediction function for LIME"""
354
- def predict_proba(texts):
355
- if isinstance(texts, str):
356
- texts = [texts]
357
-
358
- results = []
359
- for text in texts:
360
- try:
361
- inputs = tokenizer(text, return_tensors="pt", padding=True,
362
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
363
- with torch.no_grad():
364
- outputs = model(**inputs)
365
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
366
- results.append(probs)
367
- except Exception as e:
368
- # Return neutral probabilities on error
369
- if len(results) > 0:
370
- results.append(results[0]) # Use previous result
371
- else:
372
- results.append(np.array([0.33, 0.33, 0.34])) # Neutral fallback
373
-
374
- return np.array(results)
375
- return predict_proba
376
 
377
  @staticmethod
378
- def analyze_with_lime(text: str, model, tokenizer, device, num_features: int = 10) -> Dict:
379
- """Analyze text with LIME"""
380
- if not LIME_AVAILABLE:
381
- return {'method': 'LIME', 'error': 'LIME library not available'}
382
 
383
  try:
384
- # Create prediction function
385
- predict_fn = ExplainabilityAnalyzer.create_prediction_function(model, tokenizer, device)
386
-
387
- # Test prediction function first
388
- test_probs = predict_fn([text])
389
- if len(test_probs) == 0:
390
- return {'method': 'LIME', 'error': 'Prediction function failed'}
391
-
392
- # Determine class names based on model output
393
- num_classes = len(test_probs[0])
394
- if num_classes == 3:
395
- class_names = ['Negative', 'Neutral', 'Positive']
 
 
 
 
 
 
 
 
 
 
 
396
  else:
397
- class_names = ['Negative', 'Positive']
398
-
399
- # Initialize LIME explainer
400
- explainer = LimeTextExplainer(
401
- class_names=class_names,
402
- feature_selection='auto',
403
- split_expression=r'\W+',
404
- bow=False
405
- )
406
-
407
- # Generate explanation
408
- explanation = explainer.explain_instance(
409
- text,
410
- predict_fn,
411
- num_features=min(num_features, len(text.split())),
412
- num_samples=50 # Reduced for faster processing
413
- )
414
-
415
- # Extract feature importance
416
- feature_importance = explanation.as_list()
417
-
418
- return {
419
- 'method': 'LIME',
420
- 'feature_importance': feature_importance,
421
- 'class_names': class_names
422
- }
423
-
424
- except Exception as e:
425
- logger.error(f"LIME analysis failed: {e}")
426
- return {'method': 'LIME', 'error': str(e)}
427
-
428
- @staticmethod
429
- def analyze_with_attention(text: str, model, tokenizer, device) -> Dict:
430
- """Analyze text with attention weights - simplified version"""
431
- try:
432
- # Tokenize input
433
- inputs = tokenizer(text, return_tensors="pt", padding=True,
434
- truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
435
-
436
- # Get tokens for display
437
- tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
438
-
439
- # Simple attention simulation based on input importance
440
- # This is a fallback when model doesn't support attention output
441
- try:
442
- with torch.no_grad():
443
- outputs = model(**inputs, output_attentions=True)
444
- if hasattr(outputs, 'attentions') and outputs.attentions is not None:
445
- attentions = outputs.attentions
446
- # Average attention across layers and heads
447
- avg_attention = torch.mean(torch.stack(attentions), dim=(0, 1, 2)).cpu().numpy()
448
- else:
449
- raise AttributeError("No attention outputs")
450
- except:
451
- # Fallback: simulate attention based on token position and type
452
- avg_attention = np.random.uniform(0.1, 1.0, len(tokens))
453
- # Give higher attention to non-special tokens
454
- for i, token in enumerate(tokens):
455
- if token in ['[CLS]', '[SEP]', '<s>', '</s>', '<pad>']:
456
- avg_attention[i] *= 0.3
457
-
458
- # Create attention weights for each token
459
- attention_weights = []
460
- for i, token in enumerate(tokens):
461
- if i < len(avg_attention):
462
- # Clean token for display
463
- clean_token = token.replace('Ġ', '').replace('##', '')
464
- if clean_token.strip():
465
- attention_weights.append((clean_token, float(avg_attention[i])))
466
 
467
- return {
468
- 'method': 'Attention',
469
- 'tokens': [t[0] for t in attention_weights],
470
- 'attention_weights': attention_weights
471
- }
472
 
473
  except Exception as e:
474
- logger.error(f"Attention analysis failed: {e}")
475
- return {'method': 'Attention', 'error': str(e)}
476
-
477
- class AdvancedVisualizer:
478
- """Visualizations for explainability analysis"""
479
-
480
- @staticmethod
481
- def create_lime_plot(lime_result: Dict, theme: str = 'default') -> go.Figure:
482
- """Create LIME feature importance plot"""
483
- if 'error' in lime_result:
484
- fig = go.Figure()
485
- fig.add_annotation(text=f"LIME Error: {lime_result['error']}",
486
- x=0.5, y=0.5, showarrow=False)
487
- return fig
488
-
489
- features, scores = zip(*lime_result['feature_importance'])
490
- colors = ['red' if score < 0 else 'green' for score in scores]
491
-
492
- fig = go.Figure(data=[
493
- go.Bar(
494
- y=features,
495
- x=scores,
496
- orientation='h',
497
- marker_color=colors,
498
- text=[f'{score:.3f}' for score in scores],
499
- textposition='auto'
500
- )
501
- ])
502
-
503
- fig.update_layout(
504
- title="LIME Feature Importance",
505
- xaxis_title="Importance Score",
506
- yaxis_title="Features",
507
- height=400,
508
- showlegend=False
509
- )
510
-
511
- return fig
512
-
513
- @staticmethod
514
- def create_attention_plot(attention_result: Dict, theme: str = 'default') -> go.Figure:
515
- """Create attention weights visualization"""
516
- if 'error' in attention_result:
517
  fig = go.Figure()
518
  fig.add_annotation(
519
- text=f"Attention Error: {attention_result['error']}",
520
- x=0.5, y=0.5,
521
  xref="paper", yref="paper",
522
  showarrow=False,
523
  font=dict(size=14)
524
  )
525
- fig.update_layout(height=400, title="Attention Analysis Error")
526
  return fig
 
 
 
 
 
527
 
528
- if not attention_result.get('attention_weights'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
  fig = go.Figure()
530
  fig.add_annotation(
531
- text="No attention weights available",
532
  x=0.5, y=0.5,
533
- xref="paper", yref="paper",
534
  showarrow=False
535
  )
536
- fig.update_layout(height=400, title="No Attention Data")
537
  return fig
538
-
539
- tokens, weights = zip(*attention_result['attention_weights'])
540
-
541
- # Normalize weights for better visualization
542
- weights = np.array(weights)
543
- if weights.max() > weights.min():
544
- normalized_weights = (weights - weights.min()) / (weights.max() - weights.min())
545
- else:
546
- normalized_weights = weights
547
-
548
- # Limit display to top 15 tokens for readability
549
- if len(tokens) > 15:
550
- # Get top 15 by attention weight
551
- top_indices = np.argsort(weights)[-15:]
552
- tokens = [tokens[i] for i in top_indices]
553
- normalized_weights = normalized_weights[top_indices]
554
-
555
- fig = go.Figure(data=[
556
- go.Bar(
557
- x=list(range(len(tokens))),
558
- y=normalized_weights,
559
- text=tokens,
560
- textposition='outside',
561
- marker_color=normalized_weights,
562
- colorscale='Viridis',
563
- hovertemplate='<b>%{text}</b><br>Weight: %{y:.3f}<extra></extra>'
564
- )
565
- ])
566
-
567
- fig.update_layout(
568
- title="Attention Weights (Top Tokens)",
569
- xaxis_title="Token Position",
570
- yaxis_title="Attention Weight (Normalized)",
571
- height=400,
572
- showlegend=False,
573
- xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens)
574
- )
575
-
576
- return fig
577
- """Enhanced visualizations with Plotly"""
578
-
579
- @staticmethod
580
- def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
581
- """Create an animated sentiment gauge"""
582
- colors = config.THEMES[theme]
583
-
584
- if result['has_neutral']:
585
- # Three-way gauge
586
- fig = go.Figure(go.Indicator(
587
- mode = "gauge+number+delta",
588
- value = result['pos_prob'] * 100,
589
- domain = {'x': [0, 1], 'y': [0, 1]},
590
- title = {'text': f"Sentiment: {result['sentiment']}"},
591
- delta = {'reference': 50},
592
- gauge = {
593
- 'axis': {'range': [None, 100]},
594
- 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
595
- 'steps': [
596
- {'range': [0, 33], 'color': colors['neg']},
597
- {'range': [33, 67], 'color': colors['neu']},
598
- {'range': [67, 100], 'color': colors['pos']}
599
- ],
600
- 'threshold': {
601
- 'line': {'color': "red", 'width': 4},
602
- 'thickness': 0.75,
603
- 'value': 90
604
- }
605
- }
606
- ))
607
- else:
608
- # Two-way gauge
609
- fig = go.Figure(go.Indicator(
610
- mode = "gauge+number",
611
- value = result['confidence'] * 100,
612
- domain = {'x': [0, 1], 'y': [0, 1]},
613
- title = {'text': f"Confidence: {result['sentiment']}"},
614
- gauge = {
615
- 'axis': {'range': [None, 100]},
616
- 'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
617
- 'steps': [
618
- {'range': [0, 50], 'color': "lightgray"},
619
- {'range': [50, 100], 'color': "gray"}
620
- ]
621
- }
622
- ))
623
-
624
- fig.update_layout(height=400, font={'size': 16})
625
- return fig
626
-
627
- @staticmethod
628
- def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
629
- """Create probability bar chart"""
630
- colors = config.THEMES[theme]
631
-
632
- if result['has_neutral']:
633
- labels = ['Negative', 'Neutral', 'Positive']
634
- values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
635
- bar_colors = [colors['neg'], colors['neu'], colors['pos']]
636
- else:
637
- labels = ['Negative', 'Positive']
638
- values = [result['neg_prob'], result['pos_prob']]
639
- bar_colors = [colors['neg'], colors['pos']]
640
-
641
- fig = go.Figure(data=[
642
- go.Bar(x=labels, y=values, marker_color=bar_colors, text=[f'{v:.3f}' for v in values])
643
- ])
644
-
645
- fig.update_traces(texttemplate='%{text}', textposition='outside')
646
- fig.update_layout(
647
- title="Sentiment Probabilities",
648
- yaxis_title="Probability",
649
- height=400,
650
- showlegend=False
651
- )
652
-
653
- return fig
654
 
655
  @staticmethod
656
  def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
657
  """Create batch analysis summary"""
658
- colors = config.THEMES[theme]
659
-
660
- # Count sentiments
661
- sentiments = [r['sentiment'] for r in results if 'sentiment' in r]
662
- sentiment_counts = Counter(sentiments)
663
-
664
- # Create pie chart
665
- fig = go.Figure(data=[go.Pie(
666
- labels=list(sentiment_counts.keys()),
667
- values=list(sentiment_counts.values()),
668
- marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
669
- textinfo='label+percent',
670
- hole=0.3
671
- )])
672
-
673
- fig.update_layout(
674
- title=f"Batch Analysis Summary ({len(results)} texts)",
675
- height=400
676
- )
677
 
678
- return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679
 
680
  @staticmethod
681
  def create_confidence_distribution(results: List[Dict]) -> go.Figure:
682
  """Create confidence distribution plot"""
683
- confidences = [r['confidence'] for r in results if 'confidence' in r and r['sentiment'] != 'Error']
684
-
685
- if not confidences:
686
- return go.Figure()
687
-
688
- fig = go.Figure(data=[go.Histogram(
689
- x=confidences,
690
- nbinsx=20,
691
- marker_color='skyblue',
692
- opacity=0.7
693
- )])
694
-
695
- fig.update_layout(
696
- title="Confidence Distribution",
697
- xaxis_title="Confidence Score",
698
- yaxis_title="Frequency",
699
- height=400
700
- )
701
-
702
- return fig
703
-
704
- @staticmethod
705
- def create_history_dashboard(history: List[Dict]) -> go.Figure:
706
- """Create comprehensive history dashboard"""
707
- if len(history) < 2:
708
- return go.Figure()
709
-
710
- # Create subplots
711
- fig = make_subplots(
712
- rows=2, cols=2,
713
- subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
714
- 'Language Distribution', 'Sentiment Summary'],
715
- specs=[[{"secondary_y": False}, {"secondary_y": False}],
716
- [{"type": "pie"}, {"type": "bar"}]]
717
- )
718
-
719
- # Extract data
720
- indices = list(range(len(history)))
721
- pos_probs = [item['pos_prob'] for item in history]
722
- confidences = [item['confidence'] for item in history]
723
- sentiments = [item['sentiment'] for item in history]
724
- languages = [item.get('language', 'en') for item in history]
725
-
726
- # Sentiment timeline
727
- colors = ['#4CAF50' if s == 'Positive' else '#F44336' for s in sentiments]
728
- fig.add_trace(
729
- go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
730
- marker=dict(color=colors, size=8),
731
- name='Positive Probability'),
732
- row=1, col=1
733
- )
734
-
735
- # Confidence distribution
736
- fig.add_trace(
737
- go.Histogram(x=confidences, nbinsx=10, name='Confidence'),
738
- row=1, col=2
739
- )
740
-
741
- # Language distribution
742
- lang_counts = Counter(languages)
743
- fig.add_trace(
744
- go.Pie(labels=list(lang_counts.keys()), values=list(lang_counts.values()),
745
- name="Languages"),
746
- row=2, col=1
747
- )
748
-
749
- # Sentiment summary
750
- sent_counts = Counter(sentiments)
751
- fig.add_trace(
752
- go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
753
- marker_color=['#4CAF50' if k == 'Positive' else '#F44336' for k in sent_counts.keys()]),
754
- row=2, col=2
755
- )
756
-
757
- fig.update_layout(height=800, showlegend=False)
758
- return fig
759
 
760
- # Main application functions
761
  def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
762
  remove_punct: bool, remove_nums: bool):
763
- """Enhanced single text analysis"""
764
  try:
765
  if not text.strip():
766
- return "Please enter text", None, None
 
 
 
 
767
 
768
  # Map display names back to language codes
769
  language_map = {
@@ -783,6 +630,7 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
783
  'remove_numbers': remove_nums
784
  }
785
 
 
786
  result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)
787
 
788
  # Add to history
@@ -801,39 +649,49 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
801
  history_manager.add_entry(history_entry)
802
 
803
  # Create visualizations
804
- gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
805
- bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)
806
 
807
  # Create info text
808
  info_text = f"""
809
- **Analysis Results:**
810
- - **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
811
  - **Language:** {result['language'].upper()}
812
- - **Keywords:** {', '.join(result['keywords'])}
813
- - **Stats:** {result['word_count']} words, {result['char_count']} characters
 
 
 
 
 
814
  """
815
 
816
  return info_text, gauge_fig, bars_fig
817
 
818
  except Exception as e:
819
- logger.error(f"Analysis failed: {e}")
820
- return f"Error: {str(e)}", None, None
 
821
 
822
  def analyze_batch_texts(batch_text: str, language: str, theme: str,
823
  clean_text: bool, remove_punct: bool, remove_nums: bool):
824
- """Batch text analysis"""
825
  try:
826
  if not batch_text.strip():
827
- return "Please enter texts (one per line)", None, None, None
 
 
 
 
828
 
829
  # Parse batch input
830
  texts = TextProcessor.parse_batch_input(batch_text)
831
 
832
  if len(texts) > config.BATCH_SIZE_LIMIT:
833
- return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
834
 
835
  if not texts:
836
- return "No valid texts found", None, None, None
837
 
838
  # Map display names back to language codes
839
  language_map = {
@@ -878,8 +736,8 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
878
  history_manager.add_batch_entries(batch_entries)
879
 
880
  # Create visualizations
881
- summary_fig = PlotlyVisualizer.create_batch_summary(results, theme)
882
- confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
883
 
884
  # Create results table
885
  df_data = []
@@ -889,9 +747,9 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
889
  'Index': i+1,
890
  'Text': text[:50] + '...' if len(text) > 50 else text,
891
  'Sentiment': 'Error',
892
- 'Confidence': 0.0,
893
  'Language': 'Unknown',
894
- 'Error': result['error']
895
  })
896
  else:
897
  df_data.append({
@@ -900,7 +758,7 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
900
  'Sentiment': result['sentiment'],
901
  'Confidence': f"{result['confidence']:.3f}",
902
  'Language': result['language'].upper(),
903
- 'Keywords': ', '.join(result['keywords'][:3])
904
  })
905
 
906
  df = pd.DataFrame(df_data)
@@ -914,275 +772,217 @@ def analyze_batch_texts(batch_text: str, language: str, theme: str,
914
  avg_confidence = np.mean([r['confidence'] for r in successful_results])
915
 
916
  summary_text = f"""
917
- **Batch Analysis Summary:**
918
  - **Total Texts:** {len(texts)}
919
  - **Successful:** {len(successful_results)}
920
  - **Errors:** {error_count}
921
  - **Average Confidence:** {avg_confidence:.3f}
922
- - **Sentiments:** {dict(sentiment_counts)}
 
 
 
923
  """
924
  else:
925
- summary_text = f"All {len(texts)} texts failed to analyze."
926
 
927
  return summary_text, df, summary_fig, confidence_fig
928
 
929
  except Exception as e:
930
  logger.error(f"Batch analysis failed: {e}")
931
- return f"Error: {str(e)}", None, None, None
932
-
933
- def analyze_advanced_text(text: str, language: str, theme: str, use_lime: bool,
934
- use_attention: bool, lime_features: int):
935
- """Advanced analysis with SHAP and LIME explainability"""
936
- try:
937
- if not text.strip():
938
- return "Please enter text", None, None, None, None
939
-
940
- # Map display names back to language codes
941
- language_map = {
942
- 'Auto Detect': 'auto',
943
- 'English': 'en',
944
- 'Chinese': 'zh',
945
- 'Spanish': 'es',
946
- 'French': 'fr',
947
- 'German': 'de',
948
- 'Swedish': 'sv'
949
- }
950
- language_code = language_map.get(language, 'auto')
951
-
952
- # Basic sentiment analysis first
953
- result = SentimentAnalyzer.analyze_text(text, language_code)
954
-
955
- # Create basic visualizations first
956
- gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme)
957
- bars_fig = PlotlyVisualizer.create_probability_bars(result, theme)
958
-
959
- # Initialize explainability results
960
- lime_result = None
961
- attention_result = None
962
- lime_plot = None
963
- attention_plot = None
964
-
965
- # Get model for explainability analysis
966
- try:
967
- model, tokenizer = model_manager.get_model(language_code)
968
-
969
- # LIME Analysis
970
- if use_lime:
971
- lime_result = ExplainabilityAnalyzer.analyze_with_lime(
972
- text, model, tokenizer, model_manager.device, lime_features
973
- )
974
- lime_plot = AdvancedVisualizer.create_lime_plot(lime_result, theme)
975
- else:
976
- # Create empty plot
977
- lime_plot = go.Figure()
978
- lime_plot.add_annotation(text="LIME analysis disabled", x=0.5, y=0.5,
979
- xref="paper", yref="paper", showarrow=False)
980
- lime_plot.update_layout(height=400, title="LIME Analysis (Disabled)")
981
-
982
- # Attention Analysis
983
- if use_attention:
984
- attention_result = ExplainabilityAnalyzer.analyze_with_attention(
985
- text, model, tokenizer, model_manager.device
986
- )
987
- attention_plot = AdvancedVisualizer.create_attention_plot(attention_result, theme)
988
- else:
989
- # Create empty plot
990
- attention_plot = go.Figure()
991
- attention_plot.add_annotation(text="Attention analysis disabled", x=0.5, y=0.5,
992
- xref="paper", yref="paper", showarrow=False)
993
- attention_plot.update_layout(height=400, title="Attention Analysis (Disabled)")
994
-
995
- except Exception as e:
996
- logger.error(f"Explainability analysis failed: {e}")
997
- # Create error plots
998
- lime_plot = go.Figure()
999
- lime_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5,
1000
- xref="paper", yref="paper", showarrow=False)
1001
- lime_plot.update_layout(height=400, title="Analysis Error")
1002
-
1003
- attention_plot = go.Figure()
1004
- attention_plot.add_annotation(text=f"Analysis Error: {str(e)}", x=0.5, y=0.5,
1005
- xref="paper", yref="paper", showarrow=False)
1006
- attention_plot.update_layout(height=400, title="Analysis Error")
1007
-
1008
- # Add to history
1009
- history_entry = {
1010
- 'text': text[:100] + '...' if len(text) > 100 else text,
1011
- 'full_text': text,
1012
- 'sentiment': result['sentiment'],
1013
- 'confidence': result['confidence'],
1014
- 'pos_prob': result['pos_prob'],
1015
- 'neg_prob': result['neg_prob'],
1016
- 'neu_prob': result.get('neu_prob', 0),
1017
- 'language': result['language'],
1018
- 'timestamp': datetime.now().isoformat(),
1019
- 'analysis_type': 'advanced',
1020
- 'explainability_used': use_lime or use_attention
1021
- }
1022
- history_manager.add_entry(history_entry)
1023
-
1024
- # Create detailed info text
1025
- info_text = f"""
1026
- **Advanced Analysis Results:**
1027
- - **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
1028
- - **Language:** {result['language'].upper()}
1029
- - **Text Statistics:**
1030
- - Words: {result['word_count']}
1031
- - Characters: {result['char_count']}
1032
- - Average word length: {result['char_count']/max(result['word_count'], 1):.1f}
1033
- - **Keywords:** {', '.join(result['keywords'])}
1034
-
1035
- **Explainability Analysis:**
1036
- """
1037
-
1038
- if use_lime:
1039
- if lime_result and 'error' not in lime_result:
1040
- info_text += f"\n- **LIME:** ✅ Analyzed top {lime_features} features"
1041
- else:
1042
- error_msg = lime_result.get('error', 'Unknown error') if lime_result else 'Not available'
1043
- info_text += f"\n- **LIME:** ❌ {error_msg}"
1044
- else:
1045
- info_text += f"\n- **LIME:** ⏸️ Disabled"
1046
-
1047
- if use_attention:
1048
- if attention_result and 'error' not in attention_result:
1049
- info_text += f"\n- **Attention:** ✅ Token-level attention weights computed"
1050
- else:
1051
- error_msg = attention_result.get('error', 'Unknown error') if attention_result else 'Not available'
1052
- info_text += f"\n- **Attention:** ❌ {error_msg}"
1053
- else:
1054
- info_text += f"\n- **Attention:** ⏸️ Disabled"
1055
-
1056
- return info_text, gauge_fig, bars_fig, lime_plot, attention_plot
1057
-
1058
- except Exception as e:
1059
- logger.error(f"Advanced analysis failed: {e}")
1060
- # Return basic empty plots on complete failure
1061
- empty_fig = go.Figure()
1062
- empty_fig.add_annotation(text=f"Analysis failed: {str(e)}", x=0.5, y=0.5,
1063
- xref="paper", yref="paper", showarrow=False)
1064
- empty_fig.update_layout(height=400)
1065
-
1066
- return f"Error: {str(e)}", empty_fig, empty_fig, empty_fig, empty_fig
1067
 
1068
  def get_history_stats():
1069
  """Get enhanced history statistics"""
1070
- stats = history_manager.get_stats()
1071
- if not stats:
1072
- return "No analysis history available"
1073
-
1074
- return f"""
1075
- **Comprehensive History Statistics:**
 
1076
 
1077
- **Analysis Counts:**
1078
  - Total Analyses: {stats['total_analyses']}
1079
- - Positive: {stats['positive_count']}
1080
- - Negative: {stats['negative_count']}
1081
- - Neutral: {stats['neutral_count']}
1082
 
1083
- **Confidence Metrics:**
1084
  - Average Confidence: {stats['avg_confidence']:.3f}
1085
  - Highest Confidence: {stats['max_confidence']:.3f}
1086
  - Lowest Confidence: {stats['min_confidence']:.3f}
1087
 
1088
- **Language Statistics:**
1089
  - Languages Detected: {stats['languages_detected']}
1090
  - Most Common Language: {stats['most_common_language'].upper()}
1091
 
1092
- **Text Statistics:**
1093
  - Average Text Length: {stats['avg_text_length']:.1f} characters
1094
- """
 
 
 
1095
 
1096
  def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
1097
  """Display filtered history"""
1098
- # Convert filters
1099
- sentiment = sentiment_filter if sentiment_filter != "All" else None
1100
- language = language_filter.lower() if language_filter != "All" else None
1101
-
1102
- filtered_history = history_manager.filter_history(
1103
- sentiment=sentiment,
1104
- language=language,
1105
- min_confidence=min_confidence if min_confidence > 0 else None
1106
- )
1107
-
1108
- if not filtered_history:
1109
- return "No entries match the filter criteria", None
1110
-
1111
- # Create DataFrame for display
1112
- df_data = []
1113
- for entry in filtered_history[-20:]: # Show last 20 entries
1114
- df_data.append({
1115
- 'Timestamp': entry['timestamp'][:16], # YYYY-MM-DD HH:MM
1116
- 'Text': entry['text'],
1117
- 'Sentiment': entry['sentiment'],
1118
- 'Confidence': f"{entry['confidence']:.3f}",
1119
- 'Language': entry['language'].upper(),
1120
- 'Type': entry.get('analysis_type', 'single')
1121
- })
1122
-
1123
- df = pd.DataFrame(df_data)
1124
-
1125
- summary = f"""
1126
- **Filtered Results:**
 
1127
  - Found {len(filtered_history)} entries matching criteria
1128
  - Showing most recent {min(20, len(filtered_history))} entries
1129
- """
1130
-
1131
- return summary, df
 
 
 
 
1132
 
1133
  def plot_history_dashboard():
1134
- """Create history dashboard"""
1135
- history = history_manager.get_history()
1136
- if len(history) < 2:
1137
- return None, "Need at least 2 analyses for dashboard"
1138
-
1139
- fig = PlotlyVisualizer.create_history_dashboard(history)
1140
- return fig, f"Dashboard showing {len(history)} analyses"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1141
 
1142
  def export_history_csv():
1143
  """Export history to CSV"""
1144
- history = history_manager.get_history()
1145
- if not history:
1146
- return None, "No history to export"
1147
-
1148
  try:
 
 
 
 
1149
  df = pd.DataFrame(history)
1150
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w')
1151
  df.to_csv(temp_file.name, index=False)
1152
- return temp_file.name, f"Exported {len(history)} entries to CSV"
1153
  except Exception as e:
1154
- return None, f"Export failed: {str(e)}"
 
1155
 
1156
  def export_history_excel():
1157
  """Export history to Excel"""
1158
- history = history_manager.get_history()
1159
- if not history:
1160
- return None, "No history to export"
1161
-
1162
  try:
 
 
 
 
1163
  df = pd.DataFrame(history)
1164
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
1165
  df.to_excel(temp_file.name, index=False)
1166
- return temp_file.name, f"Exported {len(history)} entries to Excel"
1167
  except Exception as e:
1168
- return None, f"Export failed: {str(e)}"
 
1169
 
1170
  def clear_all_history():
1171
  """Clear analysis history"""
1172
- count = history_manager.clear()
1173
- return f"Cleared {count} entries from history"
 
 
 
 
1174
 
1175
  def get_recent_analyses():
1176
  """Get recent analysis summary"""
1177
- recent = history_manager.get_recent_history(10)
1178
- if not recent:
1179
- return "No recent analyses available"
1180
-
1181
- summary_text = "**Recent Analyses (Last 10):**\n\n"
1182
- for i, entry in enumerate(recent, 1):
1183
- summary_text += f"{i}. **{entry['sentiment']}** ({entry['confidence']:.3f}) - {entry['text']}\n"
1184
-
1185
- return summary_text
 
 
 
 
 
1186
 
1187
  # Sample data
1188
  SAMPLE_TEXTS = [
@@ -1214,10 +1014,10 @@ Not sure if I like it or not.
1214
  Amazing quality and fast delivery!
1215
  Could be better, but it's okay."""
1216
 
1217
- # Gradio Interface
1218
- with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment Analyzer") as demo:
1219
- gr.Markdown("# ๐ŸŽญ Advanced Multilingual Sentiment Analyzer")
1220
- gr.Markdown("Comprehensive sentiment analysis with batch processing, advanced analytics, and multilingual support")
1221
 
1222
  with gr.Tab("๐Ÿ“ Single Analysis"):
1223
  with gr.Row():
@@ -1254,58 +1054,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment An
1254
  )
1255
 
1256
  with gr.Column(scale=1):
1257
- result_info = gr.Markdown("Enter text and click Analyze")
1258
 
1259
  with gr.Row():
1260
  gauge_plot = gr.Plot(label="Sentiment Gauge")
1261
  bars_plot = gr.Plot(label="Probability Distribution")
1262
-
1263
- with gr.Tab("๐Ÿ”ฌ Advanced Analysis"):
1264
- with gr.Row():
1265
- with gr.Column(scale=2):
1266
- advanced_input = gr.Textbox(
1267
- label="Text for Advanced Analysis",
1268
- placeholder="Enter text for explainability analysis...",
1269
- lines=4
1270
- )
1271
-
1272
- with gr.Row():
1273
- advanced_language = gr.Dropdown(
1274
- choices=['Auto Detect', 'English', 'Chinese', 'Spanish', 'French', 'German', 'Swedish'],
1275
- value='Auto Detect',
1276
- label="Language"
1277
- )
1278
- advanced_theme = gr.Dropdown(
1279
- choices=list(config.THEMES.keys()),
1280
- value='default',
1281
- label="Theme"
1282
- )
1283
-
1284
- gr.Markdown("### ๐Ÿ” Explainability Options")
1285
- with gr.Row():
1286
- use_lime = gr.Checkbox(label="Use LIME Analysis", value=True)
1287
- use_attention = gr.Checkbox(label="Use Attention Weights", value=True)
1288
-
1289
- lime_features = gr.Slider(
1290
- minimum=5,
1291
- maximum=20,
1292
- value=10,
1293
- step=1,
1294
- label="LIME Features Count"
1295
- )
1296
-
1297
- advanced_analyze_btn = gr.Button("๐Ÿ”ฌ Advanced Analyze", variant="primary", size="lg")
1298
-
1299
- with gr.Column(scale=1):
1300
- advanced_result_info = gr.Markdown("Configure explainability settings and click Advanced Analyze")
1301
-
1302
- with gr.Row():
1303
- advanced_gauge_plot = gr.Plot(label="Sentiment Gauge")
1304
- advanced_bars_plot = gr.Plot(label="Probability Distribution")
1305
-
1306
- with gr.Row():
1307
- lime_plot = gr.Plot(label="LIME Feature Importance")
1308
- attention_plot = gr.Plot(label="Attention Weights")
1309
 
1310
  with gr.Tab("๐Ÿ“Š Batch Analysis"):
1311
  with gr.Row():
@@ -1342,7 +1095,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment An
1342
  )
1343
 
1344
  with gr.Column(scale=1):
1345
- batch_summary = gr.Markdown("Enter texts and click Analyze Batch")
1346
 
1347
  with gr.Row():
1348
  batch_results_table = gr.DataFrame(
@@ -1420,13 +1173,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment An
1420
  outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
1421
  )
1422
 
1423
- # Advanced Analysis
1424
- advanced_analyze_btn.click(
1425
- analyze_advanced_text,
1426
- inputs=[advanced_input, advanced_language, advanced_theme, use_lime, use_attention, lime_features],
1427
- outputs=[advanced_result_info, advanced_gauge_plot, advanced_bars_plot, lime_plot, attention_plot]
1428
- )
1429
-
1430
  # History & Analytics
1431
  stats_btn.click(
1432
  get_history_stats,
@@ -1464,5 +1210,17 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Multilingual Sentiment An
1464
  outputs=history_status
1465
  )
1466
 
 
1467
  if __name__ == "__main__":
1468
- demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
5
  import plotly.express as px
6
  from plotly.subplots import make_subplots
7
  import numpy as np
 
8
  from collections import Counter, defaultdict
9
  import re
10
  import json
 
16
  from functools import lru_cache
17
  from dataclasses import dataclass
18
  from typing import List, Dict, Optional, Tuple
 
 
 
19
  import pandas as pd
20
 
21
+ # ่ฎพ็ฝฎๆ—ฅๅฟ— - ๆๅ‰ๅˆๅง‹ๅŒ–
22
+ logging.basicConfig(level=logging.INFO)
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # ๅฐ่ฏ•ๅฏผๅ…ฅๅฏ้€‰ไพ่ต–
26
+ try:
27
+ from wordcloud import WordCloud
28
+ WORDCLOUD_AVAILABLE = True
29
+ except ImportError:
30
+ WORDCLOUD_AVAILABLE = False
31
+ logger.warning("WordCloud not available")
32
+
33
+ try:
34
+ import nltk
35
+ from nltk.corpus import stopwords
36
+ nltk.download('stopwords', quiet=True)
37
+ nltk.download('punkt', quiet=True)
38
+ STOP_WORDS = set(stopwords.words('english'))
39
+ NLTK_AVAILABLE = True
40
+ except:
41
+ NLTK_AVAILABLE = False
42
+ STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
43
+ logger.warning("NLTK not available, using basic stopwords")
44
+
45
+ try:
46
+ import langdetect
47
+ LANGDETECT_AVAILABLE = True
48
+ except ImportError:
49
+ LANGDETECT_AVAILABLE = False
50
+ logger.warning("langdetect not available, using fallback language detection")
51
+
52
+ # ๅฐ่ฏ•ๅฏผๅ…ฅSHAPๅ’ŒLIME
53
  try:
54
  import shap
55
  SHAP_AVAILABLE = True
 
83
  'sv': 'Swedish'
84
  }
85
 
86
+ # ไฝฟ็”จๆ›ด็จณๅฎš็š„ๆจกๅž‹
87
  MODELS = {
88
  'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
89
  'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
90
+ 'zh': "uer/roberta-base-finetuned-dianping-chinese",
91
+ # ๅค‡็”จๆจกๅž‹
92
+ 'fallback': "distilbert-base-uncased-finetuned-sst-2-english"
93
  }
94
 
95
  # Color themes
 
102
 
103
  config = Config()
104
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  class ModelManager:
106
+ """Manages multiple language models with better error handling"""
107
def __init__(self):
    """Set up empty registries, pick a device and load the default model.

    Construction calls ``_load_models()``, so it performs the model
    loading itself and propagates whatever that call raises.
    """
    # Nothing is usable until _load_models() reports success.
    self.model_loaded = False
    self.models, self.tokenizers = {}, {}
    self.device = self._get_device()
    self._load_models()
113
+
114
+ def _get_device(self):
115
+ """ๅฎ‰ๅ…จ็š„่ฎพๅค‡้€‰ๆ‹ฉ"""
116
+ try:
117
+ if torch.cuda.is_available():
118
+ return torch.device("cuda")
119
+ elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
120
+ return torch.device("mps")
121
+ else:
122
+ return torch.device("cpu")
123
+ except:
124
+ return torch.device("cpu")
125
 
126
def _load_models(self):
    """Load the 'default' model/tokenizer pair, retrying with the fallback.

    The checkpoint named by ``config.MODELS['multilingual']`` is tried
    first; if anything in that attempt raises, the one named by
    ``config.MODELS['fallback']`` is tried.

    Raises:
        RuntimeError: if neither checkpoint could be loaded. The error is
            chained to the exception raised by the fallback attempt and
            ``self.model_loaded`` is left False.
    """
    try:
        # First choice: the multilingual checkpoint.
        model_name = config.MODELS['multilingual']
        logger.info(f"Loading model: {model_name}")
        self._register_default(model_name)
        logger.info(f"Successfully loaded default model: {model_name}")
        return
    except Exception as e:
        logger.error(f"Failed to load multilingual model: {e}")

    try:
        # Second choice: the fallback checkpoint.
        fallback_model = config.MODELS['fallback']
        logger.info(f"Trying fallback model: {fallback_model}")
        self._register_default(fallback_model)
        logger.info(f"Successfully loaded fallback model: {fallback_model}")
    except Exception as e2:
        logger.error(f"Failed to load fallback model: {e2}")
        self.model_loaded = False
        raise RuntimeError("Failed to load any sentiment analysis model") from e2

def _register_default(self, model_name):
    """Load tokenizer and model for ``model_name`` and store them as 'default'."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.to(self.device)
    # Publish the pair only once both halves loaded, so a failed attempt
    # never leaves a tokenizer registered without its model.
    self.tokenizers['default'] = tokenizer
    self.models['default'] = model
    self.model_loaded = True
158
 
159
def get_model(self, language='en'):
    """Return the ``(model, tokenizer)`` pair to use for ``language``.

    Every language is served by the single pair stored under 'default',
    so ``language`` does not influence the result.

    Raises:
        RuntimeError: if ``self.model_loaded`` is falsy.
    """
    if self.model_loaded:
        slot = 'default'
        return self.models[slot], self.tokenizers[slot]
    raise RuntimeError("No models are loaded")
166
 
167
  @staticmethod
168
  def detect_language(text: str) -> str:
169
+ """Detect text language with fallback"""
170
+ if not LANGDETECT_AVAILABLE:
171
+ # ็ฎ€ๅ•็š„่ฏญ่จ€ๆฃ€ๆต‹
172
+ if re.search(r'[\u4e00-\u9fff]', text):
173
+ return 'zh'
174
+ else:
175
+ return 'en'
176
+
177
  try:
 
178
  detected = langdetect.detect(text)
 
179
  language_mapping = {
180
  'zh-cn': 'zh',
181
  'zh-tw': 'zh'
 
185
  except:
186
  return 'en'
187
 
188
+ # ๅ…จๅฑ€ๆจกๅž‹็ฎก็†ๅ™จ
189
model_manager = None


def initialize_models():
    """Create the shared ``ModelManager`` the first time it is needed.

    Returns:
        bool: True when ``model_manager`` is already set or was created by
        this call; False when constructing it raised. In the failure case
        the error is logged and the global stays None, so the next call
        attempts construction again.
    """
    global model_manager
    if model_manager is not None:
        return True
    try:
        model_manager = ModelManager()
    except Exception as e:
        logger.error(f"Model initialization failed: {e}")
        return False
    return True
202
 
203
  class HistoryManager:
204
+ """Enhanced history manager"""
205
  def __init__(self):
206
  self._history = []
207
 
 
211
  self._history = self._history[-config.MAX_HISTORY_SIZE:]
212
 
213
  def add_batch_entries(self, entries: List[Dict]):
 
214
  for entry in entries:
215
  self.add_entry(entry)
216
 
 
218
  return self._history.copy()
219
 
220
  def get_recent_history(self, n: int = 10) -> List[Dict]:
 
221
  return self._history[-n:] if self._history else []
222
 
223
  def filter_history(self, sentiment: str = None, language: str = None,
224
  min_confidence: float = None) -> List[Dict]:
 
225
  filtered = self._history
226
 
227
  if sentiment:
 
251
  'positive_count': sentiments.count('Positive'),
252
  'negative_count': sentiments.count('Negative'),
253
  'neutral_count': sentiments.count('Neutral'),
254
+ 'avg_confidence': np.mean(confidences) if confidences else 0,
255
+ 'max_confidence': np.max(confidences) if confidences else 0,
256
+ 'min_confidence': np.min(confidences) if confidences else 0,
257
  'languages_detected': len(set(languages)),
258
  'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en',
259
+ 'avg_text_length': np.mean([len(item.get('full_text', '')) for item in self._history]) if self._history else 0
260
  }
261
 
262
  history_manager = HistoryManager()
 
283
  @staticmethod
284
  def extract_keywords(text: str, top_k: int = 5) -> List[str]:
285
  """Extract key words from text"""
 
286
  if re.search(r'[\u4e00-\u9fff]', text):
287
  words = re.findall(r'[\u4e00-\u9fff]+', text)
288
  all_chars = ''.join(words)
289
  char_freq = Counter(all_chars)
290
  return [char for char, _ in char_freq.most_common(top_k)]
291
  else:
 
292
  cleaned = TextProcessor.clean_text(text)
293
  words = cleaned.split()
294
  word_freq = Counter(words)
 
301
  return [line.strip() for line in lines if line.strip()]
302
 
303
  class SentimentAnalyzer:
304
+ """Enhanced sentiment analysis with better error handling"""
305
 
306
  @staticmethod
307
  def analyze_text(text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
 
309
  if not text.strip():
310
  raise ValueError("Empty text provided")
311
 
312
+ # ็กฎไฟๆจกๅž‹ๅทฒๅŠ ่ฝฝ
313
+ if not initialize_models():
314
+ raise RuntimeError("Failed to initialize sentiment analysis models")
315
+
316
  # Detect language if auto
317
  if language == 'auto':
318
  detected_lang = model_manager.detect_language(text)
 
320
  detected_lang = language
321
 
322
  # Get appropriate model
323
+ try:
324
+ model, tokenizer = model_manager.get_model(detected_lang)
325
+ except Exception as e:
326
+ logger.error(f"Failed to get model: {e}")
327
+ raise RuntimeError(f"Model loading failed: {e}")
328
 
329
+ # Preprocessing
330
  options = preprocessing_options or {}
331
  processed_text = text
332
  if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
 
338
 
339
  try:
340
  # Tokenize and analyze
341
+ inputs = tokenizer(
342
+ processed_text,
343
+ return_tensors="pt",
344
+ padding=True,
345
+ truncation=True,
346
+ max_length=config.MAX_TEXT_LENGTH
347
+ ).to(model_manager.device)
348
 
349
  with torch.no_grad():
350
  outputs = model(**inputs)
 
391
 
392
  except Exception as e:
393
  logger.error(f"Analysis failed: {e}")
394
+ raise RuntimeError(f"Sentiment analysis failed: {e}")
395
 
396
  @staticmethod
397
  def analyze_batch(texts: List[str], language: str = 'auto',
 
404
  result['batch_index'] = i
405
  results.append(result)
406
  except Exception as e:
 
407
  results.append({
408
  'sentiment': 'Error',
409
  'confidence': 0.0,
410
  'error': str(e),
411
  'batch_index': i,
412
+ 'text': text[:50] + '...' if len(text) > 50 else text
413
  })
414
  return results
415
 
416
class AdvancedVisualizer:
    """Plotly figure builders for single and batch sentiment results.

    All public builders are static methods. None of them raises for a bad
    ``result``/``results`` payload: a failure while building the chart is
    logged and a placeholder figure carrying the error text is returned.
    Theme colours come from ``config.THEMES`` (keys ``'pos'``, ``'neg'``,
    ``'neu'``); an unknown theme name falls back to ``'default'``.
    """

    @staticmethod
    def _message_figure(text: str, title: Optional[str] = None,
                        font_size: Optional[int] = None) -> go.Figure:
        """Return an empty 400px-high figure showing ``text`` at its centre."""
        annotation = dict(text=text, x=0.5, y=0.5,
                          xref="paper", yref="paper", showarrow=False)
        if font_size is not None:
            annotation['font'] = dict(size=font_size)
        layout = {'height': 400}
        if title is not None:
            layout['title'] = title

        fig = go.Figure()
        fig.add_annotation(**annotation)
        fig.update_layout(**layout)
        return fig

    @staticmethod
    def create_sentiment_gauge(result: Dict, theme: str = 'default') -> go.Figure:
        """Build a gauge indicator for one analysis result.

        Args:
            result: analysis dict. Reads ``'sentiment'`` and, depending on
                ``result.get('has_neutral')``, either ``'pos_prob'``
                (three-way gauge) or ``'confidence'`` (two-way gauge).
            theme: key into ``config.THEMES``.

        Returns:
            The gauge figure, or a placeholder figure with the error text
            if building it failed.
        """
        colors = config.THEMES.get(theme, config.THEMES['default'])

        try:
            sentiment = result['sentiment']
            if result.get('has_neutral', False):
                # Three-way gauge: the bar follows the predicted label, so a
                # Neutral prediction must not be drawn in the negative colour.
                bar_color = {
                    'Positive': colors['pos'],
                    'Neutral': colors['neu'],
                }.get(sentiment, colors['neg'])
                fig = go.Figure(go.Indicator(
                    mode="gauge+number+delta",
                    value=result['pos_prob'] * 100,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': f"Sentiment: {sentiment}"},
                    delta={'reference': 50},
                    gauge={
                        'axis': {'range': [None, 100]},
                        'bar': {'color': bar_color},
                        'steps': [
                            {'range': [0, 33], 'color': colors['neg']},
                            {'range': [33, 67], 'color': colors['neu']},
                            {'range': [67, 100], 'color': colors['pos']},
                        ],
                        'threshold': {
                            'line': {'color': "red", 'width': 4},
                            'thickness': 0.75,
                            'value': 90,
                        },
                    },
                ))
            else:
                # Two-way gauge: shows the confidence of the predicted label.
                bar_color = colors['pos'] if sentiment == 'Positive' else colors['neg']
                fig = go.Figure(go.Indicator(
                    mode="gauge+number",
                    value=result['confidence'] * 100,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': f"Confidence: {sentiment}"},
                    gauge={
                        'axis': {'range': [None, 100]},
                        'bar': {'color': bar_color},
                        'steps': [
                            {'range': [0, 50], 'color': "lightgray"},
                            {'range': [50, 100], 'color': "gray"},
                        ],
                    },
                ))

            fig.update_layout(height=400, font={'size': 16})
            return fig

        except Exception as e:
            logger.error(f"Failed to create gauge: {e}")
            return AdvancedVisualizer._message_figure(
                f"Visualization Error: {str(e)}", font_size=14)

    @staticmethod
    def create_probability_bars(result: Dict, theme: str = 'default') -> go.Figure:
        """Build a bar chart of the per-class probabilities of one result.

        Args:
            result: analysis dict. Reads ``'neg_prob'`` and ``'pos_prob'``,
                plus ``'neu_prob'`` when ``result.get('has_neutral')`` is
                truthy.
            theme: key into ``config.THEMES``.

        Returns:
            The bar chart, or a placeholder figure with the error text if
            building it failed.
        """
        colors = config.THEMES.get(theme, config.THEMES['default'])

        try:
            if result.get('has_neutral', False):
                labels = ['Negative', 'Neutral', 'Positive']
                values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
                bar_colors = [colors['neg'], colors['neu'], colors['pos']]
            else:
                labels = ['Negative', 'Positive']
                values = [result['neg_prob'], result['pos_prob']]
                bar_colors = [colors['neg'], colors['pos']]

            fig = go.Figure(data=[
                go.Bar(x=labels, y=values, marker_color=bar_colors,
                       text=[f'{v:.3f}' for v in values])
            ])

            fig.update_traces(texttemplate='%{text}', textposition='outside')
            fig.update_layout(
                title="Sentiment Probabilities",
                yaxis_title="Probability",
                height=400,
                showlegend=False
            )
            return fig

        except Exception as e:
            logger.error(f"Failed to create bars: {e}")
            return AdvancedVisualizer._message_figure(f"Visualization Error: {str(e)}")

    @staticmethod
    def create_batch_summary(results: List[Dict], theme: str = 'default') -> go.Figure:
        """Build a donut chart of sentiment label counts for a batch.

        Entries without a ``'sentiment'`` key, or whose sentiment is
        ``'Error'``, are left out of the counts; the title still reports
        ``len(results)``.

        Returns:
            The pie figure, a "No valid results" placeholder when nothing
            is countable, or a placeholder with the error text on failure.
        """
        colors = config.THEMES.get(theme, config.THEMES['default'])

        try:
            sentiments = [r['sentiment'] for r in results
                          if 'sentiment' in r and r['sentiment'] != 'Error']
            if not sentiments:
                return AdvancedVisualizer._message_figure(
                    "No valid results to display", title="Batch Summary")

            sentiment_counts = Counter(sentiments)

            fig = go.Figure(data=[go.Pie(
                labels=list(sentiment_counts.keys()),
                values=list(sentiment_counts.values()),
                # Theme keys are the first three letters of the label, lowercased.
                marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts],
                textinfo='label+percent',
                hole=0.3
            )])

            fig.update_layout(
                title=f"Batch Analysis Summary ({len(results)} texts)",
                height=400
            )
            return fig

        except Exception as e:
            logger.error(f"Failed to create batch summary: {e}")
            return AdvancedVisualizer._message_figure(f"Error: {str(e)}")

    @staticmethod
    def create_confidence_distribution(results: List[Dict]) -> go.Figure:
        """Build a 20-bin histogram of confidence scores for a batch.

        Entries without a ``'confidence'`` key, or whose sentiment is
        ``'Error'``, are skipped.

        Returns:
            The histogram, a "No confidence data" placeholder when nothing
            remains, or a placeholder with the error text on failure.
        """
        try:
            # .get() keeps an entry that lacks 'sentiment' from raising KeyError.
            confidences = [r['confidence'] for r in results
                           if 'confidence' in r and r.get('sentiment') != 'Error']

            if not confidences:
                return AdvancedVisualizer._message_figure(
                    "No confidence data to display", title="Confidence Distribution")

            fig = go.Figure(data=[go.Histogram(
                x=confidences,
                nbinsx=20,
                marker_color='skyblue',
                opacity=0.7
            )])

            fig.update_layout(
                title="Confidence Distribution",
                xaxis_title="Confidence Score",
                yaxis_title="Frequency",
                height=400
            )
            return fig

        except Exception as e:
            logger.error(f"Failed to create confidence distribution: {e}")
            return AdvancedVisualizer._message_figure(f"Error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
 
603
+ # Main application functions with better error handling
604
  def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
605
  remove_punct: bool, remove_nums: bool):
606
+ """Enhanced single text analysis with error handling"""
607
  try:
608
  if not text.strip():
609
+ return "โŒ Please enter text to analyze", None, None
610
+
611
+ # ๅˆๅง‹ๅŒ–ๆฃ€ๆŸฅ
612
+ if not initialize_models():
613
+ return "โŒ Failed to load sentiment analysis models. Please check your internet connection and try again.", None, None
614
 
615
  # Map display names back to language codes
616
  language_map = {
 
630
  'remove_numbers': remove_nums
631
  }
632
 
633
+ # ๅˆ†ๆžๆ–‡ๆœฌ
634
  result = SentimentAnalyzer.analyze_text(text, language_code, preprocessing_options)
635
 
636
  # Add to history
 
649
  history_manager.add_entry(history_entry)
650
 
651
  # Create visualizations
652
+ gauge_fig = AdvancedVisualizer.create_sentiment_gauge(result, theme)
653
+ bars_fig = AdvancedVisualizer.create_probability_bars(result, theme)
654
 
655
  # Create info text
656
  info_text = f"""
657
+ โœ… **Analysis Results:**
658
+ - **Sentiment:** {result['sentiment']} (Confidence: {result['confidence']:.3f})
659
  - **Language:** {result['language'].upper()}
660
+ - **Keywords:** {', '.join(result['keywords']) if result['keywords'] else 'None'}
661
+ - **Text Stats:** {result['word_count']} words, {result['char_count']} characters
662
+
663
+ ๐Ÿ“Š **Probability Scores:**
664
+ - Positive: {result['pos_prob']:.3f}
665
+ - Negative: {result['neg_prob']:.3f}
666
+ - Neutral: {result.get('neu_prob', 0):.3f}
667
  """
668
 
669
  return info_text, gauge_fig, bars_fig
670
 
671
  except Exception as e:
672
+ logger.error(f"Single text analysis failed: {e}")
673
+ error_msg = f"โŒ **Analysis Failed:** {str(e)}\n\nPlease check your input and try again."
674
+ return error_msg, None, None
675
 
676
  def analyze_batch_texts(batch_text: str, language: str, theme: str,
677
  clean_text: bool, remove_punct: bool, remove_nums: bool):
678
+ """Batch text analysis with improved error handling"""
679
  try:
680
  if not batch_text.strip():
681
+ return "โŒ Please enter texts to analyze (one per line)", None, None, None
682
+
683
+ # ๅˆๅง‹ๅŒ–ๆฃ€ๆŸฅ
684
+ if not initialize_models():
685
+ return "โŒ Failed to load sentiment analysis models", None, None, None
686
 
687
  # Parse batch input
688
  texts = TextProcessor.parse_batch_input(batch_text)
689
 
690
  if len(texts) > config.BATCH_SIZE_LIMIT:
691
+ return f"โŒ Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
692
 
693
  if not texts:
694
+ return "โŒ No valid texts found", None, None, None
695
 
696
  # Map display names back to language codes
697
  language_map = {
 
736
  history_manager.add_batch_entries(batch_entries)
737
 
738
  # Create visualizations
739
+ summary_fig = AdvancedVisualizer.create_batch_summary(results, theme)
740
+ confidence_fig = AdvancedVisualizer.create_confidence_distribution(results)
741
 
742
  # Create results table
743
  df_data = []
 
747
  'Index': i+1,
748
  'Text': text[:50] + '...' if len(text) > 50 else text,
749
  'Sentiment': 'Error',
750
+ 'Confidence': '0.000',
751
  'Language': 'Unknown',
752
+ 'Error': result.get('error', 'Unknown error')
753
  })
754
  else:
755
  df_data.append({
 
758
  'Sentiment': result['sentiment'],
759
  'Confidence': f"{result['confidence']:.3f}",
760
  'Language': result['language'].upper(),
761
+ 'Keywords': ', '.join(result.get('keywords', [])[:3])
762
  })
763
 
764
  df = pd.DataFrame(df_data)
 
772
  avg_confidence = np.mean([r['confidence'] for r in successful_results])
773
 
774
  summary_text = f"""
775
+ โœ… **Batch Analysis Summary:**
776
  - **Total Texts:** {len(texts)}
777
  - **Successful:** {len(successful_results)}
778
  - **Errors:** {error_count}
779
  - **Average Confidence:** {avg_confidence:.3f}
780
+ - **Sentiment Distribution:**
781
+ - Positive: {sentiment_counts.get('Positive', 0)}
782
+ - Negative: {sentiment_counts.get('Negative', 0)}
783
+ - Neutral: {sentiment_counts.get('Neutral', 0)}
784
  """
785
  else:
786
+ summary_text = f"โŒ All {len(texts)} texts failed to analyze."
787
 
788
  return summary_text, df, summary_fig, confidence_fig
789
 
790
  except Exception as e:
791
  logger.error(f"Batch analysis failed: {e}")
792
+ return f"โŒ Error: {str(e)}", None, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
 
794
def get_history_stats():
    """Render the aggregate history statistics as a Markdown report.

    Returns:
        str: the formatted report built from ``history_manager.get_stats()``,
        a hint message when that call returns nothing, or an error message
        if gathering or formatting the statistics raised.
    """
    try:
        totals = history_manager.get_stats()
        if not totals:
            return "๐Ÿ“Š No analysis history available yet. Analyze some texts to see statistics!"

        report = f"""
๐Ÿ“Š **Comprehensive History Statistics:**

**๐Ÿ“ˆ Analysis Counts:**
- Total Analyses: {totals['total_analyses']}
- ๐Ÿ˜Š Positive: {totals['positive_count']}
- ๐Ÿ˜ž Negative: {totals['negative_count']}
- ๐Ÿ˜ Neutral: {totals['neutral_count']}

**๐ŸŽฏ Confidence Metrics:**
- Average Confidence: {totals['avg_confidence']:.3f}
- Highest Confidence: {totals['max_confidence']:.3f}
- Lowest Confidence: {totals['min_confidence']:.3f}

**๐ŸŒ Language Statistics:**
- Languages Detected: {totals['languages_detected']}
- Most Common Language: {totals['most_common_language'].upper()}

**๐Ÿ“ Text Statistics:**
- Average Text Length: {totals['avg_text_length']:.1f} characters
        """
        return report
    except Exception as e:
        logger.error(f"Failed to get history stats: {e}")
        return f"โŒ Error getting statistics: {str(e)}"
825
 
826
def filter_history_display(sentiment_filter: str, language_filter: str, min_confidence: float):
    """Filter the history and return a summary plus a table of the matches.

    Args:
        sentiment_filter: sentiment label to keep, or "All" for no filter.
        language_filter: language to keep (lower-cased before filtering),
            or "All" for no filter.
        min_confidence: lower confidence bound; values that are not
            greater than 0 disable the bound.

    Returns:
        tuple: ``(markdown_summary, DataFrame)`` covering at most the last
        20 matches, or ``(message, None)`` when nothing matches or an
        error occurred.
    """
    try:
        matches = history_manager.filter_history(
            sentiment=None if sentiment_filter == "All" else sentiment_filter,
            language=None if language_filter == "All" else language_filter.lower(),
            min_confidence=min_confidence if min_confidence > 0 else None
        )

        if not matches:
            return "๐Ÿ” No entries match the filter criteria", None

        # Only the 20 most recent matches are tabulated.
        rows = [
            {
                'Timestamp': entry['timestamp'][:16],  # YYYY-MM-DD HH:MM
                'Text': entry['text'],
                'Sentiment': entry['sentiment'],
                'Confidence': f"{entry['confidence']:.3f}",
                'Language': entry['language'].upper(),
                'Type': entry.get('analysis_type', 'single')
            }
            for entry in matches[-20:]
        ]
        table = pd.DataFrame(rows)

        summary = f"""
๐Ÿ” **Filtered Results:**
- Found {len(matches)} entries matching criteria
- Showing most recent {min(20, len(matches))} entries
        """

        return summary, table

    except Exception as e:
        logger.error(f"Failed to filter history: {e}")
        return f"โŒ Error filtering history: {str(e)}", None
867
 
868
def plot_history_dashboard():
    """Build the 2x2 history dashboard figure.

    The panels are a positive-probability timeline, a confidence
    histogram, a language pie and a sentiment count bar chart.

    Returns:
        tuple: ``(figure, status_message)``; the figure is None when fewer
        than two analyses are stored or when building the dashboard failed.
    """
    try:
        entries = history_manager.get_history()
        if len(entries) < 2:
            return None, "๐Ÿ“Š Need at least 2 analyses to create dashboard. Analyze more texts!"

        dashboard = make_subplots(
            rows=2, cols=2,
            subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
                            'Language Distribution', 'Sentiment Summary'],
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"type": "pie"}, {"type": "bar"}]]
        )

        # Per-entry series, in history order.
        positions = list(range(len(entries)))
        positive_scores = [entry.get('pos_prob', 0) for entry in entries]
        confidence_scores = [entry['confidence'] for entry in entries]
        labels = [entry['sentiment'] for entry in entries]
        langs = [entry.get('language', 'en') for entry in entries]

        palette = {'Positive': '#4CAF50', 'Negative': '#F44336', 'Neutral': '#FF9800'}

        # Top-left: timeline; any label other than Positive/Negative is
        # drawn in the neutral colour.
        point_colors = [palette.get(label, '#FF9800') for label in labels]
        timeline = go.Scatter(x=positions, y=positive_scores, mode='lines+markers',
                              marker=dict(color=point_colors, size=6),
                              name='Positive Probability')
        dashboard.add_trace(timeline, row=1, col=1)

        # Top-right: confidence histogram.
        dashboard.add_trace(
            go.Histogram(x=confidence_scores, nbinsx=10, name='Confidence'),
            row=1, col=2
        )

        # Bottom-left: language share.
        per_language = Counter(langs)
        dashboard.add_trace(
            go.Pie(labels=list(per_language.keys()), values=list(per_language.values()),
                   name="Languages"),
            row=2, col=1
        )

        # Bottom-right: label counts; labels outside the palette are grey.
        per_label = Counter(labels)
        dashboard.add_trace(
            go.Bar(x=list(per_label.keys()), y=list(per_label.values()),
                   marker_color=[palette.get(name, '#999999') for name in per_label.keys()]),
            row=2, col=2
        )

        dashboard.update_layout(height=800, showlegend=False, title_text="Analysis Dashboard")
        return dashboard, f"๐Ÿ“Š Dashboard showing {len(entries)} analyses"

    except Exception as e:
        logger.error(f"Failed to create dashboard: {e}")
        return None, f"โŒ Error creating dashboard: {str(e)}"
930
 
931
def export_history_csv():
    """Export the stored analysis history to a temporary CSV file.

    Returns:
        tuple: ``(path, status_message)``. ``path`` is the name of the
        written file, or None when the history is empty or the export
        failed. The file is not deleted by this function.
    """
    try:
        history = history_manager.get_history()
        if not history:
            return None, "๐Ÿ“Š No history to export"

        df = pd.DataFrame(history)
        # Reserve a unique path, then close our handle before pandas reopens
        # the file by name: an open handle is a descriptor leak and blocks
        # the second open on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w') as temp_file:
            export_path = temp_file.name
        df.to_csv(export_path, index=False)
        return export_path, f"โœ… Exported {len(history)} entries to CSV"
    except Exception as e:
        logger.error(f"CSV export failed: {e}")
        return None, f"โŒ Export failed: {str(e)}"
945
 
946
def export_history_excel():
    """Export the stored analysis history to a temporary Excel workbook.

    Returns:
        tuple: ``(path, status_message)``. ``path`` is the name of the
        written ``.xlsx`` file, or None when the history is empty or the
        export failed. The file is not deleted by this function.
    """
    try:
        history = history_manager.get_history()
        if not history:
            return None, "๐Ÿ“Š No history to export"

        df = pd.DataFrame(history)
        # Reserve a unique path, then close our handle before pandas reopens
        # the file by name: an open handle is a descriptor leak and blocks
        # the second open on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
            export_path = temp_file.name
        df.to_excel(export_path, index=False)
        return export_path, f"โœ… Exported {len(history)} entries to Excel"
    except Exception as e:
        logger.error(f"Excel export failed: {e}")
        return None, f"โŒ Export failed: {str(e)}"
960
 
961
def clear_all_history():
    """Empty the analysis history and report the outcome.

    Returns:
        str: a message containing the value returned by
        ``history_manager.clear()``, or an error message if clearing
        raised.
    """
    try:
        removed = history_manager.clear()
        message = f"๐Ÿ—‘๏ธ Cleared {removed} entries from history"
    except Exception as e:
        logger.error(f"Failed to clear history: {e}")
        message = f"โŒ Error clearing history: {str(e)}"
    return message
969
 
970
def get_recent_analyses():
    """Summarise the ten most recent history entries as Markdown.

    Returns:
        str: a heading followed by one numbered line per entry (marker,
        sentiment, confidence and text), a hint message when there is no
        history, or an error message if building the summary raised.
    """
    def marker_for(label):
        # Any label other than Positive/Negative gets the third marker.
        if label == 'Positive':
            return "๐Ÿ˜Š"
        if label == 'Negative':
            return "๐Ÿ˜ž"
        return "๐Ÿ˜"

    try:
        latest = history_manager.get_recent_history(10)
        if not latest:
            return "๐Ÿ“Š No recent analyses available. Start analyzing some texts!"

        lines = [
            f"{rank}. {marker_for(entry['sentiment'])} **{entry['sentiment']}** ({entry['confidence']:.3f}) - {entry['text']}\n"
            for rank, entry in enumerate(latest, 1)
        ]
        return "๐Ÿ•’ **Recent Analyses (Last 10):**\n\n" + "".join(lines)
    except Exception as e:
        logger.error(f"Failed to get recent analyses: {e}")
        return f"โŒ Error getting recent analyses: {str(e)}"
986
 
987
  # Sample data
988
  SAMPLE_TEXTS = [
 
1014
  Amazing quality and fast delivery!
1015
  Could be better, but it's okay."""
1016
 
1017
+ # Gradio Interface - ็ฎ€ๅŒ–็‰ˆๆœฌ๏ผŒไธ“ๆณจไบŽๆ ธๅฟƒๅŠŸ่ƒฝ
1018
+ with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
1019
+ gr.Markdown("# ๐ŸŽญ Multilingual Sentiment Analyzer")
1020
+ gr.Markdown("Comprehensive sentiment analysis with batch processing and multilingual support")
1021
 
1022
  with gr.Tab("๐Ÿ“ Single Analysis"):
1023
  with gr.Row():
 
1054
  )
1055
 
1056
  with gr.Column(scale=1):
1057
+ result_info = gr.Markdown("Enter text and click Analyze to see results")
1058
 
1059
  with gr.Row():
1060
  gauge_plot = gr.Plot(label="Sentiment Gauge")
1061
  bars_plot = gr.Plot(label="Probability Distribution")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1062
 
1063
  with gr.Tab("๐Ÿ“Š Batch Analysis"):
1064
  with gr.Row():
 
1095
  )
1096
 
1097
  with gr.Column(scale=1):
1098
+ batch_summary = gr.Markdown("Enter texts and click Analyze Batch to see results")
1099
 
1100
  with gr.Row():
1101
  batch_results_table = gr.DataFrame(
 
1173
  outputs=[batch_summary, batch_results_table, batch_summary_plot, batch_confidence_plot]
1174
  )
1175
 
 
 
 
 
 
 
 
1176
  # History & Analytics
1177
  stats_btn.click(
1178
  get_history_stats,
 
1210
  outputs=history_status
1211
  )
1212
 
1213
+ # ๅฏๅŠจๅบ”็”จ
1214
if __name__ == "__main__":
    # Launch the Gradio app when the file is run as a script.
    try:
        logger.info("Starting Multilingual Sentiment Analyzer...")
        demo.launch(
            share=True,
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True
        )
    except Exception as e:
        # Keep the traceback in the log and exit non-zero so a supervisor
        # can tell a failed start from a normal shutdown.
        logger.exception(f"Failed to launch application: {e}")
        print(f"โŒ Application failed to start: {e}")
        print("Please check your dependencies and try again.")
        raise SystemExit(1) from e