entropy25 commited on
Commit
219103c
·
verified ·
1 Parent(s): 1ad3bd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -480
app.py CHANGED
@@ -7,6 +7,14 @@ from plotly.subplots import make_subplots
7
  import numpy as np
8
  from wordcloud import WordCloud
9
  from collections import Counter, defaultdict
 
 
 
 
 
 
 
 
10
  from dataclasses import dataclass
11
  from typing import List, Dict, Optional, Tuple, Any, Callable
12
  from contextlib import contextmanager
@@ -16,7 +24,18 @@ import langdetect
16
  import pandas as pd
17
  import gc
18
 
 
 
 
 
 
19
  # Configuration
 
 
 
 
 
 
20
  CACHE_SIZE: int = 128
21
  BATCH_PROCESSING_SIZE: int = 8
22
 
@@ -35,7 +54,6 @@ import gc
35
  'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
36
  'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
37
  'zh': "uer/roberta-base-finetuned-dianping-chinese"
38
-
39
  }
40
 
41
  # Color themes for Plotly
@@ -63,19 +81,28 @@ except:
63
  # Decorators and Context Managers
64
  def handle_errors(default_return=None):
65
  """Centralized error handling decorator"""
 
 
 
 
 
 
 
 
 
66
  return decorator
67
 
68
  @contextmanager
69
  def memory_cleanup():
70
  """Context manager for memory cleanup"""
71
-
72
  try:
73
  yield
74
  finally:
75
-
76
  gc.collect()
77
 
78
  class ThemeContext:
 
 
79
  self.theme = theme
80
  self.colors = config.THEMES.get(theme, config.THEMES['default'])
81
 
@@ -83,9 +110,6 @@ class ThemeContext:
83
  class ModelManager:
84
  """Multi-language model manager with lazy loading"""
85
  _instance = None
86
-
87
-
88
-
89
 
90
  def __new__(cls):
91
  if cls._instance is None:
@@ -103,16 +127,6 @@ class ModelManager:
103
 
104
  def _load_default_models(self):
105
  """Load default models"""
106
-
107
-
108
-
109
-
110
-
111
-
112
-
113
-
114
-
115
-
116
  try:
117
  # Load multilingual model as default
118
  model_name = config.MODELS['multilingual']
@@ -241,6 +255,7 @@ class HistoryManager:
241
  def clear(self) -> int:
242
  count = len(self._history)
243
  self._history.clear()
 
244
 
245
  def size(self) -> int:
246
  return len(self._history)
@@ -266,93 +281,16 @@ class HistoryManager:
266
  'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en'
267
  }
268
 
269
- # Core Sentiment Analysis Engine
270
  class SentimentEngine:
271
  """Multi-language sentiment analysis engine"""
272
 
273
  def __init__(self):
274
  self.model_manager = ModelManager()
275
 
276
- def extract_attention_keywords(self, text: str, language: str = 'auto', top_k: int = 10) -> List[Tuple[str, float]]:
277
- """Extract keywords using attention weights"""
278
- try:
279
- if language == 'auto':
280
- language = self.model_manager.detect_language(text)
281
-
282
- model, tokenizer = self.model_manager.get_model(language)
283
-
284
- inputs = tokenizer(
285
- text, return_tensors="pt", padding=True,
286
- truncation=True, max_length=config.MAX_TEXT_LENGTH
287
- ).to(self.model_manager.device)
288
-
289
-
290
- with torch.no_grad():
291
- outputs = model(**inputs, output_attentions=True)
292
-
293
-
294
- if hasattr(outputs, 'attentions') and outputs.attentions:
295
- # Use attention weights
296
- attention = outputs.attentions[-1]
297
- avg_attention = attention.mean(dim=1)[0, 0, :]
298
-
299
- tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
300
- attention_scores = avg_attention.cpu().numpy()
301
-
302
- # Process tokens and scores
303
- word_scores = {}
304
- current_word = ""
305
- current_score = 0.0
306
-
307
- for token, score in zip(tokens, attention_scores):
308
- if token in ['[CLS]', '[SEP]', '[PAD]', '<s>', '</s>']:
309
- continue
310
-
311
- if token.startswith('##') or token.startswith('▁'):
312
- current_word += token.replace('##', '').replace('▁', '')
313
- current_score = max(current_score, score)
314
- else:
315
- if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
316
- word_scores[current_word.lower()] = current_score
317
- current_word = token
318
- current_score = score
319
-
320
-
321
-
322
-
323
-
324
-
325
-
326
- if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
327
- word_scores[current_word.lower()] = current_score
328
-
329
- # Filter and sort
330
- filtered_words = {
331
- word: score for word, score in word_scores.items()
332
- if word not in STOP_WORDS and len(word) >= config.MIN_WORD_LENGTH
333
- }
334
-
335
- sorted_words = sorted(filtered_words.items(), key=lambda x: x[1], reverse=True)
336
- return sorted_words[:top_k]
337
-
338
-
339
-
340
-
341
-
342
-
343
-
344
-
345
-
346
- except Exception as e:
347
- logger.error(f"Attention keyword extraction failed: {e}")
348
-
349
- # Fallback to simple keyword extraction
350
- keywords = TextProcessor.extract_keywords(text, top_k)
351
- return [(word, 0.1) for word in keywords]
352
-
353
  @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'keywords': []})
354
  def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
355
- """Analyze single text with enhanced features"""
356
  if not text.strip():
357
  raise ValueError("Empty text provided")
358
 
@@ -378,7 +316,6 @@ class SentimentEngine:
378
  # Tokenize and analyze
379
  inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
380
  truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device)
381
-
382
 
383
  with torch.no_grad():
384
  outputs = model(**inputs)
@@ -413,13 +350,14 @@ class SentimentEngine:
413
  'has_neutral': False
414
  }
415
 
416
- # Extract keywords
417
- keywords = self.extract_attention_keywords(text, detected_lang)
 
418
 
419
  # Add metadata
420
  result.update({
421
  'language': detected_lang,
422
- 'keywords': keywords,
423
  'word_count': len(text.split()),
424
  'char_count': len(text)
425
  })
@@ -433,6 +371,12 @@ class SentimentEngine:
433
  if len(texts) > config.BATCH_SIZE_LIMIT:
434
  texts = texts[:config.BATCH_SIZE_LIMIT]
435
 
 
 
 
 
 
 
436
  if progress_callback:
437
  progress_callback((i + len(batch)) / len(texts))
438
 
@@ -452,17 +396,191 @@ class SentimentEngine:
452
  'text': text[:100] + '...' if len(text) > 100 else text,
453
  'full_text': text
454
  })
455
-
456
-
457
-
458
-
459
-
460
-
461
-
462
 
463
  return results
464
 
465
- # Advanced Plotly Visualization System
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  class PlotlyVisualizer:
467
  """Enhanced Plotly visualizations"""
468
 
@@ -547,31 +665,12 @@ class PlotlyVisualizer:
547
  @staticmethod
548
  @handle_errors(default_return=None)
549
  def create_keyword_chart(keywords: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> go.Figure:
550
- """Create keyword importance chart"""
551
  if not keywords:
552
  fig = go.Figure()
553
  fig.add_annotation(text="No keywords extracted",
554
  xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
555
  fig.update_layout(height=400, title="Keywords")
556
-
557
-
558
-
559
-
560
-
561
-
562
-
563
-
564
-
565
-
566
-
567
-
568
-
569
-
570
-
571
-
572
-
573
-
574
-
575
  return fig
576
 
577
  words = [word for word, score in keywords]
@@ -592,7 +691,7 @@ class PlotlyVisualizer:
592
 
593
  fig.update_layout(
594
  title=f"Top Keywords ({sentiment})",
595
- xaxis_title="Attention Weight",
596
  yaxis_title="Keywords",
597
  height=400,
598
  showlegend=False
@@ -625,14 +724,6 @@ class PlotlyVisualizer:
625
  )
626
 
627
  return fig
628
-
629
-
630
-
631
-
632
-
633
-
634
-
635
-
636
 
637
  @staticmethod
638
  @handle_errors(default_return=None)
@@ -730,6 +821,8 @@ class DataHandler:
730
  if not data:
731
  return None, "No data to export"
732
 
 
 
733
 
734
  if format_type == 'csv':
735
  writer = csv.writer(temp_file)
@@ -751,10 +844,10 @@ class DataHandler:
751
  ])
752
  elif format_type == 'json':
753
  json.dump(data, temp_file, indent=2, ensure_ascii=False)
 
754
  temp_file.close()
755
  return temp_file.name, f"Exported {len(data)} entries"
756
 
757
-
758
  @staticmethod
759
  @handle_errors(default_return="")
760
  def process_file(file) -> str:
@@ -765,7 +858,6 @@ class DataHandler:
765
  content = file.read().decode('utf-8')
766
 
767
  if file.name.endswith('.csv'):
768
-
769
  csv_file = io.StringIO(content)
770
  reader = csv.reader(csv_file)
771
  try:
@@ -782,6 +874,7 @@ class DataHandler:
782
  texts = []
783
  for line in lines:
784
  if line.strip():
 
785
  if text:
786
  texts.append(text)
787
  return '\n'.join(texts)
@@ -794,6 +887,7 @@ class SentimentApp:
794
 
795
  def __init__(self):
796
  self.engine = SentimentEngine()
 
797
  self.history = HistoryManager()
798
  self.data_handler = DataHandler()
799
 
@@ -805,12 +899,11 @@ class SentimentApp:
805
  ["Esta película fue increíble, me encantó la cinematografía."], # Spanish
806
  ["Ce film était magnifique, j'ai adoré la réalisation."], # French
807
  ]
808
-
809
 
810
  @handle_errors(default_return=("Please enter text", None, None, None))
811
  def analyze_single(self, text: str, language: str, theme: str, clean_text: bool,
812
  remove_punct: bool, remove_nums: bool):
813
- """Single text analysis with enhanced visualizations"""
814
  if not text.strip():
815
  return "Please enter text", None, None, None
816
 
@@ -966,6 +1059,23 @@ class SentimentApp:
966
 
967
  return summary_text, df, summary_fig, confidence_fig
968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969
  @handle_errors(default_return=(None, "No history available"))
970
  def plot_history(self, theme: str = 'default'):
971
  """Plot comprehensive history analysis"""
@@ -973,9 +1083,7 @@ class SentimentApp:
973
  if len(history) < 2:
974
  return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
975
 
976
-
977
  theme_ctx = ThemeContext(theme)
978
-
979
 
980
  with memory_cleanup():
981
  fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
@@ -1012,9 +1120,9 @@ class SentimentApp:
1012
  - **Languages Detected:** {stats['languages_detected']}
1013
  """
1014
 
1015
- # Gradio Interface
1016
  def create_interface():
1017
- """Create comprehensive Gradio interface"""
1018
  app = SentimentApp()
1019
 
1020
  with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
@@ -1063,351 +1171,42 @@ def create_interface():
1063
  probability_plot = gr.Plot(label="Probability Distribution")
1064
 
1065
  with gr.Row():
1066
- keyword_plot = gr.Plot(label="Key Contributing Words")
1067
 
1068
- with gr.Tab("Batch Analysis"):
1069
- with gr.Row():
1070
- with gr.Column():
1071
- file_upload = gr.File(
1072
- label="Upload File (CSV/TXT)",
1073
- file_types=[".csv", ".txt"]
1074
- )
1075
- batch_input = gr.Textbox(
1076
- label="Batch Input (one text per line)",
1077
- placeholder="Enter multiple texts, one per line...",
1078
- lines=10
1079
- )
1080
-
1081
- with gr.Row():
1082
- batch_language = gr.Dropdown(
1083
- choices=list(config.SUPPORTED_LANGUAGES.values()),
1084
- value="Auto Detect",
1085
- label="Language"
1086
- )
1087
- batch_theme = gr.Dropdown(
1088
- choices=list(config.THEMES.keys()),
1089
- value="default",
1090
- label="Theme"
1091
- )
1092
-
1093
- with gr.Row():
1094
- batch_clean_cb = gr.Checkbox(label="Clean Text", value=False)
1095
- batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
1096
- batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
1097
-
1098
- with gr.Row():
1099
- load_file_btn = gr.Button("Load File")
1100
- analyze_batch_btn = gr.Button("Analyze Batch", variant="primary")
1101
-
1102
- with gr.Column():
1103
- batch_summary = gr.Textbox(label="Batch Summary", lines=8)
1104
- batch_results_df = gr.Dataframe(
1105
- label="Detailed Results",
1106
- headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Keywords"],
1107
- datatype=["number", "str", "str", "str", "str", "str"]
1108
- )
1109
 
1110
- with gr.Row():
1111
- batch_plot = gr.Plot(label="Batch Analysis Summary")
1112
- confidence_dist_plot = gr.Plot(label="Confidence Distribution")
1113
-
1114
- with gr.Tab("History & Analytics"):
1115
  with gr.Row():
1116
  with gr.Column():
1117
- with gr.Row():
1118
- refresh_history_btn = gr.Button("Refresh History")
1119
- clear_history_btn = gr.Button("Clear History", variant="stop")
1120
- status_btn = gr.Button("Get Status")
1121
-
1122
- history_theme = gr.Dropdown(
1123
- choices=list(config.THEMES.keys()),
1124
- value="default",
1125
- label="Dashboard Theme"
1126
  )
1127
 
1128
- with gr.Row():
1129
- export_csv_btn = gr.Button("Export CSV")
1130
- export_json_btn = gr.Button("Export JSON")
1131
-
1132
- with gr.Column():
1133
- history_status = gr.Textbox(label="History Status", lines=8)
1134
-
1135
- history_dashboard = gr.Plot(label="History Analytics Dashboard")
1136
-
1137
- with gr.Row():
1138
- csv_download = gr.File(label="CSV Download", visible=True)
1139
- json_download = gr.File(label="JSON Download", visible=True)
1140
-
1141
- # Event Handlers
1142
- analyze_btn.click(
1143
- app.analyze_single,
1144
- inputs=[text_input, language_selector, theme_selector,
1145
- clean_text_cb, remove_punct_cb, remove_nums_cb],
1146
- outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
1147
- )
1148
-
1149
- load_file_btn.click(
1150
- app.data_handler.process_file,
1151
- inputs=file_upload,
1152
- outputs=batch_input
1153
- )
1154
-
1155
- analyze_batch_btn.click(
1156
- app.analyze_batch,
1157
- inputs=[batch_input, batch_language, batch_theme,
1158
- batch_clean_cb, batch_punct_cb, batch_nums_cb],
1159
- outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
1160
- )
1161
-
1162
- refresh_history_btn.click(
1163
- app.plot_history,
1164
- inputs=history_theme,
1165
- outputs=[history_dashboard, history_status]
1166
- )
1167
-
1168
- clear_history_btn.click(
1169
- lambda: f"Cleared {app.history.clear()} entries",
1170
- outputs=history_status
1171
- )
1172
-
1173
- status_btn.click(
1174
- app.get_history_status,
1175
- outputs=history_status
1176
- )
1177
-
1178
- export_csv_btn.click(
1179
- lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
1180
- outputs=[csv_download, history_status]
1181
- )
1182
-
1183
- export_json_btn.click(
1184
- lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
1185
- outputs=[json_download, history_status]
1186
- )
1187
-
1188
- return demo
1189
-
1190
- # Application Entry Point
1191
- if __name__ == "__main__":
1192
- logging.basicConfig(
1193
- level=logging.INFO,
1194
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
1195
- )
1196
-
1197
- try:
1198
- demo = create_interface()
1199
- demo.launch(
1200
- share=True,
1201
- server_name="0.0.0.0",
1202
- server_port=7860,
1203
- show_error=True
1204
- )
1205
- except Exception as e:
1206
- logger.error(f"Failed to launch application: {e}")
1207
- raise
1208
-
1209
- @handle_errors(default_return=("Please enter texts", None, None, None))
1210
- def analyze_batch(self, batch_text: str, language: str, theme: str,
1211
- clean_text: bool, remove_punct: bool, remove_nums: bool):
1212
- """Enhanced batch analysis"""
1213
- if not batch_text.strip():
1214
- return "Please enter texts (one per line)", None, None, None
1215
-
1216
- # Parse batch input
1217
- texts = TextProcessor.parse_batch_input(batch_text)
1218
-
1219
- if len(texts) > config.BATCH_SIZE_LIMIT:
1220
- return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
1221
-
1222
- if not texts:
1223
- return "No valid texts found", None, None, None
1224
-
1225
-
1226
- # Map display names to language codes
1227
- language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
1228
- language_code = language_map.get(language, 'auto')
1229
-
1230
- preprocessing_options = {
1231
- 'clean_text': clean_text,
1232
- 'remove_punctuation': remove_punct,
1233
- 'remove_numbers': remove_nums
1234
- }
1235
-
1236
- with memory_cleanup():
1237
- results = self.engine.analyze_batch(texts, language_code, preprocessing_options)
1238
-
1239
- # Add to history
1240
- batch_entries = []
1241
- for result in results:
1242
- if 'error' not in result:
1243
- entry = {
1244
- 'text': result['text'],
1245
- 'full_text': result['full_text'],
1246
- 'sentiment': result['sentiment'],
1247
- 'confidence': result['confidence'],
1248
- 'pos_prob': result.get('pos_prob', 0),
1249
- 'neg_prob': result.get('neg_prob', 0),
1250
- 'neu_prob': result.get('neu_prob', 0),
1251
- 'language': result['language'],
1252
- 'keywords': result['keywords'],
1253
- 'word_count': result['word_count'],
1254
- 'analysis_type': 'batch',
1255
- 'batch_index': result['batch_index']
1256
- }
1257
- batch_entries.append(entry)
1258
-
1259
- self.history.add_batch(batch_entries)
1260
-
1261
- # Create visualizations
1262
- theme_ctx = ThemeContext(theme)
1263
- summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx)
1264
- confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
1265
-
1266
- # Create results DataFrame
1267
- df_data = []
1268
- for result in results:
1269
- if 'error' in result:
1270
- df_data.append({
1271
- 'Index': result['batch_index'] + 1,
1272
- 'Text': result['text'],
1273
- 'Sentiment': 'Error',
1274
- 'Confidence': 0.0,
1275
- 'Language': 'Unknown',
1276
- 'Error': result['error']
1277
- })
1278
- else:
1279
- keywords_str = ', '.join([word for word, _ in result['keywords'][:3]])
1280
- df_data.append({
1281
- 'Index': result['batch_index'] + 1,
1282
- 'Text': result['text'],
1283
- 'Sentiment': result['sentiment'],
1284
- 'Confidence': f"{result['confidence']:.3f}",
1285
- 'Language': result['language'].upper(),
1286
- 'Keywords': keywords_str
1287
- })
1288
-
1289
- df = pd.DataFrame(df_data)
1290
-
1291
- # Create summary text
1292
- successful_results = [r for r in results if 'error' not in r]
1293
- error_count = len(results) - len(successful_results)
1294
-
1295
- if successful_results:
1296
- sentiment_counts = Counter([r['sentiment'] for r in successful_results])
1297
- avg_confidence = np.mean([r['confidence'] for r in successful_results])
1298
- languages = Counter([r['language'] for r in successful_results])
1299
-
1300
- summary_text = f"""
1301
- **Batch Analysis Summary:**
1302
- - **Total Texts:** {len(texts)}
1303
- - **Successful:** {len(successful_results)}
1304
- - **Errors:** {error_count}
1305
- - **Average Confidence:** {avg_confidence:.3f}
1306
- - **Sentiments:** {dict(sentiment_counts)}
1307
- - **Languages Detected:** {dict(languages)}
1308
- """
1309
- else:
1310
- summary_text = f"All {len(texts)} texts failed to analyze."
1311
-
1312
- return summary_text, df, summary_fig, confidence_fig
1313
-
1314
- @handle_errors(default_return=(None, "No history available"))
1315
- def plot_history(self, theme: str = 'default'):
1316
- """Plot comprehensive history analysis"""
1317
- history = self.history.get_all()
1318
- if len(history) < 2:
1319
- return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
1320
-
1321
- theme_ctx = ThemeContext(theme)
1322
-
1323
- with memory_cleanup():
1324
- fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
1325
- stats = self.history.get_stats()
1326
-
1327
- stats_text = f"""
1328
- **History Statistics:**
1329
- - **Total Analyses:** {stats.get('total_analyses', 0)}
1330
- - **Positive:** {stats.get('positive_count', 0)}
1331
- - **Negative:** {stats.get('negative_count', 0)}
1332
- - **Neutral:** {stats.get('neutral_count', 0)}
1333
- - **Average Confidence:** {stats.get('avg_confidence', 0):.3f}
1334
- - **Languages:** {stats.get('languages_detected', 0)}
1335
- - **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()}
1336
- """
1337
-
1338
- return fig, stats_text
1339
-
1340
- @handle_errors(default_return=("No data available",))
1341
- def get_history_status(self):
1342
- """Get current history status"""
1343
- stats = self.history.get_stats()
1344
- if not stats:
1345
- return "No analyses performed yet"
1346
-
1347
- return f"""
1348
- **Current Status:**
1349
- - **Total Analyses:** {stats['total_analyses']}
1350
- - **Recent Sentiment Distribution:**
1351
- * Positive: {stats['positive_count']}
1352
- * Negative: {stats['negative_count']}
1353
- * Neutral: {stats['neutral_count']}
1354
- - **Average Confidence:** {stats['avg_confidence']:.3f}
1355
- - **Languages Detected:** {stats['languages_detected']}
1356
- """
1357
-
1358
- # Gradio Interface
1359
- def create_interface():
1360
- """Create comprehensive Gradio interface"""
1361
- app = SentimentApp()
1362
-
1363
- with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
1364
- gr.Markdown("# 🌍 Advanced Multilingual Sentiment Analyzer")
1365
- gr.Markdown("AI-powered sentiment analysis with support for multiple languages, advanced visualizations, and explainable AI features")
1366
-
1367
- with gr.Tab("Single Analysis"):
1368
- with gr.Row():
1369
- with gr.Column():
1370
- text_input = gr.Textbox(
1371
- label="Enter Text for Analysis",
1372
- placeholder="Enter your text in any supported language...",
1373
- lines=5
1374
  )
1375
 
1376
  with gr.Row():
1377
- language_selector = gr.Dropdown(
1378
- choices=list(config.SUPPORTED_LANGUAGES.values()),
1379
- value="Auto Detect",
1380
- label="Language"
1381
- )
1382
- theme_selector = gr.Dropdown(
1383
- choices=list(config.THEMES.keys()),
1384
- value="default",
1385
- label="Theme"
1386
- )
1387
-
1388
- with gr.Row():
1389
- clean_text_cb = gr.Checkbox(label="Clean Text", value=False)
1390
- remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
1391
- remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
1392
 
1393
- analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
1394
-
1395
- gr.Examples(
1396
- examples=app.examples,
1397
- inputs=text_input,
1398
- cache_examples=False
1399
- )
1400
 
1401
  with gr.Column():
1402
- result_output = gr.Textbox(label="Analysis Results", lines=8)
1403
 
1404
  with gr.Row():
1405
- gauge_plot = gr.Plot(label="Sentiment Gauge")
1406
- probability_plot = gr.Plot(label="Probability Distribution")
1407
-
1408
- with gr.Row():
1409
-
1410
- keyword_plot = gr.Plot(label="Key Contributing Words")
1411
 
1412
  with gr.Tab("Batch Analysis"):
1413
  with gr.Row():
@@ -1481,13 +1280,10 @@ def create_interface():
1481
  with gr.Row():
1482
  csv_download = gr.File(label="CSV Download", visible=True)
1483
  json_download = gr.File(label="JSON Download", visible=True)
1484
-
1485
-
1486
-
1487
-
1488
-
1489
 
1490
  # Event Handlers
 
 
1491
  analyze_btn.click(
1492
  app.analyze_single,
1493
  inputs=[text_input, language_selector, theme_selector,
@@ -1495,6 +1291,20 @@ def create_interface():
1495
  outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
1496
  )
1497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1498
  load_file_btn.click(
1499
  app.data_handler.process_file,
1500
  inputs=file_upload,
@@ -1508,6 +1318,7 @@ def create_interface():
1508
  outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
1509
  )
1510
 
 
1511
  refresh_history_btn.click(
1512
  app.plot_history,
1513
  inputs=history_theme,
 
7
  import numpy as np
8
  from wordcloud import WordCloud
9
  from collections import Counter, defaultdict
10
+ import re
11
+ import json
12
+ import csv
13
+ import io
14
+ import tempfile
15
+ from datetime import datetime
16
+ import logging
17
+ from functools import lru_cache, wraps
18
  from dataclasses import dataclass
19
  from typing import List, Dict, Optional, Tuple, Any, Callable
20
  from contextlib import contextmanager
 
24
  import pandas as pd
25
  import gc
26
 
27
+ # Advanced analysis imports
28
+ import shap
29
+ import lime
30
+ from lime.lime_text import LimeTextExplainer
31
+
32
  # Configuration
33
+ @dataclass
34
+ class Config:
35
+ MAX_HISTORY_SIZE: int = 1000
36
+ BATCH_SIZE_LIMIT: int = 50
37
+ MAX_TEXT_LENGTH: int = 512
38
+ MIN_WORD_LENGTH: int = 2
39
  CACHE_SIZE: int = 128
40
  BATCH_PROCESSING_SIZE: int = 8
41
 
 
54
  'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
55
  'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
56
  'zh': "uer/roberta-base-finetuned-dianping-chinese"
 
57
  }
58
 
59
  # Color themes for Plotly
 
81
  # Decorators and Context Managers
82
  def handle_errors(default_return=None):
83
  """Centralized error handling decorator"""
84
+ def decorator(func: Callable) -> Callable:
85
+ @wraps(func)
86
+ def wrapper(*args, **kwargs):
87
+ try:
88
+ return func(*args, **kwargs)
89
+ except Exception as e:
90
+ logger.error(f"{func.__name__} failed: {e}")
91
+ return default_return if default_return is not None else f"Error: {str(e)}"
92
+ return wrapper
93
  return decorator
94
 
95
  @contextmanager
96
  def memory_cleanup():
97
  """Context manager for memory cleanup"""
 
98
  try:
99
  yield
100
  finally:
 
101
  gc.collect()
102
 
103
  class ThemeContext:
104
+ """Theme management context"""
105
+ def __init__(self, theme: str = 'default'):
106
  self.theme = theme
107
  self.colors = config.THEMES.get(theme, config.THEMES['default'])
108
 
 
110
  class ModelManager:
111
  """Multi-language model manager with lazy loading"""
112
  _instance = None
 
 
 
113
 
114
  def __new__(cls):
115
  if cls._instance is None:
 
127
 
128
  def _load_default_models(self):
129
  """Load default models"""
 
 
 
 
 
 
 
 
 
 
130
  try:
131
  # Load multilingual model as default
132
  model_name = config.MODELS['multilingual']
 
255
  def clear(self) -> int:
256
  count = len(self._history)
257
  self._history.clear()
258
+ return count
259
 
260
  def size(self) -> int:
261
  return len(self._history)
 
281
  'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en'
282
  }
283
 
284
+ # Core Sentiment Analysis Engine (Modified - removed attention analysis)
285
  class SentimentEngine:
286
  """Multi-language sentiment analysis engine"""
287
 
288
  def __init__(self):
289
  self.model_manager = ModelManager()
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'keywords': []})
292
  def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
293
+ """Analyze single text with basic features"""
294
  if not text.strip():
295
  raise ValueError("Empty text provided")
296
 
 
316
  # Tokenize and analyze
317
  inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
318
  truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device)
 
319
 
320
  with torch.no_grad():
321
  outputs = model(**inputs)
 
350
  'has_neutral': False
351
  }
352
 
353
+ # Extract basic keywords
354
+ keywords = TextProcessor.extract_keywords(text, 10)
355
+ keyword_tuples = [(word, 0.1) for word in keywords] # Simple keyword extraction
356
 
357
  # Add metadata
358
  result.update({
359
  'language': detected_lang,
360
+ 'keywords': keyword_tuples,
361
  'word_count': len(text.split()),
362
  'char_count': len(text)
363
  })
 
371
  if len(texts) > config.BATCH_SIZE_LIMIT:
372
  texts = texts[:config.BATCH_SIZE_LIMIT]
373
 
374
+ results = []
375
+ batch_size = config.BATCH_PROCESSING_SIZE
376
+
377
+ for i in range(0, len(texts), batch_size):
378
+ batch = texts[i:i+batch_size]
379
+
380
  if progress_callback:
381
  progress_callback((i + len(batch)) / len(texts))
382
 
 
396
  'text': text[:100] + '...' if len(text) > 100 else text,
397
  'full_text': text
398
  })
 
 
 
 
 
 
 
399
 
400
  return results
401
 
402
+ # Advanced Analysis Engine (NEW)
403
+ class AdvancedAnalysisEngine:
404
+ """Advanced analysis using SHAP and LIME"""
405
+
406
+ def __init__(self):
407
+ self.model_manager = ModelManager()
408
+
409
+ def create_prediction_function(self, model, tokenizer, device):
410
+ """Create prediction function for LIME/SHAP"""
411
+ def predict_proba(texts):
412
+ results = []
413
+ for text in texts:
414
+ inputs = tokenizer(text, return_tensors="pt", padding=True,
415
+ truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
416
+ with torch.no_grad():
417
+ outputs = model(**inputs)
418
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
419
+ results.append(probs)
420
+ return np.array(results)
421
+ return predict_proba
422
+
423
+ @handle_errors(default_return=("Analysis failed", None, None))
424
+ def analyze_with_shap(self, text: str, language: str = 'auto') -> Tuple[str, go.Figure, Dict]:
425
+ """Perform SHAP analysis"""
426
+ if not text.strip():
427
+ return "Please enter text for analysis", None, {}
428
+
429
+ # Detect language and get model
430
+ if language == 'auto':
431
+ detected_lang = self.model_manager.detect_language(text)
432
+ else:
433
+ detected_lang = language
434
+
435
+ model, tokenizer = self.model_manager.get_model(detected_lang)
436
+
437
+ # Create prediction function
438
+ predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
439
+
440
+ try:
441
+ # Initialize SHAP explainer
442
+ explainer = shap.Explainer(predict_fn, tokenizer)
443
+
444
+ # Get SHAP values
445
+ shap_values = explainer([text])
446
+
447
+ # Extract token importance
448
+ tokens = shap_values.data[0]
449
+ values = shap_values.values[0]
450
+
451
+ # Create visualization data
452
+ if len(values.shape) > 1:
453
+ # Multi-class case
454
+ pos_values = values[:, -1] if values.shape[1] == 3 else values[:, 1]
455
+ else:
456
+ pos_values = values
457
+
458
+ # Create SHAP plot
459
+ fig = go.Figure()
460
+
461
+ colors = ['red' if v < 0 else 'green' for v in pos_values]
462
+
463
+ fig.add_trace(go.Bar(
464
+ x=list(range(len(tokens))),
465
+ y=pos_values,
466
+ text=tokens,
467
+ textposition='outside',
468
+ marker_color=colors,
469
+ name='SHAP Values'
470
+ ))
471
+
472
+ fig.update_layout(
473
+ title="SHAP Analysis - Token Importance",
474
+ xaxis_title="Token Index",
475
+ yaxis_title="SHAP Value",
476
+ height=500,
477
+ xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens)
478
+ )
479
+
480
+ # Create analysis summary
481
+ analysis_data = {
482
+ 'method': 'SHAP',
483
+ 'language': detected_lang,
484
+ 'total_tokens': len(tokens),
485
+ 'positive_influence': sum(1 for v in pos_values if v > 0),
486
+ 'negative_influence': sum(1 for v in pos_values if v < 0),
487
+ 'most_important_tokens': [(tokens[i], float(pos_values[i]))
488
+ for i in np.argsort(np.abs(pos_values))[-5:]]
489
+ }
490
+
491
+ summary_text = f"""
492
+ **SHAP Analysis Results:**
493
+ - **Language:** {detected_lang.upper()}
494
+ - **Total Tokens:** {analysis_data['total_tokens']}
495
+ - **Positive Influence Tokens:** {analysis_data['positive_influence']}
496
+ - **Negative Influence Tokens:** {analysis_data['negative_influence']}
497
+ - **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
498
+ """
499
+
500
+ return summary_text, fig, analysis_data
501
+
502
+ except Exception as e:
503
+ logger.error(f"SHAP analysis failed: {e}")
504
+ return f"SHAP analysis failed: {str(e)}", None, {}
505
+
506
    @handle_errors(default_return=("Analysis failed", None, None))
    def analyze_with_lime(self, text: str, language: str = 'auto') -> Tuple[str, go.Figure, Dict]:
        """Explain a sentiment prediction with LIME.

        Perturbs the input text and fits a local surrogate model to estimate
        how much each word/phrase pushes the prediction toward each class.

        Args:
            text: Raw input text to explain.
            language: ISO language code, or 'auto' to detect it from the text.

        Returns:
            Tuple of (markdown summary string, horizontal-bar Plotly figure,
            analysis-metadata dict). On empty input or LIME failure the figure
            is None and the dict is empty.
        """
        if not text.strip():
            return "Please enter text for analysis", None, {}

        # Detect language and get model
        if language == 'auto':
            detected_lang = self.model_manager.detect_language(text)
        else:
            detected_lang = language

        model, tokenizer = self.model_manager.get_model(detected_lang)

        # Create prediction function (wraps the model so LIME can query
        # class probabilities for its perturbed text samples)
        predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)

        try:
            # Initialize LIME explainer
            # NOTE(review): class names assume a 3-class model head
            # (Negative/Neutral/Positive) — confirm for every language model.
            explainer = LimeTextExplainer(class_names=['Negative', 'Neutral', 'Positive'])

            # Get LIME explanation
            exp = explainer.explain_instance(text, predict_fn, num_features=20)

            # Extract feature importance as (word, weight) pairs,
            # ordered by absolute importance
            lime_data = exp.as_list()

            # Create visualization
            words = [item[0] for item in lime_data]
            scores = [item[1] for item in lime_data]

            fig = go.Figure()

            # Red bars pull the prediction negative, green bars positive
            colors = ['red' if s < 0 else 'green' for s in scores]

            fig.add_trace(go.Bar(
                y=words,
                x=scores,
                orientation='h',
                marker_color=colors,
                text=[f'{s:.3f}' for s in scores],
                textposition='auto',
                name='LIME Importance'
            ))

            fig.update_layout(
                title="LIME Analysis - Feature Importance",
                xaxis_title="Importance Score",
                yaxis_title="Words/Phrases",
                height=500
            )

            # Create analysis summary
            analysis_data = {
                'method': 'LIME',
                'language': detected_lang,
                'features_analyzed': len(lime_data),
                'positive_features': sum(1 for _, score in lime_data if score > 0),
                'negative_features': sum(1 for _, score in lime_data if score < 0),
                'feature_importance': lime_data
            }

            summary_text = f"""
            **LIME Analysis Results:**
            - **Language:** {detected_lang.upper()}
            - **Features Analyzed:** {analysis_data['features_analyzed']}
            - **Positive Features:** {analysis_data['positive_features']}
            - **Negative Features:** {analysis_data['negative_features']}
            - **Top Features:** {', '.join([f"{word}({score:.3f})" for word, score in lime_data[:5]])}
            """

            return summary_text, fig, analysis_data

        except Exception as e:
            logger.error(f"LIME analysis failed: {e}")
            return f"LIME analysis failed: {str(e)}", None, {}
582
+
583
+ # Advanced Plotly Visualization System (Updated - removed attention visualization)
584
  class PlotlyVisualizer:
585
  """Enhanced Plotly visualizations"""
586
 
 
665
  @staticmethod
666
  @handle_errors(default_return=None)
667
  def create_keyword_chart(keywords: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> go.Figure:
668
+ """Create basic keyword chart"""
669
  if not keywords:
670
  fig = go.Figure()
671
  fig.add_annotation(text="No keywords extracted",
672
  xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
673
  fig.update_layout(height=400, title="Keywords")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
674
  return fig
675
 
676
  words = [word for word, score in keywords]
 
691
 
692
  fig.update_layout(
693
  title=f"Top Keywords ({sentiment})",
694
+ xaxis_title="Frequency Score",
695
  yaxis_title="Keywords",
696
  height=400,
697
  showlegend=False
 
724
  )
725
 
726
  return fig
 
 
 
 
 
 
 
 
727
 
728
  @staticmethod
729
  @handle_errors(default_return=None)
 
821
  if not data:
822
  return None, "No data to export"
823
 
824
+ temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False,
825
+ suffix=f'.{format_type}', encoding='utf-8')
826
 
827
  if format_type == 'csv':
828
  writer = csv.writer(temp_file)
 
844
  ])
845
  elif format_type == 'json':
846
  json.dump(data, temp_file, indent=2, ensure_ascii=False)
847
+
848
  temp_file.close()
849
  return temp_file.name, f"Exported {len(data)} entries"
850
 
 
851
  @staticmethod
852
  @handle_errors(default_return="")
853
  def process_file(file) -> str:
 
858
  content = file.read().decode('utf-8')
859
 
860
  if file.name.endswith('.csv'):
 
861
  csv_file = io.StringIO(content)
862
  reader = csv.reader(csv_file)
863
  try:
 
874
  texts = []
875
  for line in lines:
876
  if line.strip():
877
+ text = line.strip().strip('"')
878
  if text:
879
  texts.append(text)
880
  return '\n'.join(texts)
 
887
 
888
  def __init__(self):
889
  self.engine = SentimentEngine()
890
+ self.advanced_engine = AdvancedAnalysisEngine() # NEW
891
  self.history = HistoryManager()
892
  self.data_handler = DataHandler()
893
 
 
899
  ["Esta película fue increíble, me encantó la cinematografía."], # Spanish
900
  ["Ce film était magnifique, j'ai adoré la réalisation."], # French
901
  ]
 
902
 
903
  @handle_errors(default_return=("Please enter text", None, None, None))
904
  def analyze_single(self, text: str, language: str, theme: str, clean_text: bool,
905
  remove_punct: bool, remove_nums: bool):
906
+ """Single text analysis with basic visualizations (removed attention analysis)"""
907
  if not text.strip():
908
  return "Please enter text", None, None, None
909
 
 
1059
 
1060
  return summary_text, df, summary_fig, confidence_fig
1061
 
1062
+ # NEW: Advanced analysis methods
1063
+ @handle_errors(default_return=("Please enter text", None))
1064
+ def analyze_with_shap(self, text: str, language: str):
1065
+ """Perform SHAP analysis"""
1066
+ language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
1067
+ language_code = language_map.get(language, 'auto')
1068
+
1069
+ return self.advanced_engine.analyze_with_shap(text, language_code)
1070
+
1071
+ @handle_errors(default_return=("Please enter text", None))
1072
+ def analyze_with_lime(self, text: str, language: str):
1073
+ """Perform LIME analysis"""
1074
+ language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
1075
+ language_code = language_map.get(language, 'auto')
1076
+
1077
+ return self.advanced_engine.analyze_with_lime(text, language_code)
1078
+
1079
  @handle_errors(default_return=(None, "No history available"))
1080
  def plot_history(self, theme: str = 'default'):
1081
  """Plot comprehensive history analysis"""
 
1083
  if len(history) < 2:
1084
  return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
1085
 
 
1086
  theme_ctx = ThemeContext(theme)
 
1087
 
1088
  with memory_cleanup():
1089
  fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
 
1120
  - **Languages Detected:** {stats['languages_detected']}
1121
  """
1122
 
1123
+ # Gradio Interface (Updated with Advanced Analysis tab)
1124
  def create_interface():
1125
+ """Create comprehensive Gradio interface with Advanced Analysis tab"""
1126
  app = SentimentApp()
1127
 
1128
  with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
 
1171
  probability_plot = gr.Plot(label="Probability Distribution")
1172
 
1173
  with gr.Row():
1174
+ keyword_plot = gr.Plot(label="Basic Keywords")
1175
 
1176
+ # NEW: Advanced Analysis Tab
1177
+ with gr.Tab("Advanced Analysis"):
1178
+ gr.Markdown("## 🔬 Explainable AI Analysis")
1179
+ gr.Markdown("Use SHAP and LIME to understand which words and phrases most influence the sentiment prediction.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1180
 
 
 
 
 
 
1181
  with gr.Row():
1182
  with gr.Column():
1183
+ advanced_text_input = gr.Textbox(
1184
+ label="Enter Text for Advanced Analysis",
1185
+ placeholder="Enter text to analyze with SHAP and LIME...",
1186
+ lines=6
 
 
 
 
 
1187
  )
1188
 
1189
+ advanced_language = gr.Dropdown(
1190
+ choices=list(config.SUPPORTED_LANGUAGES.values()),
1191
+ value="Auto Detect",
1192
+ label="Language"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1193
  )
1194
 
1195
  with gr.Row():
1196
+ shap_btn = gr.Button("SHAP Analysis", variant="primary")
1197
+ lime_btn = gr.Button("LIME Analysis", variant="secondary")
 
 
 
 
 
 
 
 
 
 
 
 
 
1198
 
1199
+ gr.Markdown("""
1200
+ **Analysis Methods:**
1201
+ - **SHAP**: Shows token-level importance scores
1202
+ - **LIME**: Explains predictions by perturbing input features
1203
+ """)
 
 
1204
 
1205
  with gr.Column():
1206
+ advanced_results = gr.Textbox(label="Analysis Summary", lines=10)
1207
 
1208
  with gr.Row():
1209
+ advanced_plot = gr.Plot(label="Feature Importance Visualization")
 
 
 
 
 
1210
 
1211
  with gr.Tab("Batch Analysis"):
1212
  with gr.Row():
 
1280
  with gr.Row():
1281
  csv_download = gr.File(label="CSV Download", visible=True)
1282
  json_download = gr.File(label="JSON Download", visible=True)
 
 
 
 
 
1283
 
1284
  # Event Handlers
1285
+
1286
+ # Single Analysis
1287
  analyze_btn.click(
1288
  app.analyze_single,
1289
  inputs=[text_input, language_selector, theme_selector,
 
1291
  outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
1292
  )
1293
 
1294
+ # Advanced Analysis (NEW)
1295
+ shap_btn.click(
1296
+ app.analyze_with_shap,
1297
+ inputs=[advanced_text_input, advanced_language],
1298
+ outputs=[advanced_results, advanced_plot]
1299
+ )
1300
+
1301
+ lime_btn.click(
1302
+ app.analyze_with_lime,
1303
+ inputs=[advanced_text_input, advanced_language],
1304
+ outputs=[advanced_results, advanced_plot]
1305
+ )
1306
+
1307
+ # Batch Analysis
1308
  load_file_btn.click(
1309
  app.data_handler.process_file,
1310
  inputs=file_upload,
 
1318
  outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
1319
  )
1320
 
1321
+ # History & Analytics
1322
  refresh_history_btn.click(
1323
  app.plot_history,
1324
  inputs=history_theme,