Update app.py
app.py CHANGED
@@ -484,41 +484,57 @@ class SentimentEngine:
 
         return results
 
-    # FIXED
 class AdvancedAnalysisEngine:
-    """Advanced analysis using SHAP and LIME with
 
     def __init__(self):
         self.model_manager = ModelManager()
-        self.batch_size = 32  # Batch size for processing multiple samples
 
-    def create_batch_prediction_function(self, model, tokenizer, device, batch_size):
-        """Create
         def predict_proba(texts):
-
                 texts = [texts]
 
             results = []
 
-            # Process in batches for efficiency
             for i in range(0, len(texts), batch_size):
                 batch_texts = texts[i:i + batch_size]
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
             return np.array(results)
 
@@ -526,7 +542,7 @@ class AdvancedAnalysisEngine:
 
     @handle_errors(default_return=("Analysis failed", None, None))
     def analyze_with_shap(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
-        """FIXED
         if not text.strip():
             return "Please enter text for analysis", None, {}
 
@@ -538,36 +554,44 @@ class AdvancedAnalysisEngine:
 
         model, tokenizer = self.model_manager.get_model(detected_lang)
 
-        # Create optimized prediction function
-        predict_fn = self.create_batch_prediction_function(
-            model, tokenizer, self.model_manager.device, self.batch_size
-        )
-
         try:
-            # FIXED
-
             explainer = shap.Explainer(predict_fn, masker=shap.maskers.Text(tokenizer))
 
-            #
             shap_values = explainer([text], max_evals=num_samples)
 
-            # Extract
-
-
 
-            #
             if len(values.shape) > 1:
-                #
-                pos_values = values[:, -1] if values.shape[1]
             else:
                 pos_values = values
 
-            # Ensure
             min_len = min(len(tokens), len(pos_values))
             tokens = tokens[:min_len]
             pos_values = pos_values[:min_len]
 
-            # Create
             fig = go.Figure()
 
             colors = ['red' if v < 0 else 'green' for v in pos_values]
@@ -598,101 +622,41 @@ class AdvancedAnalysisEngine:
                 'samples_used': num_samples,
                 'positive_influence': sum(1 for v in pos_values if v > 0),
                 'negative_influence': sum(1 for v in pos_values if v < 0),
-                'most_important_tokens': [(tokens[i], float(pos_values[i]))
-                                          for i in np.argsort(np.abs(pos_values))[-5:]]
             }
 
             summary_text = f"""
-            **SHAP Analysis Results (FIXED
             - **Language:** {detected_lang.upper()}
             - **Total Tokens:** {analysis_data['total_tokens']}
             - **Samples Used:** {num_samples}
             - **Positive Influence Tokens:** {analysis_data['positive_influence']}
             - **Negative Influence Tokens:** {analysis_data['negative_influence']}
             - **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
-            - **
-            - **Fix Applied:** Simplified SHAP explainer initialization
             """
 
             return summary_text, fig, analysis_data
 
         except Exception as e:
             logger.error(f"SHAP analysis failed: {e}")
-
-            try:
-                logger.info("Trying alternative SHAP approach...")
-
-                # Alternative: Use Partition explainer
-                explainer = shap.Explainer(predict_fn, shap.maskers.Text(tokenizer, "[MASK]"))
-                shap_values = explainer(text, max_evals=min(num_samples, 50))  # Reduce samples for fallback
-
-                # Simple token-level analysis
-                words = text.split()
-                if len(words) == 0:
-                    words = [text]
-
-                # Create simple importance based on word position
-                pos_values = np.random.uniform(-0.1, 0.1, len(words))  # Placeholder values
-
-                # Create SHAP plot
-                fig = go.Figure()
-                colors = ['red' if v < 0 else 'green' for v in pos_values]
-
-                fig.add_trace(go.Bar(
-                    x=list(range(len(words))),
-                    y=pos_values,
-                    text=words,
-                    textposition='outside',
-                    marker_color=colors,
-                    name='SHAP Values (Fallback)',
-                    hovertemplate='<b>%{text}</b><br>SHAP Value: %{y:.4f}<extra></extra>'
-                ))
-
-                fig.update_layout(
-                    title=f"SHAP Analysis - Fallback Mode (Samples: {num_samples})",
-                    xaxis_title="Token Index",
-                    yaxis_title="SHAP Value",
-                    height=500
-                )
-
-                analysis_data = {
-                    'method': 'SHAP_FALLBACK',
-                    'language': detected_lang,
-                    'total_tokens': len(words),
-                    'samples_used': num_samples,
-                    'note': 'Fallback mode used due to SHAP initialization issues'
-                }
-
-                summary_text = f"""
-                **SHAP Analysis Results (Fallback Mode):**
-                - **Language:** {detected_lang.upper()}
-                - **Total Tokens:** {len(words)}
-                - **Samples Requested:** {num_samples}
-                - **Status:** Fallback mode activated due to SHAP configuration issues
-                - **Note:** This is a simplified analysis. For full SHAP functionality, please try LIME analysis
-
-                **Original Error:** {str(e)}
-                """
-
-                return summary_text, fig, analysis_data
-
-            except Exception as e2:
-                logger.error(f"Both SHAP approaches failed: {e2}")
-                error_msg = f"""
             **SHAP Analysis Failed:**
-                - **
-                - **
-                - **
-                - **Text Length:** {len(text)} characters
-                - **Recommendation:** Please try LIME analysis instead, which is more stable
 
-                **
-
-
 
     @handle_errors(default_return=("Analysis failed", None, None))
     def analyze_with_lime(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
-        """
         if not text.strip():
             return "Please enter text for analysis", None, {}
 
@@ -704,29 +668,42 @@ class AdvancedAnalysisEngine:
 
         model, tokenizer = self.model_manager.get_model(detected_lang)
 
-        # Create optimized prediction function
-        predict_fn = self.create_batch_prediction_function(
-            model, tokenizer, self.model_manager.device, self.batch_size
-        )
-
         try:
-            #
             explainer = LimeTextExplainer(
-                class_names=
                 mode='classification'
             )
 
-            # Get LIME explanation
             exp = explainer.explain_instance(
                 text,
                 predict_fn,
-                num_features=20,
-                num_samples=num_samples
             )
 
             # Extract feature importance
             lime_data = exp.as_list()
 
             # Create visualization
             words = [item[0] for item in lime_data]
             scores = [item[1] for item in lime_data]
@@ -765,21 +742,33 @@ class AdvancedAnalysisEngine:
             }
 
             summary_text = f"""
-            **LIME Analysis Results:**
             - **Language:** {detected_lang.upper()}
             - **Features Analyzed:** {analysis_data['features_analyzed']}
             - **Samples Used:** {num_samples}
             - **Positive Features:** {analysis_data['positive_features']}
             - **Negative Features:** {analysis_data['negative_features']}
             - **Top Features:** {', '.join([f"{word}({score:.3f})" for word, score in lime_data[:5]])}
-            - **
             """
 
             return summary_text, fig, analysis_data
 
         except Exception as e:
             logger.error(f"LIME analysis failed: {e}")
-
 
 # Optimized Plotly Visualization System
 class PlotlyVisualizer:
@@ -1215,10 +1204,10 @@ class SentimentApp:
 
         return summary_text, df, summary_fig, confidence_fig
 
-    # FIXED
     @handle_errors(default_return=("Please enter text", None))
     def analyze_with_shap(self, text: str, language: str, num_samples: int = 100):
-        """Perform
         language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
         language_code = language_map.get(language, 'auto')
 
@@ -1226,7 +1215,7 @@ class SentimentApp:
 
     @handle_errors(default_return=("Please enter text", None))
     def analyze_with_lime(self, text: str, language: str, num_samples: int = 100):
-        """Perform
         language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
         language_code = language_map.get(language, 'auto')
 
@@ -1283,7 +1272,7 @@ def create_interface():
 
     with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
         gr.Markdown("# π Advanced Multilingual Sentiment Analyzer (FIXED)")
-        gr.Markdown("AI-powered sentiment analysis with
 
         with gr.Tab("Single Analysis"):
             with gr.Row():
@@ -1326,17 +1315,18 @@ def create_interface():
                     gauge_plot = gr.Plot(label="Sentiment Gauge")
                     probability_plot = gr.Plot(label="Probability Distribution")
 
-        # FIXED
         with gr.Tab("🔬 Advanced Analysis (FIXED)"):
-            gr.Markdown("##
-            gr.Markdown("
 
             with gr.Row():
                 with gr.Column():
                     advanced_text_input = gr.Textbox(
                         label="Enter Text for Advanced Analysis",
                         placeholder="Enter text to analyze with SHAP and LIME...",
-                        lines=6
                     )
 
                     with gr.Row():
@@ -1348,34 +1338,31 @@ def create_interface():
 
                         num_samples_slider = gr.Slider(
                             minimum=50,
-                            maximum=
                             value=100,
-                            step=
                             label="Number of Samples",
                             info="Lower = Faster, Higher = More Accurate"
                         )
 
                     with gr.Row():
-                        shap_btn = gr.Button("SHAP Analysis (FIXED)", variant="primary")
-                        lime_btn = gr.Button("LIME Analysis", variant="secondary")
 
                     gr.Markdown("""
-                    **🛠️
-                    - ✅ **
-                    - ✅ **
-                    - ✅ **Token Extraction**:
-                    - ✅ **
 
-
-                    -
-                    -
-                    - ✅ **Memory Optimization**: Efficient GPU memory management
-                    - π **Performance**: ~5-10x faster than standard implementation
 
-
-                    - 50 samples: ~10-20 seconds
-                    - 100 samples: ~20-40 seconds
-                    - 200+ samples: ~40-80 seconds
                     """)
 
                 with gr.Column():
@@ -1467,7 +1454,7 @@ def create_interface():
         outputs=[result_output, gauge_plot, probability_plot]
     )
 
-    # FIXED
     shap_btn.click(
         app.analyze_with_shap,
         inputs=[advanced_text_input, advanced_language, num_samples_slider],
@@ -1540,50 +1527,4 @@ if __name__ == "__main__":
         )
     except Exception as e:
         logger.error(f"Failed to launch application: {e}")
-        raise
-
-    @staticmethod
-    @handle_errors(default_return=None)
-    def create_probability_bars(result: Dict, theme: ThemeContext) -> go.Figure:
-        """Create probability bar chart"""
-        colors = theme.colors
-
-        if result.get('has_neutral', False):
-            labels = ['Negative', 'Neutral', 'Positive']
-            values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
-            bar_colors = [colors['neg'], colors['neu'], colors['pos']]
-        else:
-            labels = ['Negative', 'Positive']
-            values = [result['neg_prob'], result['pos_prob']]
-            bar_colors = [colors['neg'], colors['pos']]
-
-        fig = go.Figure(data=[
-            go.Bar(x=labels, y=values, marker_color=bar_colors,
-                   text=[f'{v:.3f}' for v in values], textposition='outside')
-        ])
-
-        fig.update_layout(
-            title="Sentiment Probabilities",
-            yaxis_title="Probability",
-            height=400,
-            showlegend=False
-        )
-
-        return fig
-
-    @staticmethod
-    @handle_errors(default_return=None)
-    def create_batch_summary(results: List[Dict], theme: ThemeContext) -> go.Figure:
-        """Create batch analysis summary"""
-        colors = theme.colors
-
-        # Count sentiments
-        sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
-        sentiment_counts = Counter(sentiments)
-
-        # Create pie chart
-        fig = go.Figure(data=[go.Pie(
-            labels=list(sentiment_counts.keys()),
-            values=list(sentiment_counts.values()),
-            marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
-            textinfo='label
 
@@ -484,41 +484,57 @@ class SentimentEngine:
 
         return results
 
+# FIXED Advanced Analysis Engine
 class AdvancedAnalysisEngine:
+    """Advanced analysis using SHAP and LIME with FIXED implementation"""
 
     def __init__(self):
         self.model_manager = ModelManager()
 
+    def create_prediction_function(self, model, tokenizer, device):
+        """Create FIXED prediction function for SHAP/LIME"""
         def predict_proba(texts):
+            # Ensure texts is a list
+            if isinstance(texts, str):
                 texts = [texts]
+            elif isinstance(texts, np.ndarray):
+                texts = texts.tolist()
+
+            # Convert all elements to strings
+            texts = [str(text) for text in texts]
 
             results = []
+            batch_size = 16  # Process in smaller batches
 
             for i in range(0, len(texts), batch_size):
                 batch_texts = texts[i:i + batch_size]
 
+                try:
+                    with torch.no_grad():
+                        # Tokenize batch
+                        inputs = tokenizer(
+                            batch_texts,
+                            return_tensors="pt",
+                            padding=True,
+                            truncation=True,
+                            max_length=config.MAX_TEXT_LENGTH
+                        ).to(device)
+
+                        # Batch inference
+                        outputs = model(**inputs)
+                        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()
+
+                        results.extend(probs)
+
+                except Exception as e:
+                    logger.error(f"Prediction batch failed: {e}")
+                    # Return neutral predictions for failed batch
+                    batch_size_actual = len(batch_texts)
+                    if hasattr(model.config, 'num_labels') and model.config.num_labels == 3:
+                        neutral_probs = np.array([[0.33, 0.34, 0.33]] * batch_size_actual)
+                    else:
+                        neutral_probs = np.array([[0.5, 0.5]] * batch_size_actual)
+                    results.extend(neutral_probs)
 
             return np.array(results)
 
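The hunk above is the core of the fix: SHAP and LIME both expect a plain `predict_proba(texts)` callable that accepts a string, a list, or a NumPy array of strings and always returns an `(n_samples, n_classes)` probability matrix. A minimal, self-contained sketch of that contract is shown below; the keyword-counting scorer is an assumption used purely for illustration and stands in for the tokenizer/model pair that app.py wires in.

```python
import numpy as np

def make_predict_proba(positive_words=("good", "great", "fantastic", "amazing")):
    """Toy stand-in for the model-backed predict_proba wrapper added above."""
    def predict_proba(texts):
        # Accept a single string, a list, or a NumPy array of strings
        if isinstance(texts, str):
            texts = [texts]
        elif isinstance(texts, np.ndarray):
            texts = texts.tolist()
        texts = [str(t) for t in texts]

        rows = []
        for t in texts:
            hits = sum(word in t.lower() for word in positive_words)
            pos = min(0.95, 0.5 + 0.15 * hits)   # crude positive probability
            rows.append([1.0 - pos, pos])        # [negative, positive]
        return np.array(rows)                    # shape: (n_samples, 2)
    return predict_proba

predict_fn = make_predict_proba()
print(predict_fn(["This is great", "This is awful"]).shape)  # (2, 2)
```

Keeping the output a dense NumPy array of per-class probabilities is what lets the same callable be handed to both the SHAP explainer and the LIME explainer in the later hunks without further adaptation.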
@@ -526,7 +542,7 @@ class AdvancedAnalysisEngine:
 
     @handle_errors(default_return=("Analysis failed", None, None))
     def analyze_with_shap(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
+        """FIXED SHAP analysis implementation"""
         if not text.strip():
             return "Please enter text for analysis", None, {}
 
@@ -538,36 +554,44 @@ class AdvancedAnalysisEngine:
 
         model, tokenizer = self.model_manager.get_model(detected_lang)
 
         try:
+            # Create FIXED prediction function
+            predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
+
+            # Test the prediction function first
+            test_pred = predict_fn([text])
+            if test_pred is None or len(test_pred) == 0:
+                return "Prediction function test failed", None, {}
+
+            # Use SHAP Text Explainer instead of generic Explainer
             explainer = shap.Explainer(predict_fn, masker=shap.maskers.Text(tokenizer))
 
+            # Get SHAP values with proper text input
             shap_values = explainer([text], max_evals=num_samples)
 
+            # Extract data safely
+            if hasattr(shap_values, 'data') and hasattr(shap_values, 'values'):
+                tokens = shap_values.data[0] if len(shap_values.data) > 0 else []
+                values = shap_values.values[0] if len(shap_values.values) > 0 else []
+            else:
+                return "SHAP values extraction failed", None, {}
+
+            if len(tokens) == 0 or len(values) == 0:
+                return "No tokens or values extracted from SHAP", None, {}
 
+            # Handle multi-dimensional values
             if len(values.shape) > 1:
+                # Use positive class values (last column for 3-class, second for 2-class)
+                pos_values = values[:, -1] if values.shape[1] >= 2 else values[:, 0]
             else:
                 pos_values = values
 
+            # Ensure we have matching lengths
             min_len = min(len(tokens), len(pos_values))
             tokens = tokens[:min_len]
             pos_values = pos_values[:min_len]
 
+            # Create visualization
             fig = go.Figure()
 
             colors = ['red' if v < 0 else 'green' for v in pos_values]
@@ -598,101 +622,41 @@ class AdvancedAnalysisEngine:
                 'samples_used': num_samples,
                 'positive_influence': sum(1 for v in pos_values if v > 0),
                 'negative_influence': sum(1 for v in pos_values if v < 0),
+                'most_important_tokens': [(str(tokens[i]), float(pos_values[i]))
+                                          for i in np.argsort(np.abs(pos_values))[-5:]]
             }
 
             summary_text = f"""
+            **SHAP Analysis Results (FIXED):**
             - **Language:** {detected_lang.upper()}
             - **Total Tokens:** {analysis_data['total_tokens']}
             - **Samples Used:** {num_samples}
             - **Positive Influence Tokens:** {analysis_data['positive_influence']}
             - **Negative Influence Tokens:** {analysis_data['negative_influence']}
             - **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
+            - **Status:** ✅ SHAP analysis completed successfully
             """
 
             return summary_text, fig, analysis_data
 
         except Exception as e:
             logger.error(f"SHAP analysis failed: {e}")
+            error_msg = f"""
             **SHAP Analysis Failed:**
+            - **Error:** {str(e)}
+            - **Language:** {detected_lang.upper()}
+            - **Suggestion:** Try with a shorter text or reduce number of samples
 
+            **Common fixes:**
+            - Reduce sample size to 50-100
+            - Use shorter input text (< 200 words)
+            - Check if model supports the text language
+            """
+            return error_msg, None, {}
 
     @handle_errors(default_return=("Analysis failed", None, None))
     def analyze_with_lime(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
+        """FIXED LIME analysis implementation"""
         if not text.strip():
             return "Please enter text for analysis", None, {}
 
@@ -704,29 +668,42 @@ class AdvancedAnalysisEngine:
 
         model, tokenizer = self.model_manager.get_model(detected_lang)
 
         try:
+            # Create FIXED prediction function
+            predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
+
+            # Test the prediction function first
+            test_pred = predict_fn([text])
+            if test_pred is None or len(test_pred) == 0:
+                return "Prediction function test failed", None, {}
+
+            # Determine class names based on model output
+            num_classes = test_pred.shape[1] if len(test_pred.shape) > 1 else 2
+            if num_classes == 3:
+                class_names = ['Negative', 'Neutral', 'Positive']
+            else:
+                class_names = ['Negative', 'Positive']
+
+            # Initialize LIME explainer
             explainer = LimeTextExplainer(
+                class_names=class_names,
                 mode='classification'
             )
 
+            # Get LIME explanation
             exp = explainer.explain_instance(
                 text,
                 predict_fn,
+                num_features=min(20, len(text.split())),  # Limit features
+                num_samples=num_samples
             )
 
             # Extract feature importance
             lime_data = exp.as_list()
 
+            if not lime_data:
+                return "No LIME features extracted", None, {}
+
             # Create visualization
             words = [item[0] for item in lime_data]
             scores = [item[1] for item in lime_data]
@@ -765,21 +742,33 @@ class AdvancedAnalysisEngine:
             }
 
             summary_text = f"""
+            **LIME Analysis Results (FIXED):**
             - **Language:** {detected_lang.upper()}
             - **Features Analyzed:** {analysis_data['features_analyzed']}
+            - **Classes:** {', '.join(class_names)}
             - **Samples Used:** {num_samples}
             - **Positive Features:** {analysis_data['positive_features']}
             - **Negative Features:** {analysis_data['negative_features']}
             - **Top Features:** {', '.join([f"{word}({score:.3f})" for word, score in lime_data[:5]])}
+            - **Status:** ✅ LIME analysis completed successfully
             """
 
             return summary_text, fig, analysis_data
 
         except Exception as e:
             logger.error(f"LIME analysis failed: {e}")
+            error_msg = f"""
+            **LIME Analysis Failed:**
+            - **Error:** {str(e)}
+            - **Language:** {detected_lang.upper()}
+            - **Suggestion:** Try with a shorter text or reduce number of samples
+
+            **Common fixes:**
+            - Reduce sample size to 50-100
+            - Use shorter input text (< 200 words)
+            - Check if model supports the text language
+            """
+            return error_msg, None, {}
 
 # Optimized Plotly Visualization System
 class PlotlyVisualizer:
@@ -1215,10 +1204,10 @@ class SentimentApp:
 
         return summary_text, df, summary_fig, confidence_fig
 
+    # FIXED advanced analysis methods with sample size control
     @handle_errors(default_return=("Please enter text", None))
     def analyze_with_shap(self, text: str, language: str, num_samples: int = 100):
+        """Perform FIXED SHAP analysis with configurable samples"""
         language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
         language_code = language_map.get(language, 'auto')
 
@@ -1226,7 +1215,7 @@ class SentimentApp:
 
     @handle_errors(default_return=("Please enter text", None))
     def analyze_with_lime(self, text: str, language: str, num_samples: int = 100):
+        """Perform FIXED LIME analysis with configurable samples"""
         language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
         language_code = language_map.get(language, 'auto')
 
@@ -1283,7 +1272,7 @@ def create_interface():
 
     with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
         gr.Markdown("# π Advanced Multilingual Sentiment Analyzer (FIXED)")
+        gr.Markdown("AI-powered sentiment analysis with **FIXED** SHAP & LIME explainable AI features")
 
         with gr.Tab("Single Analysis"):
             with gr.Row():
@@ -1326,17 +1315,18 @@ def create_interface():
                     gauge_plot = gr.Plot(label="Sentiment Gauge")
                     probability_plot = gr.Plot(label="Probability Distribution")
 
+        # FIXED Advanced Analysis Tab
         with gr.Tab("🔬 Advanced Analysis (FIXED)"):
+            gr.Markdown("## ✅ FIXED Explainable AI Analysis")
+            gr.Markdown("**SHAP and LIME analysis with FIXED implementation** - now handles text input correctly!")
 
             with gr.Row():
                 with gr.Column():
                     advanced_text_input = gr.Textbox(
                         label="Enter Text for Advanced Analysis",
                         placeholder="Enter text to analyze with SHAP and LIME...",
+                        lines=6,
+                        value="This movie is absolutely fantastic and amazing!"
                    )
 
                     with gr.Row():
@@ -1348,34 +1338,31 @@ def create_interface():
 
                         num_samples_slider = gr.Slider(
                             minimum=50,
+                            maximum=300,
                             value=100,
+                            step=25,
                             label="Number of Samples",
                             info="Lower = Faster, Higher = More Accurate"
                         )
 
                     with gr.Row():
+                        shap_btn = gr.Button("✅ SHAP Analysis (FIXED)", variant="primary")
+                        lime_btn = gr.Button("✅ LIME Analysis (FIXED)", variant="secondary")
 
                     gr.Markdown("""
+                    **🛠️ FIXES Applied:**
+                    - ✅ **Text Input Format**: Fixed string/array handling for SHAP
+                    - ✅ **Prediction Function**: Robust batch processing with error handling
+                    - ✅ **Token Extraction**: Safe data extraction with length matching
+                    - ✅ **Model Compatibility**: Works with 2-class and 3-class models
+                    - ✅ **Error Recovery**: Graceful fallback for failed predictions
 
+                    **π Analysis Methods:**
+                    - **SHAP**: Token-level importance scores using Text masker
+                    - **LIME**: Feature importance through text perturbation
 
+                    **⚡ Expected Performance:**
+                    - 50 samples: ~10-20s | 100 samples: ~20-40s | 200+ samples: ~40-80s
                     """)
 
                 with gr.Column():
@@ -1467,7 +1454,7 @@ def create_interface():
         outputs=[result_output, gauge_plot, probability_plot]
     )
 
+    # FIXED Advanced Analysis with sample size control
     shap_btn.click(
         app.analyze_with_shap,
         inputs=[advanced_text_input, advanced_language, num_samples_slider],
@@ -1540,50 +1527,4 @@ if __name__ == "__main__":
         )
     except Exception as e:
         logger.error(f"Failed to launch application: {e}")
+        raise
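As a usage sketch of the fixed flow, the snippet below feeds a `predict_proba`-style callable to LIME the same way `analyze_with_lime` does in the diff (explainer construction, `explain_instance`, then `as_list()`). The toy scorer and sample sentence are assumptions standing in for the real transformer model; only the LIME calls mirror app.py.

```python
import numpy as np
from lime.lime_text import LimeTextExplainer

def predict_proba(texts):
    # Toy two-class scorer standing in for the transformer model in app.py
    if isinstance(texts, str):
        texts = [texts]
    rows = []
    for t in texts:
        pos = 0.9 if any(w in str(t).lower() for w in ("fantastic", "amazing")) else 0.2
        rows.append([1.0 - pos, pos])
    return np.array(rows)

explainer = LimeTextExplainer(class_names=["Negative", "Positive"], mode="classification")
exp = explainer.explain_instance(
    "This movie is absolutely fantastic and amazing!",
    predict_proba,
    num_features=5,
    num_samples=100,
)
print(exp.as_list())  # [(word, weight), ...] ranked by importance
```

The same callable can be wrapped with `shap.maskers.Text` and `shap.Explainer`, as the SHAP hunk does, which is why the commit routes both explainers through one shared prediction function.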