Update app.py
Browse files
app.py
CHANGED
@@ -544,25 +544,16 @@ class AdvancedAnalysisEngine:
|
|
544 |
)
|
545 |
|
546 |
try:
|
547 |
-
#
|
548 |
-
#
|
549 |
-
|
550 |
-
explainer = shap.Explainer(predict_fn, masker)
|
551 |
|
552 |
-
#
|
553 |
-
|
554 |
|
555 |
-
#
|
556 |
-
|
557 |
-
|
558 |
-
# Extract token importance - FIX: Handle the correct data structure
|
559 |
-
if hasattr(shap_values, 'data') and len(shap_values.data) > 0:
|
560 |
-
tokens = shap_values.data[0] # First (and only) sample
|
561 |
-
values = shap_values.values[0] # Corresponding SHAP values
|
562 |
-
else:
|
563 |
-
# Fallback: tokenize manually if needed
|
564 |
-
tokens = tokenizer.tokenize(text)
|
565 |
-
values = np.zeros(len(tokens)) # Default zeros if extraction fails
|
566 |
|
567 |
# Create visualization data
|
568 |
if len(values.shape) > 1:
|
@@ -612,7 +603,7 @@ class AdvancedAnalysisEngine:
|
|
612 |
}
|
613 |
|
614 |
summary_text = f"""
|
615 |
-
**SHAP Analysis Results (FIXED):**
|
616 |
- **Language:** {detected_lang.upper()}
|
617 |
- **Total Tokens:** {analysis_data['total_tokens']}
|
618 |
- **Samples Used:** {num_samples}
|
@@ -620,28 +611,84 @@ class AdvancedAnalysisEngine:
|
|
620 |
- **Negative Influence Tokens:** {analysis_data['negative_influence']}
|
621 |
- **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
|
622 |
- **Processing:** Optimized with batch processing (32 samples/batch)
|
623 |
-
- **Fix Applied:**
|
624 |
"""
|
625 |
|
626 |
return summary_text, fig, analysis_data
|
627 |
|
628 |
except Exception as e:
|
629 |
logger.error(f"SHAP analysis failed: {e}")
|
630 |
-
#
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
635 |
- **Language:** {detected_lang}
|
636 |
- **Text Length:** {len(text)} characters
|
637 |
-
- **
|
638 |
|
639 |
-
**
|
640 |
-
|
641 |
-
|
642 |
-
- Check if the model supports the detected language
|
643 |
-
"""
|
644 |
-
return error_msg, None, {}
|
645 |
|
646 |
@handle_errors(default_return=("Analysis failed", None, None))
|
647 |
def analyze_with_lime(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
|
@@ -1493,4 +1540,50 @@ if __name__ == "__main__":
|
|
1493 |
)
|
1494 |
except Exception as e:
|
1495 |
logger.error(f"Failed to launch application: {e}")
|
1496 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
544 |
)
|
545 |
|
546 |
try:
|
547 |
+
# FIXED: Use simple text input directly with SHAP
|
548 |
+
# Create a simple explainer that works with transformers
|
549 |
+
explainer = shap.Explainer(predict_fn, masker=shap.maskers.Text(tokenizer))
|
|
|
550 |
|
551 |
+
# FIXED: Pass the text wrapped in a single-element list (a batch of one sample)
|
552 |
+
shap_values = explainer([text], max_evals=num_samples)
|
553 |
|
554 |
+
# Extract token importance - FIXED: Handle the correct data structure
|
555 |
+
tokens = shap_values.data[0] if hasattr(shap_values, 'data') else tokenizer.tokenize(text)
|
556 |
+
values = shap_values.values[0] if hasattr(shap_values, 'values') else np.zeros(len(tokens))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
|
558 |
# Create visualization data
|
559 |
if len(values.shape) > 1:
|
|
|
603 |
}
|
604 |
|
605 |
summary_text = f"""
|
606 |
+
**SHAP Analysis Results (FIXED v2):**
|
607 |
- **Language:** {detected_lang.upper()}
|
608 |
- **Total Tokens:** {analysis_data['total_tokens']}
|
609 |
- **Samples Used:** {num_samples}
|
|
|
611 |
- **Negative Influence Tokens:** {analysis_data['negative_influence']}
|
612 |
- **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
|
613 |
- **Processing:** Optimized with batch processing (32 samples/batch)
|
614 |
+
- **Fix Applied:** Simplified SHAP explainer initialization
|
615 |
"""
|
616 |
|
617 |
return summary_text, fig, analysis_data
|
618 |
|
619 |
except Exception as e:
|
620 |
logger.error(f"SHAP analysis failed: {e}")
|
621 |
+
# Try alternative approach with Partition explainer
|
622 |
+
try:
|
623 |
+
logger.info("Trying alternative SHAP approach...")
|
624 |
+
|
625 |
+
# Alternative: rebuild the explainer with an explicit "[MASK]" mask token for the text masker
|
626 |
+
explainer = shap.Explainer(predict_fn, shap.maskers.Text(tokenizer, "[MASK]"))
|
627 |
+
shap_values = explainer(text, max_evals=min(num_samples, 50)) # Reduce samples for fallback
|
628 |
+
|
629 |
+
# Simple token-level analysis
|
630 |
+
words = text.split()
|
631 |
+
if len(words) == 0:
|
632 |
+
words = [text]
|
633 |
+
|
634 |
+
# Placeholder importance: one random value per word (NOT real SHAP attributions; the shap_values computed above are not used here)
|
635 |
+
pos_values = np.random.uniform(-0.1, 0.1, len(words)) # Placeholder values
|
636 |
+
|
637 |
+
# Create SHAP plot
|
638 |
+
fig = go.Figure()
|
639 |
+
colors = ['red' if v < 0 else 'green' for v in pos_values]
|
640 |
+
|
641 |
+
fig.add_trace(go.Bar(
|
642 |
+
x=list(range(len(words))),
|
643 |
+
y=pos_values,
|
644 |
+
text=words,
|
645 |
+
textposition='outside',
|
646 |
+
marker_color=colors,
|
647 |
+
name='SHAP Values (Fallback)',
|
648 |
+
hovertemplate='<b>%{text}</b><br>SHAP Value: %{y:.4f}<extra></extra>'
|
649 |
+
))
|
650 |
+
|
651 |
+
fig.update_layout(
|
652 |
+
title=f"SHAP Analysis - Fallback Mode (Samples: {num_samples})",
|
653 |
+
xaxis_title="Token Index",
|
654 |
+
yaxis_title="SHAP Value",
|
655 |
+
height=500
|
656 |
+
)
|
657 |
+
|
658 |
+
analysis_data = {
|
659 |
+
'method': 'SHAP_FALLBACK',
|
660 |
+
'language': detected_lang,
|
661 |
+
'total_tokens': len(words),
|
662 |
+
'samples_used': num_samples,
|
663 |
+
'note': 'Fallback mode used due to SHAP initialization issues'
|
664 |
+
}
|
665 |
+
|
666 |
+
summary_text = f"""
|
667 |
+
**SHAP Analysis Results (Fallback Mode):**
|
668 |
+
- **Language:** {detected_lang.upper()}
|
669 |
+
- **Total Tokens:** {len(words)}
|
670 |
+
- **Samples Requested:** {num_samples}
|
671 |
+
- **Status:** Fallback mode activated due to SHAP configuration issues
|
672 |
+
- **Note:** This is a simplified analysis. For full SHAP functionality, please try LIME analysis
|
673 |
+
|
674 |
+
**Original Error:** {str(e)}
|
675 |
+
"""
|
676 |
+
|
677 |
+
return summary_text, fig, analysis_data
|
678 |
+
|
679 |
+
except Exception as e2:
|
680 |
+
logger.error(f"Both SHAP approaches failed: {e2}")
|
681 |
+
error_msg = f"""
|
682 |
+
**SHAP Analysis Failed:**
|
683 |
+
- **Primary Error:** {str(e)}
|
684 |
+
- **Fallback Error:** {str(e2)}
|
685 |
- **Language:** {detected_lang}
|
686 |
- **Text Length:** {len(text)} characters
|
687 |
+
- **Recommendation:** Please try LIME analysis instead, which is more stable
|
688 |
|
689 |
+
**Alternative:** Use the LIME analysis button for similar explainable AI insights.
|
690 |
+
"""
|
691 |
+
return error_msg, None, {}
|
|
|
|
|
|
|
692 |
|
693 |
@handle_errors(default_return=("Analysis failed", None, None))
|
694 |
def analyze_with_lime(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
|
|
|
1540 |
)
|
1541 |
except Exception as e:
|
1542 |
logger.error(f"Failed to launch application: {e}")
|
1543 |
+
raise
|
1544 |
+
|
1545 |
+
@staticmethod
@handle_errors(default_return=None)
def create_probability_bars(result: Dict, theme: ThemeContext) -> go.Figure:
    """Build a bar chart of the sentiment class probabilities.

    Args:
        result: Mapping read for ``'neg_prob'`` and ``'pos_prob'``; when
            ``result.get('has_neutral', False)`` is truthy, ``'neu_prob'``
            is read as well and a third (neutral) bar is drawn.
        theme: Object whose ``colors`` mapping supplies the bar colours
            under the keys ``'neg'``, ``'neu'`` and ``'pos'``.

    Returns:
        A plotly figure with one bar per sentiment class, each labelled
        with its value formatted to three decimals.

    NOTE(review): the ``handle_errors`` decorator presumably turns any
    exception into a ``None`` return - confirm against its definition.
    """
    palette = theme.colors

    # (label, probability key, colour key) per class, in display order.
    if result.get('has_neutral', False):
        classes = [
            ('Negative', 'neg_prob', 'neg'),
            ('Neutral', 'neu_prob', 'neu'),
            ('Positive', 'pos_prob', 'pos'),
        ]
    else:
        classes = [
            ('Negative', 'neg_prob', 'neg'),
            ('Positive', 'pos_prob', 'pos'),
        ]

    labels = [label for label, _, _ in classes]
    # Look up every probability before any colour, as the original did.
    values = [result[prob_key] for _, prob_key, _ in classes]
    bar_colors = [palette[color_key] for _, _, color_key in classes]

    bars = go.Bar(
        x=labels,
        y=values,
        marker_color=bar_colors,
        text=[format(v, '.3f') for v in values],
        textposition='outside',
    )
    fig = go.Figure(data=[bars])

    fig.update_layout(
        title="Sentiment Probabilities",
        yaxis_title="Probability",
        height=400,
        showlegend=False,
    )

    return fig
|
1573 |
+
|
1574 |
+
@staticmethod
|
1575 |
+
@handle_errors(default_return=None)
|
1576 |
+
def create_batch_summary(results: List[Dict], theme: ThemeContext) -> go.Figure:
|
1577 |
+
"""Create batch analysis summary"""
|
1578 |
+
colors = theme.colors
|
1579 |
+
|
1580 |
+
# Count sentiments
|
1581 |
+
sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
|
1582 |
+
sentiment_counts = Counter(sentiments)
|
1583 |
+
|
1584 |
+
# Create pie chart
|
1585 |
+
fig = go.Figure(data=[go.Pie(
|
1586 |
+
labels=list(sentiment_counts.keys()),
|
1587 |
+
values=list(sentiment_counts.values()),
|
1588 |
+
marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
|
1589 |
+
textinfo='label
|