Update app.py
Browse files
app.py
CHANGED
@@ -1,727 +1,1817 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
-
import
|
|
|
|
|
|
|
3 |
import numpy as np
|
4 |
-
import
|
|
|
5 |
import re
|
|
|
|
|
6 |
import io
|
|
|
7 |
from datetime import datetime
|
8 |
-
|
9 |
-
from
|
10 |
-
|
11 |
-
from
|
12 |
-
import
|
13 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
import time
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def __init__(self):
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
)
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
# Map labels to sentiment
|
62 |
-
if 'pos' in label or label == 'positive':
|
63 |
-
sentiment = 'positive'
|
64 |
-
elif 'neg' in label or label == 'negative':
|
65 |
-
sentiment = 'negative'
|
66 |
-
else:
|
67 |
-
continue
|
68 |
-
|
69 |
-
# Count aspects
|
70 |
-
if word not in all_aspects[sentiment]:
|
71 |
-
all_aspects[sentiment][word] = 0
|
72 |
-
all_aspects[sentiment][word] += 1
|
73 |
-
|
74 |
-
detailed_aspects.append({
|
75 |
-
'review': review[:50] + '...',
|
76 |
-
'aspect': word,
|
77 |
-
'sentiment': sentiment,
|
78 |
-
'confidence': round(confidence, 3)
|
79 |
-
})
|
80 |
-
except:
|
81 |
-
continue
|
82 |
|
83 |
-
#
|
84 |
-
|
85 |
-
|
|
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
}
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
clean_review = self.preprocess_text(review)
|
107 |
-
result = sentiment_analyzer(clean_review)[0]
|
108 |
-
|
109 |
-
label = result['label'].lower()
|
110 |
-
score = result['score']
|
111 |
-
|
112 |
-
if 'pos' in label:
|
113 |
-
sentiment = 'positive'
|
114 |
-
elif 'neg' in label:
|
115 |
-
sentiment = 'negative'
|
116 |
-
else:
|
117 |
-
sentiment = 'neutral'
|
118 |
-
|
119 |
-
sentiments[sentiment] += 1
|
120 |
-
results.append({
|
121 |
-
'text': review[:100] + '...' if len(review) > 100 else review,
|
122 |
-
'sentiment': sentiment,
|
123 |
-
'confidence': round(score, 3)
|
124 |
-
})
|
125 |
|
126 |
-
|
127 |
-
sentiment_percentages = {k: round(v/total*100, 1) for k, v in sentiments.items()}
|
128 |
|
129 |
-
|
130 |
-
|
131 |
|
132 |
-
|
133 |
-
'
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
-
def
|
140 |
-
|
141 |
-
|
|
|
|
|
|
|
|
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
if
|
146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
|
148 |
-
for
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
score = 0
|
153 |
-
flags = []
|
154 |
-
|
155 |
-
# Text-based checks
|
156 |
-
if len(review) < 20:
|
157 |
-
score += 0.3
|
158 |
-
flags.append("too_short")
|
159 |
-
|
160 |
-
words = review.lower().split()
|
161 |
-
unique_ratio = len(set(words)) / len(words) if words else 0
|
162 |
-
if unique_ratio < 0.5:
|
163 |
-
score += 0.4
|
164 |
-
flags.append("repetitive")
|
165 |
-
|
166 |
-
punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
|
167 |
-
if punct_ratio > 0.1:
|
168 |
-
score += 0.2
|
169 |
-
flags.append("excessive_punctuation")
|
170 |
-
|
171 |
-
generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
|
172 |
-
if any(phrase in review.lower() for phrase in generic_phrases):
|
173 |
-
score += 0.1
|
174 |
-
flags.append("generic_language")
|
175 |
-
|
176 |
-
# Add metadata flags if available
|
177 |
-
if i < len(metadata_flags):
|
178 |
-
if metadata_flags[i]:
|
179 |
-
score += 0.3
|
180 |
-
flags.extend(metadata_flags[i])
|
181 |
-
|
182 |
-
fake_scores.append({
|
183 |
-
'text': review[:100] + '...' if len(review) > 100 else review,
|
184 |
-
'fake_probability': min(round(score, 3), 1.0),
|
185 |
-
'status': 'suspicious' if score > 0.5 else 'authentic',
|
186 |
-
'flags': flags
|
187 |
-
})
|
188 |
-
|
189 |
-
suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)
|
190 |
|
191 |
return {
|
192 |
-
'
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
'
|
198 |
-
'
|
|
|
|
|
199 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
|
|
204 |
|
205 |
-
#
|
206 |
-
if
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
dt = datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
|
211 |
-
times.append((i, dt))
|
212 |
-
except:
|
213 |
-
continue
|
214 |
-
|
215 |
-
times.sort(key=lambda x: x[1])
|
216 |
-
|
217 |
-
# Check for clusters
|
218 |
-
for i in range(len(times) - 5):
|
219 |
-
if (times[i + 5][1] - times[i][1]).total_seconds() < 300: # 5 mins
|
220 |
-
for j in range(i, i + 6):
|
221 |
-
flags_per_review[times[j][0]].append("time_cluster")
|
222 |
-
|
223 |
-
# Username pattern analysis
|
224 |
-
for i, username in enumerate(usernames):
|
225 |
-
if re.match(r"user_\d{4,}", username):
|
226 |
-
flags_per_review[i].append("suspicious_username")
|
227 |
-
if len(username) < 4:
|
228 |
-
flags_per_review[i].append("short_username")
|
229 |
-
|
230 |
-
return flags_per_review
|
231 |
-
|
232 |
-
def assess_quality(self, reviews: List[str], custom_weights: Dict = None) -> Tuple[Dict, go.Figure]:
|
233 |
-
"""Assess review quality with customizable weights and radar chart"""
|
234 |
-
default_weights = {
|
235 |
-
'length': 0.25,
|
236 |
-
'detail': 0.25,
|
237 |
-
'structure': 0.25,
|
238 |
-
'helpfulness': 0.25
|
239 |
-
}
|
240 |
|
241 |
-
|
242 |
-
|
243 |
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
# Detail factor
|
255 |
-
detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
|
256 |
-
detail_score = min(sum(1 for word in detail_words if word in review.lower()) / 3, 1.0)
|
257 |
-
factors['detail'] = round(detail_score, 2)
|
258 |
-
|
259 |
-
# Structure factor
|
260 |
-
sentences = len(re.split(r'[.!?]', review))
|
261 |
-
structure_score = min(sentences / 5, 1.0)
|
262 |
-
factors['structure'] = round(structure_score, 2)
|
263 |
-
|
264 |
-
# Helpfulness factor
|
265 |
-
helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']
|
266 |
-
helpful_score = min(sum(1 for word in helpful_words if word in review.lower()) / 2, 1.0)
|
267 |
-
factors['helpfulness'] = round(helpful_score, 2)
|
268 |
-
|
269 |
-
# Calculate weighted score
|
270 |
-
total_score = sum(factors[k] * weights[k] for k in factors.keys())
|
271 |
-
|
272 |
-
quality_scores.append({
|
273 |
-
'text': review[:100] + '...' if len(review) > 100 else review,
|
274 |
-
'quality_score': round(total_score, 3),
|
275 |
-
'factors': factors,
|
276 |
-
'grade': 'A' if total_score > 0.8 else 'B' if total_score > 0.6 else 'C' if total_score > 0.4 else 'D'
|
277 |
-
})
|
278 |
-
|
279 |
-
avg_quality = sum(item['quality_score'] for item in quality_scores) / len(quality_scores) if quality_scores else 0
|
280 |
-
|
281 |
-
# Create radar chart for average factors
|
282 |
-
avg_factors = {}
|
283 |
-
for factor in ['length', 'detail', 'structure', 'helpfulness']:
|
284 |
-
avg_factors[factor] = sum(item['factors'][factor] for item in quality_scores) / len(quality_scores) if quality_scores else 0
|
285 |
-
|
286 |
-
fig = go.Figure()
|
287 |
-
fig.add_trace(go.Scatterpolar(
|
288 |
-
r=list(avg_factors.values()),
|
289 |
-
theta=list(avg_factors.keys()),
|
290 |
-
fill='toself',
|
291 |
-
name='Quality Factors'
|
292 |
-
))
|
293 |
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
visible=True,
|
298 |
-
range=[0, 1]
|
299 |
-
)),
|
300 |
-
showlegend=True,
|
301 |
-
title="Average Quality Factors"
|
302 |
-
)
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7),
|
309 |
-
'weights_used': weights
|
310 |
-
},
|
311 |
-
'details': quality_scores,
|
312 |
-
'factor_averages': avg_factors
|
313 |
-
}, fig
|
314 |
-
|
315 |
-
def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
|
316 |
-
"""Compare sentiment between two products"""
|
317 |
-
analysis_a = self.analyze_sentiment(product_a_reviews)
|
318 |
-
analysis_b = self.analyze_sentiment(product_b_reviews)
|
319 |
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
subplot_titles=['Product A', 'Product B']
|
324 |
-
)
|
325 |
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
331 |
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
|
|
337 |
|
338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
|
340 |
-
|
341 |
-
|
342 |
-
'product_b': analysis_b,
|
343 |
-
'winner': 'Product A' if analysis_a['summary']['positive'] > analysis_b['summary']['positive'] else 'Product B'
|
344 |
-
}
|
345 |
|
346 |
-
|
347 |
-
|
348 |
-
def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
|
349 |
-
"""Generate analysis report with export capability"""
|
350 |
-
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
351 |
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
|
|
|
|
|
|
|
|
|
|
356 |
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
- Main complaints: {', '.join([aspect[0] for aspect in top_neg[:3]])}
|
375 |
-
- Key strengths: {', '.join([aspect[0] for aspect in top_pos[:3]])}
|
376 |
-
|
377 |
-
## Recommendations
|
378 |
-
- Address negative aspects: {', '.join([aspect[0] for aspect in top_neg[:2]])}
|
379 |
-
- Leverage positive aspects in marketing
|
380 |
-
- Monitor sentiment trends over time
|
381 |
-
"""
|
382 |
-
|
383 |
-
elif report_type == "fake":
|
384 |
-
return f"""# Fake Review Detection Report
|
385 |
-
Generated: {timestamp}
|
386 |
-
|
387 |
-
## Summary
|
388 |
-
- Total Reviews: {analysis_data.get('summary', {}).get('total_reviews', 0)}
|
389 |
-
- Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
|
390 |
-
- Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%
|
391 |
-
|
392 |
-
## Risk Assessment
|
393 |
-
- Overall Risk: {'High' if analysis_data.get('summary', {}).get('authenticity_rate', 0) < 70 else 'Low'}
|
394 |
-
- Action Required: {'Yes' if analysis_data.get('summary', {}).get('suspicious_reviews', 0) > 0 else 'No'}
|
395 |
-
|
396 |
-
## Common Fraud Indicators
|
397 |
-
- Short reviews with generic language
|
398 |
-
- Repetitive content patterns
|
399 |
-
- Suspicious timing clusters
|
400 |
-
- Unusual username patterns
|
401 |
-
"""
|
402 |
|
403 |
-
return
|
404 |
-
|
405 |
-
# Global analyzer instance
|
406 |
-
analyzer = ReviewAnalyzer()
|
407 |
|
408 |
-
|
409 |
-
"""
|
410 |
-
if not text.strip():
|
411 |
-
return []
|
412 |
|
413 |
-
|
414 |
-
|
415 |
-
line = line.strip()
|
416 |
-
if line and len(line) > 10:
|
417 |
-
reviews.append(line)
|
418 |
-
|
419 |
-
return reviews
|
420 |
-
|
421 |
-
def process_csv_upload(file) -> Tuple[List[str], Dict]:
|
422 |
-
"""Process uploaded CSV file"""
|
423 |
-
if file is None:
|
424 |
-
return [], {}
|
425 |
|
426 |
-
|
427 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
|
|
|
|
|
|
433 |
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
time_col = col
|
440 |
-
elif 'user' in col_lower or 'name' in col_lower:
|
441 |
-
user_col = col
|
442 |
|
443 |
-
|
444 |
-
return [], {"error": "No review column found. Expected columns: 'review', 'comment', or 'text'"}
|
445 |
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
|
448 |
-
|
449 |
-
if
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
|
454 |
-
|
455 |
|
456 |
-
|
457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
|
|
|
|
|
|
|
|
469 |
|
470 |
-
|
471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
|
473 |
-
|
474 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
475 |
|
476 |
fig = go.Figure(data=[
|
477 |
-
go.Bar(x=
|
478 |
-
|
479 |
-
marker_color=['green', 'red', 'gray'])
|
480 |
])
|
481 |
-
fig.update_layout(title="Sentiment Distribution", yaxis_title="Percentage")
|
482 |
|
483 |
-
|
484 |
-
|
485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
486 |
|
487 |
-
def fake_detection_interface(reviews_text: str, csv_file):
|
488 |
-
"""Interface for fake review detection"""
|
489 |
-
reviews = []
|
490 |
-
metadata = {}
|
491 |
-
|
492 |
-
if csv_file is not None:
|
493 |
-
reviews, metadata = process_csv_upload(csv_file)
|
494 |
-
if 'error' in metadata:
|
495 |
-
return metadata['error']
|
496 |
-
else:
|
497 |
-
reviews = process_reviews_input(reviews_text)
|
498 |
-
|
499 |
-
if not reviews:
|
500 |
-
return "Please enter reviews or upload a CSV file."
|
501 |
-
|
502 |
-
try:
|
503 |
-
result = analyzer.detect_fake_reviews(reviews, metadata if metadata else None)
|
504 |
-
return json.dumps(result, indent=2)
|
505 |
-
except Exception as e:
|
506 |
-
return f"Error: {str(e)}"
|
507 |
|
508 |
-
def quality_assessment_interface(reviews_text: str, csv_file, length_weight: float, detail_weight: float, structure_weight: float, help_weight: float):
|
509 |
-
"""Interface for quality assessment with custom weights"""
|
510 |
-
reviews = []
|
511 |
-
|
512 |
-
if csv_file is not None:
|
513 |
-
reviews, metadata = process_csv_upload(csv_file)
|
514 |
-
if 'error' in metadata:
|
515 |
-
return metadata['error'], None
|
516 |
-
else:
|
517 |
-
reviews = process_reviews_input(reviews_text)
|
518 |
-
|
519 |
-
if not reviews:
|
520 |
-
return "Please enter reviews or upload a CSV file.", None
|
521 |
-
|
522 |
-
try:
|
523 |
-
custom_weights = {
|
524 |
-
'length': length_weight,
|
525 |
-
'detail': detail_weight,
|
526 |
-
'structure': structure_weight,
|
527 |
-
'helpfulness': help_weight
|
528 |
-
}
|
529 |
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
|
|
|
|
|
|
534 |
|
535 |
-
def competitor_comparison_interface(product_a_text: str, product_b_text: str):
|
536 |
-
"""Interface for competitor comparison"""
|
537 |
-
if not product_a_text.strip() or not product_b_text.strip():
|
538 |
-
return "Please enter reviews for both products.", None
|
539 |
-
|
540 |
-
reviews_a = process_reviews_input(product_a_text)
|
541 |
-
reviews_b = process_reviews_input(product_b_text)
|
542 |
-
|
543 |
-
if not reviews_a or not reviews_b:
|
544 |
-
return "Please provide valid reviews for both products.", None
|
545 |
-
|
546 |
-
try:
|
547 |
-
result, fig = analyzer.compare_competitors(reviews_a, reviews_b)
|
548 |
-
return json.dumps(result, indent=2), fig
|
549 |
-
except Exception as e:
|
550 |
-
return f"Error: {str(e)}", None
|
551 |
|
552 |
-
def generate_report_interface(analysis_result: str, report_type: str):
|
553 |
-
"""Interface for report generation"""
|
554 |
-
if not analysis_result.strip():
|
555 |
-
return "No analysis data available. Please run an analysis first."
|
556 |
-
|
557 |
-
try:
|
558 |
-
data = json.loads(analysis_result)
|
559 |
-
report = analyzer.generate_report(data, report_type.lower())
|
560 |
-
return report
|
561 |
-
except Exception as e:
|
562 |
-
return f"Error generating report: {str(e)}"
|
563 |
-
|
564 |
-
# Create Gradio interface
|
565 |
-
with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
|
566 |
-
gr.Markdown("# 🛒 SmartReview Pro")
|
567 |
-
gr.Markdown("Advanced review analysis platform with AI-powered insights")
|
568 |
-
|
569 |
-
with gr.Tab("📊 Sentiment Analysis"):
|
570 |
-
gr.Markdown("### Analyze customer sentiment and extract key aspects")
|
571 |
-
with gr.Row():
|
572 |
-
with gr.Column():
|
573 |
-
sentiment_input = gr.Textbox(
|
574 |
-
lines=8,
|
575 |
-
placeholder="Enter reviews (one per line) or upload CSV...",
|
576 |
-
label="Reviews"
|
577 |
-
)
|
578 |
-
sentiment_csv = gr.File(
|
579 |
-
label="Upload CSV (columns: review/comment/text, optional: timestamp, username)",
|
580 |
-
file_types=[".csv"]
|
581 |
-
)
|
582 |
-
sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
|
583 |
-
with gr.Column():
|
584 |
-
sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
|
585 |
-
sentiment_chart = gr.Plot(label="Sentiment Distribution")
|
586 |
-
|
587 |
-
sentiment_btn.click(
|
588 |
-
sentiment_analysis_interface,
|
589 |
-
inputs=[sentiment_input, sentiment_csv],
|
590 |
-
outputs=[sentiment_output, sentiment_chart]
|
591 |
-
)
|
592 |
-
|
593 |
-
with gr.Tab("🔍 Fake Review Detection"):
|
594 |
-
gr.Markdown("### Detect suspicious reviews using text analysis and metadata")
|
595 |
-
with gr.Row():
|
596 |
-
with gr.Column():
|
597 |
-
fake_input = gr.Textbox(
|
598 |
-
lines=8,
|
599 |
-
placeholder="Enter reviews to analyze...",
|
600 |
-
label="Reviews"
|
601 |
-
)
|
602 |
-
fake_csv = gr.File(
|
603 |
-
label="Upload CSV (supports timestamp & username analysis)",
|
604 |
-
file_types=[".csv"]
|
605 |
-
)
|
606 |
-
fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
|
607 |
-
with gr.Column():
|
608 |
-
fake_output = gr.Textbox(label="Detection Results", lines=15)
|
609 |
-
|
610 |
-
fake_btn.click(
|
611 |
-
fake_detection_interface,
|
612 |
-
inputs=[fake_input, fake_csv],
|
613 |
-
outputs=[fake_output]
|
614 |
-
)
|
615 |
-
|
616 |
-
with gr.Tab("⭐ Quality Assessment"):
|
617 |
-
gr.Markdown("### Assess review quality with customizable weights")
|
618 |
-
with gr.Row():
|
619 |
-
with gr.Column():
|
620 |
-
quality_input = gr.Textbox(
|
621 |
-
lines=8,
|
622 |
-
placeholder="Enter reviews to assess...",
|
623 |
-
label="Reviews"
|
624 |
-
)
|
625 |
-
quality_csv = gr.File(
|
626 |
-
label="Upload CSV",
|
627 |
-
file_types=[".csv"]
|
628 |
-
)
|
629 |
-
|
630 |
-
gr.Markdown("**Customize Quality Weights:**")
|
631 |
-
with gr.Row():
|
632 |
-
length_weight = gr.Slider(0, 1, 0.25, label="Length Weight")
|
633 |
-
detail_weight = gr.Slider(0, 1, 0.25, label="Detail Weight")
|
634 |
-
with gr.Row():
|
635 |
-
structure_weight = gr.Slider(0, 1, 0.25, label="Structure Weight")
|
636 |
-
help_weight = gr.Slider(0, 1, 0.25, label="Helpfulness Weight")
|
637 |
-
|
638 |
-
quality_btn = gr.Button("Assess Quality", variant="primary")
|
639 |
-
with gr.Column():
|
640 |
-
quality_output = gr.Textbox(label="Quality Assessment", lines=12)
|
641 |
-
quality_radar = gr.Plot(label="Quality Factors Radar Chart")
|
642 |
-
|
643 |
-
quality_btn.click(
|
644 |
-
quality_assessment_interface,
|
645 |
-
inputs=[quality_input, quality_csv, length_weight, detail_weight, structure_weight, help_weight],
|
646 |
-
outputs=[quality_output, quality_radar]
|
647 |
-
)
|
648 |
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
671 |
-
outputs=[comp_output, comp_chart]
|
672 |
)
|
|
|
|
|
673 |
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
696 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
697 |
|
698 |
-
|
699 |
-
|
700 |
-
|
701 |
-
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
### Core Capabilities:
|
710 |
-
- **Sentiment Analysis**: AI-powered emotion detection with keyword extraction
|
711 |
-
- **Fake Review Detection**: Multi-layer authenticity verification
|
712 |
-
- **Quality Assessment**: Comprehensive review helpfulness scoring
|
713 |
-
- **Competitor Comparison**: Side-by-side sentiment analysis
|
714 |
-
- **Professional Reports**: Detailed insights with actionable recommendations
|
715 |
-
|
716 |
-
### CSV Format:
|
717 |
-
Required columns: `review` or `comment` or `text`
|
718 |
-
Optional columns: `timestamp`, `username` (for enhanced fake detection)
|
719 |
-
|
720 |
-
### Pricing:
|
721 |
-
- **Free**: 50 analyses/day, basic features
|
722 |
-
- **Pro ($299/month)**: Unlimited analyses, CSV upload, custom reports
|
723 |
-
- **Enterprise**: API access, custom models, priority support
|
724 |
-
""")
|
725 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
726 |
if __name__ == "__main__":
|
727 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
import gradio as gr
|
3 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
import plotly.express as px
|
6 |
+
from plotly.subplots import make_subplots
|
7 |
import numpy as np
|
8 |
+
from wordcloud import WordCloud
|
9 |
+
from collections import Counter, defaultdict, OrderedDict
|
10 |
import re
|
11 |
+
import json
|
12 |
+
import csv
|
13 |
import io
|
14 |
+
import tempfile
|
15 |
from datetime import datetime
|
16 |
+
import logging
|
17 |
+
from functools import lru_cache, wraps
|
18 |
+
from dataclasses import dataclass
|
19 |
+
from typing import List, Dict, Optional, Tuple, Any, Callable
|
20 |
+
from contextlib import contextmanager
|
21 |
+
import nltk
|
22 |
+
from nltk.corpus import stopwords
|
23 |
+
import langdetect
|
24 |
+
import pandas as pd
|
25 |
+
import gc
|
26 |
+
import threading
|
27 |
+
import asyncio
|
28 |
+
from concurrent.futures import ThreadPoolExecutor
|
29 |
import time
|
30 |
|
31 |
+
# Advanced analysis imports
|
32 |
+
import shap
|
33 |
+
import lime
|
34 |
+
from lime.lime_text import LimeTextExplainer
|
35 |
+
|
36 |
+
# Configuration
|
37 |
+
@dataclass
|
38 |
+
class Config:
|
39 |
+
MAX_HISTORY_SIZE: int = 1000
|
40 |
+
BATCH_SIZE_LIMIT: int = 50
|
41 |
+
MAX_TEXT_LENGTH: int = 512
|
42 |
+
MIN_WORD_LENGTH: int = 2
|
43 |
+
CACHE_SIZE: int = 128
|
44 |
+
BATCH_PROCESSING_SIZE: int = 8
|
45 |
+
MODEL_CACHE_SIZE: int = 2 # Maximum models to keep in memory
|
46 |
+
|
47 |
+
# Supported languages and models
|
48 |
+
SUPPORTED_LANGUAGES = {
|
49 |
+
'auto': 'Auto Detect',
|
50 |
+
'en': 'English',
|
51 |
+
'zh': 'Chinese',
|
52 |
+
'es': 'Spanish',
|
53 |
+
'fr': 'French',
|
54 |
+
'de': 'German',
|
55 |
+
'sv': 'Swedish'
|
56 |
+
}
|
57 |
+
|
58 |
+
MODELS = {
|
59 |
+
'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
|
60 |
+
'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
|
61 |
+
'zh': "uer/roberta-base-finetuned-dianping-chinese"
|
62 |
+
}
|
63 |
+
|
64 |
+
# Color themes for Plotly
|
65 |
+
THEMES = {
|
66 |
+
'default': {'pos': '#4CAF50', 'neg': '#F44336', 'neu': '#FF9800'},
|
67 |
+
'ocean': {'pos': '#0077BE', 'neg': '#FF6B35', 'neu': '#00BCD4'},
|
68 |
+
'dark': {'pos': '#66BB6A', 'neg': '#EF5350', 'neu': '#FFA726'},
|
69 |
+
'rainbow': {'pos': '#9C27B0', 'neg': '#E91E63', 'neu': '#FF5722'}
|
70 |
+
}
|
71 |
+
|
72 |
+
config = Config()
|
73 |
+
|
74 |
+
# Logging setup
|
75 |
+
logging.basicConfig(level=logging.INFO)
|
76 |
+
logger = logging.getLogger(__name__)
|
77 |
+
|
78 |
+
# Initialize NLTK
|
79 |
+
try:
|
80 |
+
nltk.download('stopwords', quiet=True)
|
81 |
+
nltk.download('punkt', quiet=True)
|
82 |
+
STOP_WORDS = set(stopwords.words('english'))
|
83 |
+
except:
|
84 |
+
STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
|
85 |
+
|
86 |
+
# Decorators and Context Managers
|
87 |
+
def handle_errors(default_return=None):
|
88 |
+
"""Centralized error handling decorator"""
|
89 |
+
def decorator(func: Callable) -> Callable:
|
90 |
+
@wraps(func)
|
91 |
+
def wrapper(*args, **kwargs):
|
92 |
+
try:
|
93 |
+
return func(*args, **kwargs)
|
94 |
+
except Exception as e:
|
95 |
+
logger.error(f"{func.__name__} failed: {e}")
|
96 |
+
return default_return if default_return is not None else f"Error: {str(e)}"
|
97 |
+
return wrapper
|
98 |
+
return decorator
|
99 |
+
|
100 |
+
@contextmanager
|
101 |
+
def memory_cleanup():
|
102 |
+
"""Context manager for memory cleanup"""
|
103 |
+
try:
|
104 |
+
yield
|
105 |
+
finally:
|
106 |
+
gc.collect()
|
107 |
+
if torch.cuda.is_available():
|
108 |
+
torch.cuda.empty_cache()
|
109 |
+
|
110 |
+
class ThemeContext:
|
111 |
+
"""Theme management context"""
|
112 |
+
def __init__(self, theme: str = 'default'):
|
113 |
+
self.theme = theme
|
114 |
+
self.colors = config.THEMES.get(theme, config.THEMES['default'])
|
115 |
+
|
116 |
+
class LRUModelCache:
|
117 |
+
"""LRU Cache for models with memory management"""
|
118 |
+
def __init__(self, max_size: int = 2):
|
119 |
+
self.max_size = max_size
|
120 |
+
self.cache = OrderedDict()
|
121 |
+
self.lock = threading.Lock()
|
122 |
+
|
123 |
+
def get(self, key):
|
124 |
+
with self.lock:
|
125 |
+
if key in self.cache:
|
126 |
+
# Move to end (most recently used)
|
127 |
+
self.cache.move_to_end(key)
|
128 |
+
return self.cache[key]
|
129 |
+
return None
|
130 |
+
|
131 |
+
def put(self, key, value):
|
132 |
+
with self.lock:
|
133 |
+
if key in self.cache:
|
134 |
+
self.cache.move_to_end(key)
|
135 |
+
else:
|
136 |
+
if len(self.cache) >= self.max_size:
|
137 |
+
# Remove least recently used
|
138 |
+
oldest_key = next(iter(self.cache))
|
139 |
+
old_model, old_tokenizer = self.cache.pop(oldest_key)
|
140 |
+
# Force cleanup
|
141 |
+
del old_model, old_tokenizer
|
142 |
+
gc.collect()
|
143 |
+
if torch.cuda.is_available():
|
144 |
+
torch.cuda.empty_cache()
|
145 |
+
|
146 |
+
self.cache[key] = value
|
147 |
+
|
148 |
+
def clear(self):
|
149 |
+
with self.lock:
|
150 |
+
for model, tokenizer in self.cache.values():
|
151 |
+
del model, tokenizer
|
152 |
+
self.cache.clear()
|
153 |
+
gc.collect()
|
154 |
+
if torch.cuda.is_available():
|
155 |
+
torch.cuda.empty_cache()
|
156 |
|
157 |
+
# Enhanced Model Manager with Optimized Memory Management
|
158 |
+
class ModelManager:
|
159 |
+
"""Optimized multi-language model manager with LRU cache and lazy loading"""
|
160 |
+
_instance = None
|
161 |
+
|
162 |
+
def __new__(cls):
|
163 |
+
if cls._instance is None:
|
164 |
+
cls._instance = super().__new__(cls)
|
165 |
+
cls._instance._initialized = False
|
166 |
+
return cls._instance
|
167 |
+
|
168 |
def __init__(self):
|
169 |
+
if not self._initialized:
|
170 |
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
171 |
+
self.model_cache = LRUModelCache(config.MODEL_CACHE_SIZE)
|
172 |
+
self.loading_lock = threading.Lock()
|
173 |
+
self._initialized = True
|
174 |
+
logger.info(f"ModelManager initialized on device: {self.device}")
|
175 |
+
|
176 |
+
def _load_model(self, model_name: str, cache_key: str):
|
177 |
+
"""Load model with memory optimization"""
|
178 |
+
try:
|
179 |
+
logger.info(f"Loading model: {model_name}")
|
180 |
+
|
181 |
+
# Load with memory optimization
|
182 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
183 |
+
model = AutoModelForSequenceClassification.from_pretrained(
|
184 |
+
model_name,
|
185 |
+
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
186 |
+
device_map="auto" if torch.cuda.is_available() else None
|
187 |
)
|
188 |
+
|
189 |
+
if not torch.cuda.is_available():
|
190 |
+
model.to(self.device)
|
191 |
+
|
192 |
+
# Set to eval mode to save memory
|
193 |
+
model.eval()
|
194 |
+
|
195 |
+
# Cache the model
|
196 |
+
self.model_cache.put(cache_key, (model, tokenizer))
|
197 |
+
logger.info(f"Model {model_name} loaded and cached successfully")
|
198 |
+
|
199 |
+
return model, tokenizer
|
200 |
+
|
201 |
+
except Exception as e:
|
202 |
+
logger.error(f"Failed to load model {model_name}: {e}")
|
203 |
+
raise
|
204 |
+
|
205 |
+
def get_model(self, language='en'):
|
206 |
+
"""Get model for specific language with lazy loading and caching"""
|
207 |
+
# Determine cache key and model name
|
208 |
+
if language == 'zh':
|
209 |
+
cache_key = 'zh'
|
210 |
+
model_name = config.MODELS['zh']
|
211 |
+
else:
|
212 |
+
cache_key = 'multilingual'
|
213 |
+
model_name = config.MODELS['multilingual']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
+
# Try to get from cache first
|
216 |
+
cached_model = self.model_cache.get(cache_key)
|
217 |
+
if cached_model is not None:
|
218 |
+
return cached_model
|
219 |
|
220 |
+
# Load model if not in cache (with thread safety)
|
221 |
+
with self.loading_lock:
|
222 |
+
# Double-check pattern
|
223 |
+
cached_model = self.model_cache.get(cache_key)
|
224 |
+
if cached_model is not None:
|
225 |
+
return cached_model
|
226 |
+
|
227 |
+
return self._load_model(model_name, cache_key)
|
228 |
+
|
229 |
+
@staticmethod
|
230 |
+
def detect_language(text: str) -> str:
|
231 |
+
"""Detect text language"""
|
232 |
+
try:
|
233 |
+
detected = langdetect.detect(text)
|
234 |
+
language_mapping = {
|
235 |
+
'zh-cn': 'zh',
|
236 |
+
'zh-tw': 'zh'
|
237 |
}
|
238 |
+
detected = language_mapping.get(detected, detected)
|
239 |
+
return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
|
240 |
+
except:
|
241 |
+
return 'en'
|
242 |
+
|
243 |
+
# Simplified Text Processing
|
244 |
+
class TextProcessor:
|
245 |
+
"""Optimized text processing with multi-language support"""
|
246 |
|
247 |
+
@staticmethod
|
248 |
+
@lru_cache(maxsize=config.CACHE_SIZE)
|
249 |
+
def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
|
250 |
+
"""Clean text with language awareness"""
|
251 |
+
text = text.strip()
|
252 |
|
253 |
+
# Don't clean Chinese text aggressively
|
254 |
+
if re.search(r'[\u4e00-\u9fff]', text):
|
255 |
+
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
|
257 |
+
text = text.lower()
|
|
|
258 |
|
259 |
+
if remove_numbers:
|
260 |
+
text = re.sub(r'\d+', '', text)
|
261 |
|
262 |
+
if remove_punctuation:
|
263 |
+
text = re.sub(r'[^\w\s]', '', text)
|
264 |
+
|
265 |
+
words = text.split()
|
266 |
+
cleaned_words = [w for w in words if w not in STOP_WORDS and len(w) >= config.MIN_WORD_LENGTH]
|
267 |
+
return ' '.join(cleaned_words)
|
268 |
+
|
269 |
+
@staticmethod
|
270 |
+
def parse_batch_input(text: str) -> List[str]:
|
271 |
+
"""Parse batch input from textarea"""
|
272 |
+
lines = text.strip().split('\n')
|
273 |
+
return [line.strip() for line in lines if line.strip()]
|
274 |
+
|
275 |
+
# Enhanced History Manager
|
276 |
+
class HistoryManager:
|
277 |
+
"""Enhanced history management with filtering"""
|
278 |
+
def __init__(self):
|
279 |
+
self._history = []
|
280 |
+
|
281 |
+
def add(self, entry: Dict):
|
282 |
+
"""Add entry with timestamp"""
|
283 |
+
entry['timestamp'] = datetime.now().isoformat()
|
284 |
+
self._history.append(entry)
|
285 |
+
if len(self._history) > config.MAX_HISTORY_SIZE:
|
286 |
+
self._history = self._history[-config.MAX_HISTORY_SIZE:]
|
287 |
+
|
288 |
+
def add_batch(self, entries: List[Dict]):
|
289 |
+
"""Add multiple entries"""
|
290 |
+
for entry in entries:
|
291 |
+
self.add(entry)
|
292 |
+
|
293 |
+
def get_all(self) -> List[Dict]:
|
294 |
+
return self._history.copy()
|
295 |
|
296 |
+
def get_recent(self, n: int = 10) -> List[Dict]:
|
297 |
+
return self._history[-n:] if self._history else []
|
298 |
+
|
299 |
+
def filter_by(self, sentiment: str = None, language: str = None,
|
300 |
+
min_confidence: float = None) -> List[Dict]:
|
301 |
+
"""Filter history by criteria"""
|
302 |
+
filtered = self._history
|
303 |
|
304 |
+
if sentiment:
|
305 |
+
filtered = [h for h in filtered if h['sentiment'] == sentiment]
|
306 |
+
if language:
|
307 |
+
filtered = [h for h in filtered if h.get('language', 'en') == language]
|
308 |
+
if min_confidence:
|
309 |
+
filtered = [h for h in filtered if h['confidence'] >= min_confidence]
|
310 |
+
|
311 |
+
return filtered
|
312 |
+
|
313 |
+
def clear(self) -> int:
|
314 |
+
count = len(self._history)
|
315 |
+
self._history.clear()
|
316 |
+
return count
|
317 |
+
|
318 |
+
def size(self) -> int:
|
319 |
+
return len(self._history)
|
320 |
+
|
321 |
+
def get_stats(self) -> Dict:
|
322 |
+
"""Get comprehensive statistics"""
|
323 |
+
if not self._history:
|
324 |
+
return {}
|
325 |
|
326 |
+
sentiments = [item['sentiment'] for item in self._history]
|
327 |
+
confidences = [item['confidence'] for item in self._history]
|
328 |
+
languages = [item.get('language', 'en') for item in self._history]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
|
330 |
return {
|
331 |
+
'total_analyses': len(self._history),
|
332 |
+
'positive_count': sentiments.count('Positive'),
|
333 |
+
'negative_count': sentiments.count('Negative'),
|
334 |
+
'neutral_count': sentiments.count('Neutral'),
|
335 |
+
'avg_confidence': np.mean(confidences),
|
336 |
+
'max_confidence': np.max(confidences),
|
337 |
+
'min_confidence': np.min(confidences),
|
338 |
+
'languages_detected': len(set(languages)),
|
339 |
+
'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en'
|
340 |
}
|
341 |
+
|
342 |
+
# Core Sentiment Analysis Engine with Performance Optimizations
|
343 |
+
class SentimentEngine:
|
344 |
+
"""Optimized multi-language sentiment analysis engine"""
|
345 |
+
|
346 |
+
def __init__(self):
|
347 |
+
self.model_manager = ModelManager()
|
348 |
+
self.executor = ThreadPoolExecutor(max_workers=4)
|
349 |
|
350 |
+
@handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0})
|
351 |
+
def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
|
352 |
+
"""Optimized single text analysis"""
|
353 |
+
if not text.strip():
|
354 |
+
raise ValueError("Empty text provided")
|
355 |
|
356 |
+
# Detect language
|
357 |
+
if language == 'auto':
|
358 |
+
detected_lang = self.model_manager.detect_language(text)
|
359 |
+
else:
|
360 |
+
detected_lang = language
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
|
362 |
+
# Get appropriate model
|
363 |
+
model, tokenizer = self.model_manager.get_model(detected_lang)
|
364 |
|
365 |
+
# Preprocessing
|
366 |
+
options = preprocessing_options or {}
|
367 |
+
processed_text = text
|
368 |
+
if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
|
369 |
+
processed_text = TextProcessor.clean_text(
|
370 |
+
text,
|
371 |
+
options.get('remove_punctuation', True),
|
372 |
+
options.get('remove_numbers', False)
|
373 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
+
# Tokenize and analyze with memory optimization
|
376 |
+
inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
|
377 |
+
truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device)
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
|
379 |
+
# Use no_grad for inference to save memory
|
380 |
+
with torch.no_grad():
|
381 |
+
outputs = model(**inputs)
|
382 |
+
probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
|
384 |
+
# Clear GPU cache after inference
|
385 |
+
if torch.cuda.is_available():
|
386 |
+
torch.cuda.empty_cache()
|
|
|
|
|
387 |
|
388 |
+
# Handle different model outputs
|
389 |
+
if len(probs) == 3: # negative, neutral, positive
|
390 |
+
sentiment_idx = np.argmax(probs)
|
391 |
+
sentiment_labels = ['Negative', 'Neutral', 'Positive']
|
392 |
+
sentiment = sentiment_labels[sentiment_idx]
|
393 |
+
confidence = float(probs[sentiment_idx])
|
394 |
+
|
395 |
+
result = {
|
396 |
+
'sentiment': sentiment,
|
397 |
+
'confidence': confidence,
|
398 |
+
'neg_prob': float(probs[0]),
|
399 |
+
'neu_prob': float(probs[1]),
|
400 |
+
'pos_prob': float(probs[2]),
|
401 |
+
'has_neutral': True
|
402 |
+
}
|
403 |
+
else: # negative, positive
|
404 |
+
pred = np.argmax(probs)
|
405 |
+
sentiment = "Positive" if pred == 1 else "Negative"
|
406 |
+
confidence = float(probs[pred])
|
407 |
+
|
408 |
+
result = {
|
409 |
+
'sentiment': sentiment,
|
410 |
+
'confidence': confidence,
|
411 |
+
'neg_prob': float(probs[0]),
|
412 |
+
'pos_prob': float(probs[1]),
|
413 |
+
'neu_prob': 0.0,
|
414 |
+
'has_neutral': False
|
415 |
+
}
|
416 |
|
417 |
+
# Add metadata
|
418 |
+
result.update({
|
419 |
+
'language': detected_lang,
|
420 |
+
'word_count': len(text.split()),
|
421 |
+
'char_count': len(text)
|
422 |
+
})
|
423 |
|
424 |
+
return result
|
425 |
+
|
426 |
+
def _analyze_text_batch(self, text: str, language: str, preprocessing_options: Dict, index: int) -> Dict:
|
427 |
+
"""Single text analysis for batch processing"""
|
428 |
+
try:
|
429 |
+
result = self.analyze_single(text, language, preprocessing_options)
|
430 |
+
result['batch_index'] = index
|
431 |
+
result['text'] = text[:100] + '...' if len(text) > 100 else text
|
432 |
+
result['full_text'] = text
|
433 |
+
return result
|
434 |
+
except Exception as e:
|
435 |
+
return {
|
436 |
+
'sentiment': 'Error',
|
437 |
+
'confidence': 0.0,
|
438 |
+
'error': str(e),
|
439 |
+
'batch_index': index,
|
440 |
+
'text': text[:100] + '...' if len(text) > 100 else text,
|
441 |
+
'full_text': text
|
442 |
+
}
|
443 |
+
|
444 |
+
|
445 |
+
@handle_errors(default_return=[])
|
446 |
+
def analyze_batch(self, texts: List[str], language: str = 'auto',
|
447 |
+
preprocessing_options: Dict = None, progress_callback=None) -> List[Dict]:
|
448 |
+
"""Optimized parallel batch processing"""
|
449 |
+
if len(texts) > config.BATCH_SIZE_LIMIT:
|
450 |
+
texts = texts[:config.BATCH_SIZE_LIMIT]
|
451 |
|
452 |
+
if not texts:
|
453 |
+
return []
|
|
|
|
|
|
|
454 |
|
455 |
+
# Pre-load model to avoid race conditions
|
456 |
+
self.model_manager.get_model(language if language != 'auto' else 'en')
|
|
|
|
|
|
|
457 |
|
458 |
+
# Use ThreadPoolExecutor for parallel processing
|
459 |
+
with ThreadPoolExecutor(max_workers=min(4, len(texts))) as executor:
|
460 |
+
futures = []
|
461 |
+
for i, text in enumerate(texts):
|
462 |
+
future = executor.submit(
|
463 |
+
self._analyze_text_batch,
|
464 |
+
text, language, preprocessing_options, i
|
465 |
+
)
|
466 |
+
futures.append(future)
|
467 |
|
468 |
+
results = []
|
469 |
+
for i, future in enumerate(futures):
|
470 |
+
if progress_callback:
|
471 |
+
progress_callback((i + 1) / len(futures))
|
472 |
+
|
473 |
+
try:
|
474 |
+
result = future.result(timeout=30) # 30 second timeout per text
|
475 |
+
results.append(result)
|
476 |
+
except Exception as e:
|
477 |
+
results.append({
|
478 |
+
'sentiment': 'Error',
|
479 |
+
'confidence': 0.0,
|
480 |
+
'error': f"Timeout or error: {str(e)}",
|
481 |
+
'batch_index': i,
|
482 |
+
'text': texts[i][:100] + '...' if len(texts[i]) > 100 else texts[i],
|
483 |
+
'full_text': texts[i]
|
484 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
485 |
|
486 |
+
return results
|
|
|
|
|
|
|
487 |
|
488 |
+
class AdvancedAnalysisEngine:
|
489 |
+
"""Advanced analysis using SHAP and LIME with FIXED implementation"""
|
|
|
|
|
490 |
|
491 |
+
def __init__(self):
|
492 |
+
self.model_manager = ModelManager()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
493 |
|
494 |
+
def create_prediction_function(self, model, tokenizer, device):
|
495 |
+
"""Create FIXED prediction function for SHAP/LIME"""
|
496 |
+
def predict_proba(texts):
|
497 |
+
# Ensure texts is a list
|
498 |
+
if isinstance(texts, str):
|
499 |
+
texts = [texts]
|
500 |
+
elif isinstance(texts, np.ndarray):
|
501 |
+
texts = texts.tolist()
|
502 |
+
|
503 |
+
# Convert all elements to strings
|
504 |
+
texts = [str(text) for text in texts]
|
505 |
+
|
506 |
+
results = []
|
507 |
+
batch_size = 16 # Process in smaller batches
|
508 |
+
|
509 |
+
for i in range(0, len(texts), batch_size):
|
510 |
+
batch_texts = texts[i:i + batch_size]
|
511 |
+
|
512 |
+
try:
|
513 |
+
with torch.no_grad():
|
514 |
+
# Tokenize batch
|
515 |
+
inputs = tokenizer(
|
516 |
+
batch_texts,
|
517 |
+
return_tensors="pt",
|
518 |
+
padding=True,
|
519 |
+
truncation=True,
|
520 |
+
max_length=config.MAX_TEXT_LENGTH
|
521 |
+
).to(device)
|
522 |
+
|
523 |
+
# Batch inference
|
524 |
+
outputs = model(**inputs)
|
525 |
+
probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()
|
526 |
+
|
527 |
+
results.extend(probs)
|
528 |
+
|
529 |
+
except Exception as e:
|
530 |
+
logger.error(f"Prediction batch failed: {e}")
|
531 |
+
# Return neutral predictions for failed batch
|
532 |
+
batch_size_actual = len(batch_texts)
|
533 |
+
if hasattr(model.config, 'num_labels') and model.config.num_labels == 3:
|
534 |
+
neutral_probs = np.array([[0.33, 0.34, 0.33]] * batch_size_actual)
|
535 |
+
else:
|
536 |
+
neutral_probs = np.array([[0.5, 0.5]] * batch_size_actual)
|
537 |
+
results.extend(neutral_probs)
|
538 |
+
|
539 |
+
return np.array(results)
|
540 |
|
541 |
+
return predict_proba
|
542 |
+
|
543 |
+
@handle_errors(default_return=("Analysis failed", None, None))
|
544 |
+
def analyze_with_shap(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
|
545 |
+
"""FIXED SHAP analysis implementation"""
|
546 |
+
if not text.strip():
|
547 |
+
return "Please enter text for analysis", None, {}
|
548 |
|
549 |
+
# Detect language and get model
|
550 |
+
if language == 'auto':
|
551 |
+
detected_lang = self.model_manager.detect_language(text)
|
552 |
+
else:
|
553 |
+
detected_lang = language
|
|
|
|
|
|
|
554 |
|
555 |
+
model, tokenizer = self.model_manager.get_model(detected_lang)
|
|
|
556 |
|
557 |
+
try:
|
558 |
+
# Create FIXED prediction function
|
559 |
+
predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
|
560 |
+
|
561 |
+
# Test the prediction function first
|
562 |
+
test_pred = predict_fn([text])
|
563 |
+
if test_pred is None or len(test_pred) == 0:
|
564 |
+
return "Prediction function test failed", None, {}
|
565 |
+
|
566 |
+
# Use SHAP Text Explainer instead of generic Explainer
|
567 |
+
explainer = shap.Explainer(predict_fn, masker=shap.maskers.Text(tokenizer))
|
568 |
+
|
569 |
+
# Get SHAP values with proper text input
|
570 |
+
shap_values = explainer([text], max_evals=num_samples)
|
571 |
+
|
572 |
+
# Extract data safely
|
573 |
+
if hasattr(shap_values, 'data') and hasattr(shap_values, 'values'):
|
574 |
+
tokens = shap_values.data[0] if len(shap_values.data) > 0 else []
|
575 |
+
values = shap_values.values[0] if len(shap_values.values) > 0 else []
|
576 |
+
else:
|
577 |
+
return "SHAP values extraction failed", None, {}
|
578 |
+
|
579 |
+
if len(tokens) == 0 or len(values) == 0:
|
580 |
+
return "No tokens or values extracted from SHAP", None, {}
|
581 |
+
|
582 |
+
# Handle multi-dimensional values
|
583 |
+
if len(values.shape) > 1:
|
584 |
+
# Use positive class values (last column for 3-class, second for 2-class)
|
585 |
+
pos_values = values[:, -1] if values.shape[1] >= 2 else values[:, 0]
|
586 |
+
else:
|
587 |
+
pos_values = values
|
588 |
+
|
589 |
+
# Ensure we have matching lengths
|
590 |
+
min_len = min(len(tokens), len(pos_values))
|
591 |
+
tokens = tokens[:min_len]
|
592 |
+
pos_values = pos_values[:min_len]
|
593 |
+
|
594 |
+
# Create visualization
|
595 |
+
fig = go.Figure()
|
596 |
+
|
597 |
+
colors = ['red' if v < 0 else 'green' for v in pos_values]
|
598 |
+
|
599 |
+
fig.add_trace(go.Bar(
|
600 |
+
x=list(range(len(tokens))),
|
601 |
+
y=pos_values,
|
602 |
+
text=tokens,
|
603 |
+
textposition='outside',
|
604 |
+
marker_color=colors,
|
605 |
+
name='SHAP Values',
|
606 |
+
hovertemplate='<b>%{text}</b><br>SHAP Value: %{y:.4f}<extra></extra>'
|
607 |
+
))
|
608 |
+
|
609 |
+
fig.update_layout(
|
610 |
+
title=f"SHAP Analysis - Token Importance (Samples: {num_samples})",
|
611 |
+
xaxis_title="Token Index",
|
612 |
+
yaxis_title="SHAP Value",
|
613 |
+
height=500,
|
614 |
+
xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens)
|
615 |
+
)
|
616 |
+
|
617 |
+
# Create analysis summary
|
618 |
+
analysis_data = {
|
619 |
+
'method': 'SHAP',
|
620 |
+
'language': detected_lang,
|
621 |
+
'total_tokens': len(tokens),
|
622 |
+
'samples_used': num_samples,
|
623 |
+
'positive_influence': sum(1 for v in pos_values if v > 0),
|
624 |
+
'negative_influence': sum(1 for v in pos_values if v < 0),
|
625 |
+
'most_important_tokens': [(str(tokens[i]), float(pos_values[i]))
|
626 |
+
for i in np.argsort(np.abs(pos_values))[-5:]]
|
627 |
+
}
|
628 |
+
|
629 |
+
summary_text = f"""
|
630 |
+
**SHAP Analysis Results:**
|
631 |
+
- **Language:** {detected_lang.upper()}
|
632 |
+
- **Total Tokens:** {analysis_data['total_tokens']}
|
633 |
+
- **Samples Used:** {num_samples}
|
634 |
+
- **Positive Influence Tokens:** {analysis_data['positive_influence']}
|
635 |
+
- **Negative Influence Tokens:** {analysis_data['negative_influence']}
|
636 |
+
- **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
|
637 |
+
- **Status:** SHAP analysis completed successfully
|
638 |
+
"""
|
639 |
+
|
640 |
+
return summary_text, fig, analysis_data
|
641 |
+
|
642 |
+
except Exception as e:
|
643 |
+
logger.error(f"SHAP analysis failed: {e}")
|
644 |
+
error_msg = f"""
|
645 |
+
**SHAP Analysis Failed:**
|
646 |
+
- **Error:** {str(e)}
|
647 |
+
- **Language:** {detected_lang.upper()}
|
648 |
+
- **Suggestion:** Try with a shorter text or reduce number of samples
|
649 |
+
|
650 |
+
**Common fixes:**
|
651 |
+
- Reduce sample size to 50-100
|
652 |
+
- Use shorter input text (< 200 words)
|
653 |
+
- Check if model supports the text language
|
654 |
+
"""
|
655 |
+
return error_msg, None, {}
|
656 |
+
|
657 |
+
@handle_errors(default_return=("Analysis failed", None, None))
|
658 |
+
def analyze_with_lime(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
|
659 |
+
"""FIXED LIME analysis implementation - Bug Fix for mode parameter"""
|
660 |
+
if not text.strip():
|
661 |
+
return "Please enter text for analysis", None, {}
|
662 |
|
663 |
+
# Detect language and get model
|
664 |
+
if language == 'auto':
|
665 |
+
detected_lang = self.model_manager.detect_language(text)
|
666 |
+
else:
|
667 |
+
detected_lang = language
|
668 |
|
669 |
+
model, tokenizer = self.model_manager.get_model(detected_lang)
|
670 |
|
671 |
+
try:
|
672 |
+
# Create FIXED prediction function
|
673 |
+
predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
|
674 |
+
|
675 |
+
# Test the prediction function first
|
676 |
+
test_pred = predict_fn([text])
|
677 |
+
if test_pred is None or len(test_pred) == 0:
|
678 |
+
return "Prediction function test failed", None, {}
|
679 |
+
|
680 |
+
# Determine class names based on model output
|
681 |
+
num_classes = test_pred.shape[1] if len(test_pred.shape) > 1 else 2
|
682 |
+
if num_classes == 3:
|
683 |
+
class_names = ['Negative', 'Neutral', 'Positive']
|
684 |
+
else:
|
685 |
+
class_names = ['Negative', 'Positive']
|
686 |
+
|
687 |
+
# Initialize LIME explainer - FIXED: Remove 'mode' parameter
|
688 |
+
explainer = LimeTextExplainer(class_names=class_names)
|
689 |
+
|
690 |
+
# Get LIME explanation
|
691 |
+
exp = explainer.explain_instance(
|
692 |
+
text,
|
693 |
+
predict_fn,
|
694 |
+
num_features=min(20, len(text.split())), # Limit features
|
695 |
+
num_samples=num_samples
|
696 |
+
)
|
697 |
+
|
698 |
+
# Extract feature importance
|
699 |
+
lime_data = exp.as_list()
|
700 |
+
|
701 |
+
if not lime_data:
|
702 |
+
return "No LIME features extracted", None, {}
|
703 |
+
|
704 |
+
# Create visualization
|
705 |
+
words = [item[0] for item in lime_data]
|
706 |
+
scores = [item[1] for item in lime_data]
|
707 |
+
|
708 |
+
fig = go.Figure()
|
709 |
+
|
710 |
+
colors = ['red' if s < 0 else 'green' for s in scores]
|
711 |
+
|
712 |
+
fig.add_trace(go.Bar(
|
713 |
+
y=words,
|
714 |
+
x=scores,
|
715 |
+
orientation='h',
|
716 |
+
marker_color=colors,
|
717 |
+
text=[f'{s:.3f}' for s in scores],
|
718 |
+
textposition='auto',
|
719 |
+
name='LIME Importance',
|
720 |
+
hovertemplate='<b>%{y}</b><br>Importance: %{x:.4f}<extra></extra>'
|
721 |
+
))
|
722 |
+
|
723 |
+
fig.update_layout(
|
724 |
+
title=f"LIME Analysis - Feature Importance (Samples: {num_samples})",
|
725 |
+
xaxis_title="Importance Score",
|
726 |
+
yaxis_title="Words/Phrases",
|
727 |
+
height=500
|
728 |
+
)
|
729 |
+
|
730 |
+
# Create analysis summary
|
731 |
+
analysis_data = {
|
732 |
+
'method': 'LIME',
|
733 |
+
'language': detected_lang,
|
734 |
+
'features_analyzed': len(lime_data),
|
735 |
+
'samples_used': num_samples,
|
736 |
+
'positive_features': sum(1 for _, score in lime_data if score > 0),
|
737 |
+
'negative_features': sum(1 for _, score in lime_data if score < 0),
|
738 |
+
'feature_importance': lime_data
|
739 |
+
}
|
740 |
+
|
741 |
+
summary_text = f"""
|
742 |
+
**LIME Analysis Results:**
|
743 |
+
- **Language:** {detected_lang.upper()}
|
744 |
+
- **Features Analyzed:** {analysis_data['features_analyzed']}
|
745 |
+
- **Classes:** {', '.join(class_names)}
|
746 |
+
- **Samples Used:** {num_samples}
|
747 |
+
- **Positive Features:** {analysis_data['positive_features']}
|
748 |
+
- **Negative Features:** {analysis_data['negative_features']}
|
749 |
+
- **Top Features:** {', '.join([f"{word}({score:.3f})" for word, score in lime_data[:5]])}
|
750 |
+
- **Status:** LIME analysis completed successfully
|
751 |
+
"""
|
752 |
+
|
753 |
+
return summary_text, fig, analysis_data
|
754 |
+
|
755 |
+
except Exception as e:
|
756 |
+
logger.error(f"LIME analysis failed: {e}")
|
757 |
+
error_msg = f"""
|
758 |
+
**LIME Analysis Failed:**
|
759 |
+
- **Error:** {str(e)}
|
760 |
+
- **Language:** {detected_lang.upper()}
|
761 |
+
- **Suggestion:** Try with a shorter text or reduce number of samples
|
762 |
|
763 |
+
**Bug Fix Applied:**
|
764 |
+
- ✅ Removed 'mode' parameter from LimeTextExplainer initialization
|
765 |
+
- ✅ This should resolve the "unexpected keyword argument 'mode'" error
|
766 |
+
|
767 |
+
**Common fixes:**
|
768 |
+
- Reduce sample size to 50-100
|
769 |
+
- Use shorter input text (< 200 words)
|
770 |
+
- Check if model supports the text language
|
771 |
+
"""
|
772 |
+
return error_msg, None, {}
|
773 |
+
|
774 |
+
# Optimized Plotly Visualization System
|
775 |
+
class PlotlyVisualizer:
|
776 |
+
"""Enhanced Plotly visualizations"""
|
777 |
|
778 |
+
@staticmethod
|
779 |
+
@handle_errors(default_return=None)
|
780 |
+
def create_sentiment_gauge(result: Dict, theme: ThemeContext) -> go.Figure:
|
781 |
+
"""Create animated sentiment gauge"""
|
782 |
+
colors = theme.colors
|
783 |
+
|
784 |
+
if result.get('has_neutral', False):
|
785 |
+
# Three-way gauge
|
786 |
+
fig = go.Figure(go.Indicator(
|
787 |
+
mode="gauge+number+delta",
|
788 |
+
value=result['pos_prob'] * 100,
|
789 |
+
domain={'x': [0, 1], 'y': [0, 1]},
|
790 |
+
title={'text': f"Sentiment: {result['sentiment']}"},
|
791 |
+
delta={'reference': 50},
|
792 |
+
gauge={
|
793 |
+
'axis': {'range': [None, 100]},
|
794 |
+
'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
|
795 |
+
'steps': [
|
796 |
+
{'range': [0, 33], 'color': colors['neg']},
|
797 |
+
{'range': [33, 67], 'color': colors['neu']},
|
798 |
+
{'range': [67, 100], 'color': colors['pos']}
|
799 |
+
],
|
800 |
+
'threshold': {
|
801 |
+
'line': {'color': "red", 'width': 4},
|
802 |
+
'thickness': 0.75,
|
803 |
+
'value': 90
|
804 |
+
}
|
805 |
+
}
|
806 |
+
))
|
807 |
+
else:
|
808 |
+
# Two-way gauge
|
809 |
+
fig = go.Figure(go.Indicator(
|
810 |
+
mode="gauge+number",
|
811 |
+
value=result['confidence'] * 100,
|
812 |
+
domain={'x': [0, 1], 'y': [0, 1]},
|
813 |
+
title={'text': f"Confidence: {result['sentiment']}"},
|
814 |
+
gauge={
|
815 |
+
'axis': {'range': [None, 100]},
|
816 |
+
'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
|
817 |
+
'steps': [
|
818 |
+
{'range': [0, 50], 'color': "lightgray"},
|
819 |
+
{'range': [50, 100], 'color': "gray"}
|
820 |
+
]
|
821 |
+
}
|
822 |
+
))
|
823 |
+
|
824 |
+
fig.update_layout(height=400, font={'size': 16})
|
825 |
+
return fig
|
826 |
+
|
827 |
+
|
828 |
+
|
829 |
+
|
830 |
|
831 |
+
@staticmethod
|
832 |
+
@handle_errors(default_return=None)
|
833 |
+
def create_probability_bars(result: Dict, theme: ThemeContext) -> go.Figure:
|
834 |
+
"""Create probability bar chart"""
|
835 |
+
colors = theme.colors
|
836 |
+
|
837 |
+
if result.get('has_neutral', False):
|
838 |
+
labels = ['Negative', 'Neutral', 'Positive']
|
839 |
+
values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
|
840 |
+
bar_colors = [colors['neg'], colors['neu'], colors['pos']]
|
841 |
+
else:
|
842 |
+
labels = ['Negative', 'Positive']
|
843 |
+
values = [result['neg_prob'], result['pos_prob']]
|
844 |
+
bar_colors = [colors['neg'], colors['pos']]
|
845 |
|
846 |
fig = go.Figure(data=[
|
847 |
+
go.Bar(x=labels, y=values, marker_color=bar_colors,
|
848 |
+
text=[f'{v:.3f}' for v in values], textposition='outside')
|
|
|
849 |
])
|
|
|
850 |
|
851 |
+
fig.update_layout(
|
852 |
+
title="Sentiment Probabilities",
|
853 |
+
yaxis_title="Probability",
|
854 |
+
height=400,
|
855 |
+
showlegend=False
|
856 |
+
)
|
894 |
+
return fig
|
904 |
+
@staticmethod
|
905 |
+
@handle_errors(default_return=None)
|
906 |
+
def create_batch_summary(results: List[Dict], theme: ThemeContext) -> go.Figure:
|
907 |
+
"""Create batch analysis summary"""
|
908 |
+
colors = theme.colors
|
909 |
+
|
910 |
+
# Count sentiments
|
911 |
+
sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
|
912 |
+
sentiment_counts = Counter(sentiments)
|
913 |
+
|
914 |
+
# Create pie chart
|
915 |
+
fig = go.Figure(data=[go.Pie(
|
916 |
+
labels=list(sentiment_counts.keys()),
|
917 |
+
values=list(sentiment_counts.values()),
|
918 |
+
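# Sentiment labels map to theme colors via their first three letters ('pos'/'neg'/'neu'); unknown labels fall back to gray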
marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
|
919 |
+
textinfo='label+percent',
|
920 |
+
hole=0.3
|
921 |
+
)])
|
922 |
+
|
923 |
+
fig.update_layout(
|
924 |
+
title=f"Batch Analysis Summary ({len(results)} texts)",
|
925 |
+
height=400
|
|
|
926 |
)
|
927 |
+
|
928 |
+
return fig
|
929 |
|
930 |
+
@staticmethod
|
931 |
+
@handle_errors(default_return=None)
|
932 |
+
def create_confidence_distribution(results: List[Dict]) -> go.Figure:
|
933 |
+
"""Create confidence distribution plot"""
|
934 |
+
confidences = [r['confidence'] for r in results if 'confidence' in r and r['sentiment'] != 'Error']
|
935 |
+
|
959 |
+
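# Nothing to plot if every result errored out, so return an empty figure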
if not confidences:
|
960 |
+
return go.Figure()
|
961 |
+
|
962 |
+
fig = go.Figure(data=[go.Histogram(
|
963 |
+
x=confidences,
|
964 |
+
nbinsx=20,
|
965 |
+
marker_color='skyblue',
|
966 |
+
opacity=0.7
|
967 |
+
)])
|
1012 |
+
fig.update_layout(
|
1013 |
+
title="Confidence Distribution",
|
1014 |
+
xaxis_title="Confidence Score",
|
1015 |
+
yaxis_title="Frequency",
|
1016 |
+
height=400
|
1017 |
+
)
|
1018 |
+
|
1019 |
+
return fig
|
1020 |
+
|
1021 |
+
@staticmethod
|
1022 |
+
@handle_errors(default_return=None)
|
1023 |
+
def create_history_dashboard(history: List[Dict], theme: ThemeContext) -> go.Figure:
|
1024 |
+
"""Create comprehensive history dashboard"""
|
1025 |
+
if len(history) < 2:
|
1026 |
+
return go.Figure()
|
1027 |
+
|
1028 |
+
# Create subplots
|
1029 |
+
fig = make_subplots(
|
1030 |
+
rows=2, cols=2,
|
1031 |
+
subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
|
1032 |
+
'Language Distribution', 'Sentiment Summary'],
|
1033 |
+
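# Top row uses standard xy subplots; bottom row hosts a pie (domain-type) and a bar chart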
specs=[[{"secondary_y": False}, {"secondary_y": False}],
|
1034 |
+
[{"type": "pie"}, {"type": "bar"}]]
|
1035 |
+
)
|
1036 |
+
|
1037 |
+
# Extract data
|
1038 |
+
indices = list(range(len(history)))
|
1039 |
+
pos_probs = [item.get('pos_prob', 0) for item in history]
|
1040 |
+
confidences = [item['confidence'] for item in history]
|
1041 |
+
sentiments = [item['sentiment'] for item in history]
|
1042 |
+
languages = [item.get('language', 'en') for item in history]
|
1043 |
+
|
1044 |
+
# Sentiment timeline
|
1045 |
+
colors_map = {'Positive': theme.colors['pos'], 'Negative': theme.colors['neg'], 'Neutral': theme.colors['neu']}
|
1046 |
+
colors = [colors_map.get(s, '#999999') for s in sentiments]
|
1047 |
+
|
1048 |
+
fig.add_trace(
|
1049 |
+
go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
|
1050 |
+
marker=dict(color=colors, size=8),
|
1051 |
+
name='Positive Probability'),
|
1052 |
+
row=1, col=1
|
1053 |
+
)
|
1054 |
+
|
1055 |
+
# Confidence distribution
|
1056 |
+
fig.add_trace(
|
1057 |
+
go.Histogram(x=confidences, nbinsx=10, name='Confidence'),
|
1058 |
+
row=1, col=2
|
1059 |
)
|
1060 |
+
|
1061 |
+
# Language distribution
|
1062 |
+
lang_counts = Counter(languages)
|
1063 |
+
fig.add_trace(
|
1064 |
+
go.Pie(labels=list(lang_counts.keys()), values=list(lang_counts.values()),
|
1065 |
+
name="Languages"),
|
1066 |
+
row=2, col=1
|
1067 |
+
)
|
1068 |
+
|
1069 |
+
# Sentiment summary
|
1070 |
+
sent_counts = Counter(sentiments)
|
1071 |
+
sent_colors = [colors_map.get(k, '#999999') for k in sent_counts.keys()]
|
1072 |
+
fig.add_trace(
|
1073 |
+
go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
|
1074 |
+
marker_color=sent_colors),
|
1075 |
+
row=2, col=2
|
1076 |
+
)
|
1077 |
+
|
1078 |
+
fig.update_layout(height=800, showlegend=False)
|
1079 |
+
return fig
|
1080 |
+
|
1081 |
+
# Universal Data Handler
|
1082 |
+
class DataHandler:
|
1083 |
+
"""Enhanced data operations"""
|
1084 |
+
|
1085 |
+
@staticmethod
|
1086 |
+
@handle_errors(default_return=(None, "Export failed"))
|
1087 |
+
def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]:
|
1088 |
+
"""Export data with comprehensive information"""
|
1089 |
+
if not data:
|
1090 |
+
return None, "No data to export"
|
1091 |
+
|
1092 |
+
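# Write to a named temp file (delete=False) so the path can be handed to Gradio's File component for download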
temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False,
|
1093 |
+
suffix=f'.{format_type}', encoding='utf-8')
|
1094 |
+
|
1095 |
+
if format_type == 'csv':
|
1096 |
+
writer = csv.writer(temp_file)
|
1097 |
+
writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Language',
|
1098 |
+
'Pos_Prob', 'Neg_Prob', 'Neu_Prob', 'Word_Count'])
|
1099 |
+
for entry in data:
|
1100 |
+
writer.writerow([
|
1101 |
+
entry.get('timestamp', ''),
|
1102 |
+
entry.get('text', ''),
|
1103 |
+
entry.get('sentiment', ''),
|
1104 |
+
f"{entry.get('confidence', 0):.4f}",
|
1105 |
+
entry.get('language', 'en'),
|
1106 |
+
f"{entry.get('pos_prob', 0):.4f}",
|
1107 |
+
f"{entry.get('neg_prob', 0):.4f}",
|
1108 |
+
f"{entry.get('neu_prob', 0):.4f}",
|
1109 |
+
entry.get('word_count', 0)
|
1110 |
+
])
|
1111 |
+
elif format_type == 'json':
|
1112 |
+
json.dump(data, temp_file, indent=2, ensure_ascii=False)
|
1113 |
+
|
1114 |
+
temp_file.close()
|
1115 |
+
return temp_file.name, f"Exported {len(data)} entries"
|
1116 |
+
|
1117 |
+
@staticmethod
|
1118 |
+
@handle_errors(default_return="")
|
1119 |
+
def process_file(file) -> str:
|
1120 |
+
"""Process uploaded files"""
|
1121 |
+
if not file:
|
1122 |
+
return ""
|
1123 |
+
|
1124 |
+
content = file.read().decode('utf-8')
|
1125 |
+
|
1126 |
+
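# For CSV uploads, assume the text lives in the first column; fall back to naive line splitting if csv parsing fails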
if file.name.endswith('.csv'):
|
1127 |
+
csv_file = io.StringIO(content)
|
1128 |
+
reader = csv.reader(csv_file)
|
1129 |
+
try:
|
1130 |
+
next(reader) # Skip header
|
1131 |
+
texts = []
|
1132 |
+
for row in reader:
|
1133 |
+
if row and row[0].strip():
|
1134 |
+
text = row[0].strip().strip('"')
|
1135 |
+
if text:
|
1136 |
+
texts.append(text)
|
1137 |
+
return '\n'.join(texts)
|
1138 |
+
except Exception:
|
1139 |
+
lines = content.strip().split('\n')[1:]
|
1140 |
+
texts = []
|
1141 |
+
for line in lines:
|
1142 |
+
if line.strip():
|
1143 |
+
text = line.strip().strip('"')
|
1144 |
+
if text:
|
1145 |
+
texts.append(text)
|
1146 |
+
return '\n'.join(texts)
|
1147 |
+
|
1148 |
+
return content
|
1149 |
+
|
1162 |
+
class SentimentApp:
|
1163 |
+
"""Optimized multilingual sentiment analysis application"""
|
1164 |
+
|
1165 |
+
def __init__(self):
|
1166 |
+
self.engine = SentimentEngine()
|
1167 |
+
self.advanced_engine = AdvancedAnalysisEngine()
|
1168 |
+
self.history = HistoryManager()
|
1169 |
+
self.data_handler = DataHandler()
|
1170 |
+
|
1171 |
+
# Multi-language examples
|
1172 |
+
self.examples = [
|
1173 |
+
# Auto Detect
|
1174 |
+
["The film had its moments, but overall it felt a bit too long and lacked emotional depth. Some scenes were visually impressive, yet they failed to connect emotionally. By the end, I found myself disengaged and unsatisfied."],
|
1175 |
+
|
1176 |
+
# English
|
1177 |
+
["I was completely blown away by the movie — the performances were raw and powerful, and the story stayed with me long after the credits rolled. Every scene felt purposeful, and the emotional arc was handled with incredible nuance. It's the kind of film that makes you reflect deeply on your own life."],
|
1178 |
+
|
1179 |
+
|
1180 |
+
|
1181 |
+
# Chinese
|
1182 |
+
["这部电影节奏拖沓,剧情老套,完全没有让我产生任何共鸣,是一次失望的观影体验。演员的表演也显得做作,缺乏真实感。看到最后甚至有点不耐烦,整体表现乏善可陈。"],
|
1183 |
+
|
1184 |
+
|
1185 |
+
|
1186 |
+
|
1187 |
+
|
1188 |
+
|
1189 |
+
|
1190 |
+
|
1191 |
+
# Spanish
|
1192 |
+
["Una obra maestra del cine contemporáneo, con actuaciones sobresalientes, un guion bien escrito y una dirección impecable. Cada plano parecía cuidadosamente pensado, y la historia avanzaba con una intensidad emocional que mantenía al espectador cautivado. Definitivamente una película que vale la pena volver a ver."],
|
1193 |
+
|
1194 |
+
|
1195 |
+
|
1196 |
+
# French
|
1197 |
+
["Je m'attendais à beaucoup mieux. Le scénario était confus, les dialogues ennuyeux, et je me suis presque endormi au milieu du film. Même la mise en scène, habituellement un point fort, manquait cruellement d'inspiration cette fois-ci."],
|
1198 |
+
|
1199 |
+
|
1200 |
+
# German
|
1201 |
+
["Der Film war ein emotionales Erlebnis mit großartigen Bildern, einem mitreißenden Soundtrack und einer Geschichte, die zum Nachdenken anregt. Besonders beeindruckend war die schauspielerische Leistung der Hauptdarsteller, die eine tiefe Menschlichkeit vermittelten. Es ist ein Film, der lange nachwirkt."],
|
1202 |
+
|
1203 |
+
|
1204 |
+
|
1205 |
+
|
1206 |
+
|
1207 |
+
|
1208 |
+
|
1209 |
+
# Swedish
|
1210 |
+
["Filmen var en besvikelse – tråkig handling, överdrivet skådespeleri och ett slut som inte gav något avslut alls. Den kändes forcerad och saknade en tydlig röd tråd. Jag gick från biografen med en känsla av tomhet och frustration."]
|
1211 |
+
]
|
1212 |
+
|
|
1230 |
+
@handle_errors(default_return=("Please enter text", None, None))
|
1231 |
+
def analyze_single(self, text: str, language: str, theme: str, clean_text: bool,
|
1232 |
+
remove_punct: bool, remove_nums: bool):
|
1233 |
+
"""Optimized single text analysis"""
|
1234 |
+
if not text.strip():
|
1235 |
+
return "Please enter text", None, None
|
1236 |
+
|
1237 |
+
# Map display names to language codes
|
1238 |
+
language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
|
1239 |
+
language_code = language_map.get(language, 'auto')
|
1240 |
+
|
1241 |
+
preprocessing_options = {
|
1242 |
+
'clean_text': clean_text,
|
1243 |
+
'remove_punctuation': remove_punct,
|
1244 |
+
'remove_numbers': remove_nums
|
1245 |
+
}
|
1246 |
+
|
1247 |
+
with memory_cleanup():
|
1248 |
+
result = self.engine.analyze_single(text, language_code, preprocessing_options)
|
1249 |
+
|
1250 |
+
# Add to history
|
1251 |
+
history_entry = {
|
1252 |
+
'text': text[:100] + '...' if len(text) > 100 else text,
|
1253 |
+
'full_text': text,
|
1254 |
+
'sentiment': result['sentiment'],
|
1255 |
+
'confidence': result['confidence'],
|
1256 |
+
'pos_prob': result.get('pos_prob', 0),
|
1257 |
+
'neg_prob': result.get('neg_prob', 0),
|
1258 |
+
'neu_prob': result.get('neu_prob', 0),
|
1259 |
+
'language': result['language'],
|
1260 |
+
'word_count': result['word_count'],
|
1261 |
+
'analysis_type': 'single'
|
1262 |
+
}
|
1263 |
+
self.history.add(history_entry)
|
1264 |
+
|
1265 |
+
# Create visualizations
|
1266 |
+
theme_ctx = ThemeContext(theme)
|
1267 |
+
gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme_ctx)
|
1268 |
+
bars_fig = PlotlyVisualizer.create_probability_bars(result, theme_ctx)
|
1269 |
+
|
1270 |
+
# Create comprehensive result text
|
1271 |
+
info_text = f"""
|
1272 |
+
**Analysis Results:**
|
1273 |
+
- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
|
1274 |
+
- **Language:** {result['language'].upper()}
|
1275 |
+
- **Statistics:** {result['word_count']} words, {result['char_count']} characters
|
1276 |
+
- **Probabilities:** Positive: {result.get('pos_prob', 0):.3f}, Negative: {result.get('neg_prob', 0):.3f}, Neutral: {result.get('neu_prob', 0):.3f}
|
1277 |
+
"""
|
1278 |
+
|
1279 |
+
return info_text, gauge_fig, bars_fig
|
1280 |
|
1281 |
+
@handle_errors(default_return=("Please enter texts", None, None, None))
|
1282 |
+
def analyze_batch(self, batch_text: str, language: str, theme: str,
|
1283 |
+
clean_text: bool, remove_punct: bool, remove_nums: bool):
|
1284 |
+
"""Enhanced batch analysis with parallel processing"""
|
1285 |
+
if not batch_text.strip():
|
1286 |
+
return "Please enter texts (one per line)", None, None, None
|
1287 |
+
|
1288 |
+
# Parse batch input
|
1289 |
+
texts = TextProcessor.parse_batch_input(batch_text)
|
1290 |
+
|
1297 |
+
if len(texts) > config.BATCH_SIZE_LIMIT:
|
1298 |
+
return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
|
1299 |
+
|
1300 |
+
if not texts:
|
1301 |
+
return "No valid texts found", None, None, None
|
1302 |
+
|
1303 |
+
# Map display names to language codes
|
1304 |
+
language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
|
1305 |
+
language_code = language_map.get(language, 'auto')
|
1306 |
+
|
1307 |
+
|
1308 |
+
|
1309 |
+
preprocessing_options = {
|
1310 |
+
'clean_text': clean_text,
|
1311 |
+
'remove_punctuation': remove_punct,
|
1312 |
+
'remove_numbers': remove_nums
|
1313 |
+
}
|
1314 |
+
|
1315 |
+
with memory_cleanup():
|
1316 |
+
results = self.engine.analyze_batch(texts, language_code, preprocessing_options)
|
1317 |
+
|
1318 |
+
# Add to history
|
1319 |
+
batch_entries = []
|
1320 |
+
for result in results:
|
1321 |
+
if 'error' not in result:
|
1322 |
+
entry = {
|
1323 |
+
'text': result['text'],
|
1324 |
+
'full_text': result['full_text'],
|
1325 |
+
'sentiment': result['sentiment'],
|
1326 |
+
'confidence': result['confidence'],
|
1327 |
+
'pos_prob': result.get('pos_prob', 0),
|
1328 |
+
'neg_prob': result.get('neg_prob', 0),
|
1329 |
+
'neu_prob': result.get('neu_prob', 0),
|
1330 |
+
'language': result['language'],
|
1331 |
+
'word_count': result['word_count'],
|
1332 |
+
'analysis_type': 'batch',
|
1333 |
+
'batch_index': result['batch_index']
|
1334 |
+
}
|
1335 |
+
batch_entries.append(entry)
|
1336 |
+
|
1337 |
+
self.history.add_batch(batch_entries)
|
1338 |
+
|
1339 |
+
# Create visualizations
|
1340 |
+
theme_ctx = ThemeContext(theme)
|
1341 |
+
summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx)
|
1342 |
+
confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
|
1343 |
+
|
1344 |
+
# Create results DataFrame
|
1345 |
+
df_data = []
|
1346 |
+
for result in results:
|
1347 |
+
if 'error' in result:
|
1348 |
+
df_data.append({
|
1349 |
+
'Index': result['batch_index'] + 1,
|
1350 |
+
'Text': result['text'],
|
1351 |
+
'Sentiment': 'Error',
|
1352 |
+
'Confidence': 0.0,
|
1353 |
+
'Language': 'Unknown',
|
1354 |
+
'Error': result['error']
|
1355 |
+
})
|
1356 |
+
else:
|
1357 |
+
df_data.append({
|
1358 |
+
'Index': result['batch_index'] + 1,
|
1359 |
+
'Text': result['text'],
|
1360 |
+
'Sentiment': result['sentiment'],
|
1361 |
+
'Confidence': f"{result['confidence']:.3f}",
|
1362 |
+
'Language': result['language'].upper(),
|
1363 |
+
'Word_Count': result.get('word_count', 0)
|
1364 |
+
})
|
1365 |
+
|
1366 |
+
df = pd.DataFrame(df_data)
|
1367 |
+
|
1368 |
+
# Create summary text
|
1369 |
+
successful_results = [r for r in results if 'error' not in r]
|
1370 |
+
error_count = len(results) - len(successful_results)
|
1371 |
+
|
1372 |
+
if successful_results:
|
1373 |
+
sentiment_counts = Counter([r['sentiment'] for r in successful_results])
|
1374 |
+
avg_confidence = np.mean([r['confidence'] for r in successful_results])
|
1375 |
+
languages = Counter([r['language'] for r in successful_results])
|
1376 |
+
|
1377 |
+
summary_text = f"""
|
1378 |
+
**Batch Analysis Summary:**
|
1379 |
+
- **Total Texts:** {len(texts)}
|
1380 |
+
- **Successful:** {len(successful_results)}
|
1381 |
+
- **Errors:** {error_count}
|
1382 |
+
- **Average Confidence:** {avg_confidence:.3f}
|
1383 |
+
- **Sentiments:** {dict(sentiment_counts)}
|
1384 |
+
- **Languages Detected:** {dict(languages)}
|
1385 |
+
"""
|
1386 |
+
else:
|
1387 |
+
summary_text = f"All {len(texts)} texts failed to analyze."
|
1388 |
+
|
1389 |
+
return summary_text, df, summary_fig, confidence_fig
|
1390 |
+
|
1391 |
+
# FIXED advanced analysis methods with sample size control
|
1392 |
+
@handle_errors(default_return=("Please enter text", None))
|
1393 |
+
def analyze_with_shap(self, text: str, language: str, num_samples: int = 100):
|
1394 |
+
"""Perform FIXED SHAP analysis with configurable samples"""
|
1395 |
+
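# Convert the display name chosen in the UI back to a language code before delegating to the advanced engine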
language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
|
1396 |
+
language_code = language_map.get(language, 'auto')
|
1397 |
+
|
1398 |
+
return self.advanced_engine.analyze_with_shap(text, language_code, num_samples)
|
1399 |
+
|
1400 |
+
@handle_errors(default_return=("Please enter text", None))
|
1401 |
+
def analyze_with_lime(self, text: str, language: str, num_samples: int = 100):
|
1402 |
+
"""Perform FIXED LIME analysis with configurable samples"""
|
1403 |
+
language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
|
1404 |
+
language_code = language_map.get(language, 'auto')
|
1405 |
+
|
1406 |
+
return self.advanced_engine.analyze_with_lime(text, language_code, num_samples)
|
1407 |
+
|
1408 |
+
@handle_errors(default_return=(None, "No history available"))
|
1409 |
+
def plot_history(self, theme: str = 'default'):
|
1410 |
+
"""Plot comprehensive history analysis"""
|
1411 |
+
history = self.history.get_all()
|
1412 |
+
if len(history) < 2:
|
1413 |
+
return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
|
1414 |
+
|
1415 |
+
theme_ctx = ThemeContext(theme)
|
1416 |
+
|
1417 |
+
|
1418 |
+
|
1419 |
+
|
1420 |
+
|
1421 |
+
|
1422 |
+
with memory_cleanup():
|
1423 |
+
fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
|
1424 |
+
stats = self.history.get_stats()
|
1425 |
+
|
1426 |
+
stats_text = f"""
|
1427 |
+
**History Statistics:**
|
1428 |
+
- **Total Analyses:** {stats.get('total_analyses', 0)}
|
1429 |
+
- **Positive:** {stats.get('positive_count', 0)}
|
1430 |
+
- **Negative:** {stats.get('negative_count', 0)}
|
1431 |
+
- **Neutral:** {stats.get('neutral_count', 0)}
|
1432 |
+
- **Average Confidence:** {stats.get('avg_confidence', 0):.3f}
|
1433 |
+
- **Languages:** {stats.get('languages_detected', 0)}
|
1434 |
+
- **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()}
|
1435 |
+
"""
|
1436 |
+
|
1437 |
+
return fig, stats_text
|
1438 |
+
|
1439 |
+
@handle_errors(default_return="No data available")
|
1440 |
+
def get_history_status(self):
|
1441 |
+
"""Get current history status"""
|
1442 |
+
stats = self.history.get_stats()
|
1443 |
+
if not stats:
|
1444 |
+
return "No analyses performed yet"
|
1445 |
+
|
1446 |
+
return f"""
|
1447 |
+
**Current Status:**
|
1448 |
+
- **Total Analyses:** {stats['total_analyses']}
|
1449 |
+
- **Recent Sentiment Distribution:**
|
1450 |
+
* Positive: {stats['positive_count']}
|
1451 |
+
* Negative: {stats['negative_count']}
|
1452 |
+
* Neutral: {stats['neutral_count']}
|
1453 |
+
- **Average Confidence:** {stats['avg_confidence']:.3f}
|
1454 |
+
- **Languages Detected:** {stats['languages_detected']}
|
1455 |
+
"""
|
1456 |
+
|
1457 |
+
# Optimized Gradio Interface
|
1458 |
+
def create_interface():
|
1459 |
+
"""Create comprehensive Gradio interface with optimizations"""
|
1460 |
+
app = SentimentApp()
|
1461 |
+
|
1462 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
|
1463 |
+
gr.Markdown("# 🌍 Multilingual Sentiment Analyzer")
|
1464 |
+
gr.Markdown("AI-powered sentiment analysis with SHAP & LIME explainable AI features")
|
1465 |
+
|
1466 |
+
with gr.Tab("Single Analysis"):
|
1467 |
+
with gr.Row():
|
1468 |
+
with gr.Column():
|
1469 |
+
text_input = gr.Textbox(
|
1470 |
+
label="Enter Text for Analysis",
|
1471 |
+
placeholder="Enter your text in any supported language...",
|
1472 |
+
lines=5
|
1473 |
+
)
|
1474 |
+
|
1475 |
+
with gr.Row():
|
1476 |
+
language_selector = gr.Dropdown(
|
1477 |
+
choices=list(config.SUPPORTED_LANGUAGES.values()),
|
1478 |
+
value="Auto Detect",
|
1479 |
+
label="Language"
|
1480 |
+
)
|
1481 |
+
theme_selector = gr.Dropdown(
|
1482 |
+
choices=list(config.THEMES.keys()),
|
1483 |
+
value="default",
|
1484 |
+
label="Theme"
|
1485 |
+
)
|
1486 |
+
|
1487 |
+
with gr.Row():
|
1488 |
+
clean_text_cb = gr.Checkbox(label="Clean Text", value=False)
|
1489 |
+
remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
|
1490 |
+
remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
|
1491 |
+
|
1492 |
+
analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
|
1493 |
+
|
1494 |
+
gr.Examples(
|
1495 |
+
examples=app.examples,
|
1496 |
+
inputs=text_input,
|
1497 |
+
cache_examples=False
|
1498 |
+
)
|
1499 |
+
|
1500 |
+
with gr.Column():
|
1501 |
+
result_output = gr.Textbox(label="Analysis Results", lines=8)
|
1502 |
+
|
1503 |
+
with gr.Row():
|
1504 |
+
gauge_plot = gr.Plot(label="Sentiment Gauge")
|
1505 |
+
probability_plot = gr.Plot(label="Probability Distribution")
|
1506 |
+
|
1507 |
+
# FIXED Advanced Analysis Tab
|
1508 |
+
with gr.Tab("Advanced Analysis"):
|
1509 |
+
gr.Markdown("## Explainable AI Analysis")
|
1510 |
+
gr.Markdown("**SHAP and LIME analysis with FIXED implementation** - now handles text input correctly!")
|
1511 |
+
|
1512 |
+
with gr.Row():
|
1513 |
+
with gr.Column():
|
1514 |
+
advanced_text_input = gr.Textbox(
|
1515 |
+
label="Enter Text for Advanced Analysis",
|
1516 |
+
placeholder="Enter text to analyze with SHAP and LIME...",
|
1517 |
+
lines=6,
|
1518 |
+
value="This movie is absolutely fantastic and amazing!"
|
1519 |
+
)
|
1520 |
+
|
1521 |
+
with gr.Row():
|
1522 |
+
advanced_language = gr.Dropdown(
|
1523 |
+
choices=list(config.SUPPORTED_LANGUAGES.values()),
|
1524 |
+
value="Auto Detect",
|
1525 |
+
label="Language"
|
1526 |
+
)
|
1527 |
+
|
1528 |
+
num_samples_slider = gr.Slider(
|
1529 |
+
minimum=50,
|
1530 |
+
maximum=300,
|
1531 |
+
value=100,
|
1532 |
+
step=25,
|
1533 |
+
label="Number of Samples",
|
1534 |
+
info="Lower = Faster, Higher = More Accurate"
|
1535 |
+
)
|
1536 |
+
|
1537 |
+
with gr.Row():
|
1538 |
+
shap_btn = gr.Button("SHAP Analysis", variant="primary")
|
1539 |
+
lime_btn = gr.Button("LIME Analysis", variant="secondary")
|
1540 |
+
|
1541 |
+
gr.Markdown("""
|
1542 |
+
|
1543 |
+
**📊 Analysis Methods:**
|
1544 |
+
- **SHAP**: Token-level importance scores using Text masker
|
1545 |
+
- **LIME**: Feature importance through text perturbation
|
1546 |
+
|
1547 |
+
**⚡ Expected Performance:**
|
1548 |
+
- 50 samples: ~10-20s | 100 samples: ~20-40s | 200+ samples: ~40-80s
|
1549 |
+
""")
|
1550 |
+
|
1551 |
+
with gr.Column():
|
1552 |
+
advanced_results = gr.Textbox(label="Analysis Summary", lines=12)
|
1553 |
+
|
1554 |
+
with gr.Row():
|
1555 |
+
advanced_plot = gr.Plot(label="Feature Importance Visualization")
|
1556 |
+
|
1557 |
+
with gr.Tab("Batch Analysis"):
|
1558 |
+
with gr.Row():
|
1559 |
+
with gr.Column():
|
1560 |
+
file_upload = gr.File(
|
1561 |
+
label="Upload File (CSV/TXT)",
|
1562 |
+
file_types=[".csv", ".txt"]
|
1563 |
+
)
|
1564 |
+
batch_input = gr.Textbox(
|
1565 |
+
label="Batch Input (one text per line)",
|
1566 |
+
placeholder="Enter multiple texts, one per line...",
|
1567 |
+
lines=10
|
1568 |
+
)
|
1569 |
+
|
1570 |
+
with gr.Row():
|
1571 |
+
batch_language = gr.Dropdown(
|
1572 |
+
choices=list(config.SUPPORTED_LANGUAGES.values()),
|
1573 |
+
value="Auto Detect",
|
1574 |
+
label="Language"
|
1575 |
+
)
|
1576 |
+
batch_theme = gr.Dropdown(
|
1577 |
+
choices=list(config.THEMES.keys()),
|
1578 |
+
value="default",
|
1579 |
+
label="Theme"
|
1580 |
+
)
|
1581 |
+
|
1582 |
+
with gr.Row():
|
1583 |
+
batch_clean_cb = gr.Checkbox(label="Clean Text", value=False)
|
1584 |
+
batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
|
1585 |
+
batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
|
1586 |
+
|
1587 |
+
with gr.Row():
|
1588 |
+
load_file_btn = gr.Button("Load File")
|
1589 |
+
analyze_batch_btn = gr.Button("Analyze Batch", variant="primary")
|
1590 |
+
|
1591 |
+
with gr.Column():
|
1592 |
+
batch_summary = gr.Textbox(label="Batch Summary", lines=8)
|
1593 |
+
batch_results_df = gr.Dataframe(
|
1594 |
+
label="Detailed Results",
|
1595 |
+
headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Word_Count"],
|
1596 |
+
datatype=["number", "str", "str", "str", "str", "number"]
|
1597 |
+
)
|
1598 |
+
|
1599 |
+
with gr.Row():
|
1600 |
+
batch_plot = gr.Plot(label="Batch Analysis Summary")
|
1601 |
+
confidence_dist_plot = gr.Plot(label="Confidence Distribution")
|
1602 |
+
|
1603 |
+
with gr.Tab("History & Analytics"):
|
1604 |
+
with gr.Row():
|
1605 |
+
with gr.Column():
|
1606 |
+
with gr.Row():
|
1607 |
+
refresh_history_btn = gr.Button("Refresh History")
|
1608 |
+
clear_history_btn = gr.Button("Clear History", variant="stop")
|
1609 |
+
status_btn = gr.Button("Get Status")
|
1610 |
+
|
1611 |
+
history_theme = gr.Dropdown(
|
1612 |
+
choices=list(config.THEMES.keys()),
|
1613 |
+
value="default",
|
1614 |
+
label="Dashboard Theme"
|
1615 |
+
)
|
1616 |
+
|
1617 |
+
with gr.Row():
|
1618 |
+
export_csv_btn = gr.Button("Export CSV")
|
1619 |
+
export_json_btn = gr.Button("Export JSON")
|
1620 |
+
|
1621 |
+
with gr.Column():
|
1622 |
+
history_status = gr.Textbox(label="History Status", lines=8)
|
1623 |
+
|
1624 |
+
history_dashboard = gr.Plot(label="History Analytics Dashboard")
|
1625 |
+
|
1626 |
+
with gr.Row():
|
1627 |
+
csv_download = gr.File(label="CSV Download", visible=True)
|
1628 |
+
json_download = gr.File(label="JSON Download", visible=True)
|
1629 |
+
|
1630 |
+
# Event Handlers
|
1631 |
+
|
1632 |
+
# Single Analysis
|
1633 |
+
analyze_btn.click(
|
1634 |
+
app.analyze_single,
|
1635 |
+
inputs=[text_input, language_selector, theme_selector,
|
1636 |
+
clean_text_cb, remove_punct_cb, remove_nums_cb],
|
1637 |
+
outputs=[result_output, gauge_plot, probability_plot]
|
1638 |
+
)
|
1639 |
+
|
1640 |
+
# FIXED Advanced Analysis with sample size control
|
1641 |
+
shap_btn.click(
|
1642 |
+
app.analyze_with_shap,
|
1643 |
+
inputs=[advanced_text_input, advanced_language, num_samples_slider],
|
1644 |
+
outputs=[advanced_results, advanced_plot]
|
1645 |
+
)
|
1646 |
+
|
1647 |
+
lime_btn.click(
|
1648 |
+
app.analyze_with_lime,
|
1649 |
+
inputs=[advanced_text_input, advanced_language, num_samples_slider],
|
1650 |
+
outputs=[advanced_results, advanced_plot]
|
1651 |
+
)
|
1652 |
+
|
1653 |
+
# Batch Analysis
|
1654 |
+
load_file_btn.click(
|
1655 |
+
app.data_handler.process_file,
|
1656 |
+
inputs=file_upload,
|
1657 |
+
outputs=batch_input
|
1658 |
+
)
|
1659 |
+
|
1660 |
+
analyze_batch_btn.click(
|
1661 |
+
app.analyze_batch,
|
1662 |
+
inputs=[batch_input, batch_language, batch_theme,
|
1663 |
+
batch_clean_cb, batch_punct_cb, batch_nums_cb],
|
1664 |
+
outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
|
1665 |
+
)
|
1666 |
+
|
1667 |
+
# History & Analytics
|
1668 |
+
refresh_history_btn.click(
|
1669 |
+
app.plot_history,
|
1670 |
+
inputs=history_theme,
|
1671 |
+
outputs=[history_dashboard, history_status]
|
1728 |
+
)
|
1729 |
+
|
1730 |
+
clear_history_btn.click(
|
1731 |
+
lambda: f"Cleared {app.history.clear()} entries",
|
1732 |
+
outputs=history_status
|
1751 |
+
)
|
1752 |
+
|
1753 |
+
status_btn.click(
|
1754 |
+
app.get_history_status,
|
1755 |
+
outputs=history_status
|
1784 |
+
)
|
1785 |
+
|
1786 |
+
export_csv_btn.click(
|
1787 |
+
lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
|
1788 |
+
outputs=[csv_download, history_status]
|
1789 |
+
|
1790 |
+
|
1791 |
+
)
|
1792 |
+
|
1793 |
+
export_json_btn.click(
|
1794 |
+
lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
|
1795 |
+
outputs=[json_download, history_status]
|
1796 |
+
|
1797 |
+
)
|
1798 |
+
|
1799 |
+
return demo
|
1800 |
+
# Application Entry Point
|
1801 |
if __name__ == "__main__":
|
1802 |
+
logging.basicConfig(
|
1803 |
+
level=logging.INFO,
|
1804 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
1805 |
+
)
|
1806 |
+
|
1807 |
+
try:
|
1808 |
+
demo = create_interface()
|
1809 |
+
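# Create a public share link and bind to all interfaces on port 7860 (Gradio's default port)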
demo.launch(
|
1810 |
+
share=True,
|
1811 |
+
server_name="0.0.0.0",
|
1812 |
+
server_port=7860,
|
1813 |
+
show_error=True
|
1814 |
+
)
|
1815 |
+
except Exception as e:
|
1816 |
+
logger.error(f"Failed to launch application: {e}")
|
1817 |
+
raise
|