entropy25 committed on
Commit 38f2ab8 · verified · 1 Parent(s): c2473ff

Create app.py

Files changed (1): app.py +492 -0
app.py ADDED
@@ -0,0 +1,492 @@
import gradio as gr
import pandas as pd
import numpy as np
import json
import re
from datetime import datetime
from typing import List, Dict, Tuple
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sqlite3
import hashlib
import time

# Initialize models
sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")

class ReviewAnalyzer:
    def __init__(self):
        self.db_path = "reviews.db"
        self._init_db()

    def _init_db(self):
        conn = sqlite3.connect(self.db_path)
        conn.execute('''
            CREATE TABLE IF NOT EXISTS usage_log (
                id INTEGER PRIMARY KEY,
                user_id TEXT,
                timestamp DATETIME,
                analysis_type TEXT,
                items_count INTEGER
            )
        ''')
        conn.close()

    def preprocess_text(self, text: str) -> str:
        """Clean and preprocess review text"""
        text = re.sub(r'http\S+', '', text)  # Remove URLs
        text = re.sub(r'[^\w\s]', '', text)  # Remove special chars
        text = text.strip().lower()
        return text

    def analyze_sentiment(self, reviews: List[str]) -> Dict:
        """Analyze sentiment of reviews"""
        results = []
        sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}

        for review in reviews:
            if not review.strip():
                continue

            clean_review = self.preprocess_text(review)
            # Truncate long reviews so they fit the model's input limit
            result = sentiment_analyzer(clean_review, truncation=True)[0]

            label = result['label'].lower()
            score = result['score']

            # Map labels to standard format
            if 'pos' in label:
                sentiment = 'positive'
            elif 'neg' in label:
                sentiment = 'negative'
            else:
                sentiment = 'neutral'

            sentiments[sentiment] += 1
            results.append({
                'text': review[:100] + '...' if len(review) > 100 else review,
                'sentiment': sentiment,
                'confidence': round(score, 3)
            })

        total = len(results)
        # Guard against an empty batch to avoid division by zero
        sentiment_percentages = (
            {k: round(v / total * 100, 1) for k, v in sentiments.items()}
            if total else {k: 0.0 for k in sentiments}
        )

        return {
            'summary': sentiment_percentages,
            'details': results,
            'total_reviews': total
        }

    def detect_fake_reviews(self, reviews: List[str]) -> Dict:
        """Detect potentially fake reviews"""
        fake_scores = []

        for review in reviews:
            if not review.strip():
                continue

            # Simple fake detection heuristics
            score = 0

            # Length check
            if len(review) < 20:
                score += 0.3

            # Repetitive words
            words = review.lower().split()
            unique_ratio = len(set(words)) / len(words) if words else 0
            if unique_ratio < 0.5:
                score += 0.4

            # Excessive punctuation
            punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
            if punct_ratio > 0.1:
                score += 0.2

            # Generic phrases
            generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
            if any(phrase in review.lower() for phrase in generic_phrases):
                score += 0.1

            fake_scores.append({
                'text': review[:100] + '...' if len(review) > 100 else review,
                'fake_probability': min(round(score, 3), 1.0),
                'status': 'suspicious' if score > 0.5 else 'authentic'
            })

        suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)

        return {
            'summary': {
                'total_reviews': len(fake_scores),
                'suspicious_reviews': suspicious_count,
                'authenticity_rate': round((len(fake_scores) - suspicious_count) / len(fake_scores) * 100, 1) if fake_scores else 0
            },
            'details': fake_scores
        }

    def assess_quality(self, reviews: List[str]) -> Dict:
        """Assess review quality"""
        quality_scores = []

        for review in reviews:
            if not review.strip():
                continue

            score = 0
            factors = {}

            # Length factor
            length_score = min(len(review) / 200, 1.0)
            factors['length'] = round(length_score, 2)
            score += length_score * 0.3

            # Detail factor (specific words)
            detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
            detail_score = min(sum(1 for word in detail_words if word in review.lower()) / 3, 1.0)
            factors['detail'] = round(detail_score, 2)
            score += detail_score * 0.3

            # Structure factor
            sentences = len(re.split(r'[.!?]', review))
            structure_score = min(sentences / 5, 1.0)
            factors['structure'] = round(structure_score, 2)
            score += structure_score * 0.2

            # Helpfulness factor
            helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']
            helpful_score = min(sum(1 for word in helpful_words if word in review.lower()) / 2, 1.0)
            factors['helpfulness'] = round(helpful_score, 2)
            score += helpful_score * 0.2

            quality_scores.append({
                'text': review[:100] + '...' if len(review) > 100 else review,
                'quality_score': round(score, 3),
                'factors': factors,
                'grade': 'A' if score > 0.8 else 'B' if score > 0.6 else 'C' if score > 0.4 else 'D'
            })

        avg_quality = sum(item['quality_score'] for item in quality_scores) / len(quality_scores) if quality_scores else 0

        return {
            'summary': {
                'average_quality': round(avg_quality, 3),
                'total_reviews': len(quality_scores),
                'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7)
            },
            'details': quality_scores
        }

    def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
        """Compare sentiment between two products"""
        analysis_a = self.analyze_sentiment(product_a_reviews)
        analysis_b = self.analyze_sentiment(product_b_reviews)

        # Create comparison chart
        fig = make_subplots(
            rows=1, cols=2,
            specs=[[{'type': 'pie'}, {'type': 'pie'}]],
            subplot_titles=['Product A', 'Product B']
        )

        # Product A pie chart
        fig.add_trace(go.Pie(
            labels=list(analysis_a['summary'].keys()),
            values=list(analysis_a['summary'].values()),
            name="Product A"
        ), row=1, col=1)

        # Product B pie chart
        fig.add_trace(go.Pie(
            labels=list(analysis_b['summary'].keys()),
            values=list(analysis_b['summary'].values()),
            name="Product B"
        ), row=1, col=2)

        fig.update_layout(title_text="Sentiment Comparison")

        comparison = {
            'product_a': analysis_a,
            'product_b': analysis_b,
            'winner': 'Product A' if analysis_a['summary']['positive'] > analysis_b['summary']['positive'] else 'Product B'
        }

        return comparison, fig

    def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
        """Generate analysis report"""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if report_type == "sentiment":
            return f"""
# Sentiment Analysis Report
Generated: {timestamp}

## Summary
- Total Reviews: {analysis_data.get('total_reviews', 0)}
- Positive: {analysis_data.get('summary', {}).get('positive', 0)}%
- Negative: {analysis_data.get('summary', {}).get('negative', 0)}%
- Neutral: {analysis_data.get('summary', {}).get('neutral', 0)}%

## Key Insights
- Overall sentiment trend: {'Positive' if analysis_data.get('summary', {}).get('positive', 0) > 50 else 'Mixed'}
- Customer satisfaction level: {'High' if analysis_data.get('summary', {}).get('positive', 0) > 70 else 'Moderate' if analysis_data.get('summary', {}).get('positive', 0) > 40 else 'Low'}

## Recommendations
- Focus on addressing negative feedback themes
- Leverage positive reviews for marketing
- Monitor sentiment trends over time
"""

        elif report_type == "fake":
            return f"""
# Fake Review Detection Report
Generated: {timestamp}

## Summary
- Total Reviews Analyzed: {analysis_data.get('summary', {}).get('total_reviews', 0)}
- Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
- Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%

## Risk Assessment
- Review Quality: {'High Risk' if analysis_data.get('summary', {}).get('authenticity_rate', 0) < 70 else 'Low Risk'}
- Recommendation: {'Investigate suspicious reviews' if analysis_data.get('summary', {}).get('suspicious_reviews', 0) > 0 else 'Reviews appear authentic'}
"""

        # The UI also offers a "quality" report type, which has no template yet
        return f"Report type '{report_type}' is not implemented yet."

# Global analyzer instance
analyzer = ReviewAnalyzer()

def process_reviews_input(text: str) -> List[str]:
    """Process review input text into list"""
    if not text.strip():
        return []

    # Split input into one review per line, skipping very short lines
    reviews = []
    for line in text.split('\n'):
        line = line.strip()
        if line and len(line) > 10:  # Minimum length check
            reviews.append(line)

    return reviews

def sentiment_analysis_interface(reviews_text: str):
    """Interface for sentiment analysis"""
    if not reviews_text.strip():
        return "Please enter some reviews to analyze.", None

    reviews = process_reviews_input(reviews_text)
    if not reviews:
        return "No valid reviews found. Please check your input.", None

    try:
        result = analyzer.analyze_sentiment(reviews)

        # Create visualization
        fig = go.Figure(data=[
            go.Bar(x=list(result['summary'].keys()),
                   y=list(result['summary'].values()),
                   marker_color=['green', 'red', 'gray'])
        ])
        fig.update_layout(title="Sentiment Distribution", yaxis_title="Percentage")

        return json.dumps(result, indent=2), fig
    except Exception as e:
        return f"Error: {str(e)}", None

def fake_detection_interface(reviews_text: str):
    """Interface for fake review detection"""
    if not reviews_text.strip():
        return "Please enter some reviews to analyze."

    reviews = process_reviews_input(reviews_text)
    if not reviews:
        return "No valid reviews found. Please check your input."

    try:
        result = analyzer.detect_fake_reviews(reviews)
        return json.dumps(result, indent=2)
    except Exception as e:
        return f"Error: {str(e)}"

def quality_assessment_interface(reviews_text: str):
    """Interface for quality assessment"""
    if not reviews_text.strip():
        return "Please enter some reviews to analyze."

    reviews = process_reviews_input(reviews_text)
    if not reviews:
        return "No valid reviews found. Please check your input."

    try:
        result = analyzer.assess_quality(reviews)
        return json.dumps(result, indent=2)
    except Exception as e:
        return f"Error: {str(e)}"

def competitor_comparison_interface(product_a_text: str, product_b_text: str):
    """Interface for competitor comparison"""
    if not product_a_text.strip() or not product_b_text.strip():
        return "Please enter reviews for both products.", None

    reviews_a = process_reviews_input(product_a_text)
    reviews_b = process_reviews_input(product_b_text)

    if not reviews_a or not reviews_b:
        return "Please provide valid reviews for both products.", None

    try:
        result, fig = analyzer.compare_competitors(reviews_a, reviews_b)
        return json.dumps(result, indent=2), fig
    except Exception as e:
        return f"Error: {str(e)}", None

def generate_report_interface(analysis_result: str, report_type: str):
    """Interface for report generation"""
    if not analysis_result.strip():
        return "No analysis data available. Please run an analysis first."

    try:
        data = json.loads(analysis_result)
        report = analyzer.generate_report(data, report_type.lower())
        return report
    except Exception as e:
        return f"Error generating report: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛒 SmartReview Pro")
    gr.Markdown("Professional review analysis platform for e-commerce businesses")

    with gr.Tab("📊 Sentiment Analysis"):
        gr.Markdown("### Analyze customer sentiment from reviews")
        with gr.Row():
            with gr.Column():
                sentiment_input = gr.Textbox(
                    lines=10,
                    placeholder="Enter reviews (one per line):\nGreat product, love it!\nTerrible quality, waste of money.\nOkay product, nothing special.",
                    label="Reviews"
                )
                sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
            with gr.Column():
                sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
                sentiment_chart = gr.Plot(label="Sentiment Distribution")

        sentiment_btn.click(
            sentiment_analysis_interface,
            inputs=[sentiment_input],
            outputs=[sentiment_output, sentiment_chart]
        )

    with gr.Tab("🔍 Fake Review Detection"):
        gr.Markdown("### Detect potentially fake or suspicious reviews")
        with gr.Row():
            with gr.Column():
                fake_input = gr.Textbox(
                    lines=10,
                    placeholder="Enter reviews to check for authenticity...",
                    label="Reviews"
                )
                fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
            with gr.Column():
                fake_output = gr.Textbox(label="Detection Results", lines=15)

        fake_btn.click(
            fake_detection_interface,
            inputs=[fake_input],
            outputs=[fake_output]
        )

    with gr.Tab("⭐ Quality Assessment"):
        gr.Markdown("### Assess the quality and helpfulness of reviews")
        with gr.Row():
            with gr.Column():
                quality_input = gr.Textbox(
                    lines=10,
                    placeholder="Enter reviews to assess quality...",
                    label="Reviews"
                )
                quality_btn = gr.Button("Assess Quality", variant="primary")
            with gr.Column():
                quality_output = gr.Textbox(label="Quality Assessment", lines=15)

        quality_btn.click(
            quality_assessment_interface,
            inputs=[quality_input],
            outputs=[quality_output]
        )

    with gr.Tab("🆚 Competitor Comparison"):
        gr.Markdown("### Compare sentiment between competing products")
        with gr.Row():
            with gr.Column():
                comp_product_a = gr.Textbox(
                    lines=8,
                    placeholder="Product A reviews...",
                    label="Product A Reviews"
                )
                comp_product_b = gr.Textbox(
                    lines=8,
                    placeholder="Product B reviews...",
                    label="Product B Reviews"
                )
                comp_btn = gr.Button("Compare Products", variant="primary")
            with gr.Column():
                comp_output = gr.Textbox(label="Comparison Results", lines=15)
                comp_chart = gr.Plot(label="Comparison Chart")

        comp_btn.click(
            competitor_comparison_interface,
            inputs=[comp_product_a, comp_product_b],
            outputs=[comp_output, comp_chart]
        )

    with gr.Tab("📋 Report Generation"):
        gr.Markdown("### Generate professional analysis reports")
        with gr.Row():
            with gr.Column():
                report_data = gr.Textbox(
                    lines=10,
                    placeholder="Paste analysis results here...",
                    label="Analysis Data (JSON)"
                )
                report_type = gr.Dropdown(
                    choices=["sentiment", "fake", "quality"],
                    value="sentiment",
                    label="Report Type"
                )
                report_btn = gr.Button("Generate Report", variant="primary")
            with gr.Column():
                report_output = gr.Textbox(label="Generated Report", lines=15)

        report_btn.click(
            generate_report_interface,
            inputs=[report_data, report_type],
            outputs=[report_output]
        )

    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## SmartReview Pro Features

        - **Sentiment Analysis**: Analyze customer emotions and opinions
        - **Fake Review Detection**: Identify suspicious or inauthentic reviews
        - **Quality Assessment**: Evaluate review helpfulness and detail
        - **Competitor Comparison**: Compare sentiment across products
        - **Professional Reports**: Generate detailed analysis reports

        ## Pricing Plans
        - **Free**: 10 analyses per day
        - **Pro ($299/month)**: 1000 analyses per day + advanced features
        - **Enterprise**: Unlimited usage + API access + custom reports

        Contact us for enterprise solutions and custom integrations.
        """)

if __name__ == "__main__":
    demo.launch()
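
For reference, a minimal usage sketch (not part of the commit) showing how the ReviewAnalyzer defined above could be driven outside the Gradio UI, e.g. in a test or batch script. It assumes the file is saved as app.py, that gradio, transformers, torch and plotly are installed, and that the cardiffnlp model can be downloaded; importing app triggers model loading but not demo.launch(), which is behind the __main__ guard.

# usage_example.py (hypothetical helper, not in the repository)
from app import analyzer  # loads the sentiment model on import

sample_reviews = [
    "Great product, the battery lasts two days and charging is fast.",
    "Terrible quality, broke after a week. Waste of money.",
    "Amazing! Perfect! Best ever! Highly recommend!!!",
]

# Each call returns a dict with a 'summary' block and per-review 'details'
sentiment = analyzer.analyze_sentiment(sample_reviews)   # sentiment percentages + confidences
fakes = analyzer.detect_fake_reviews(sample_reviews)     # heuristic fake_probability per review
quality = analyzer.assess_quality(sample_reviews)        # quality_score and letter grade per review

print(sentiment['summary'])
print(fakes['summary'])
print(quality['summary'])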