entropy25 committed on
Commit
baf4a02
·
verified ·
1 Parent(s): fc0d38a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +311 -104
app.py CHANGED
@@ -3,9 +3,10 @@ import pandas as pd
3
  import numpy as np
4
  import json
5
  import re
 
6
  from datetime import datetime
7
  from typing import List, Dict, Tuple
8
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
9
  import plotly.graph_objects as go
10
  from plotly.subplots import make_subplots
11
  import sqlite3
@@ -14,7 +15,7 @@ import time
14
 
15
  # Initialize models
16
  sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
17
- tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
18
 
19
  class ReviewAnalyzer:
20
  def __init__(self):
@@ -36,13 +37,65 @@ class ReviewAnalyzer:
36
 
37
  def preprocess_text(self, text: str) -> str:
38
  """Clean and preprocess review text"""
39
- text = re.sub(r'http\S+', '', text) # Remove URLs
40
- text = re.sub(r'[^\w\s]', '', text) # Remove special chars
41
  text = text.strip().lower()
42
  return text
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def analyze_sentiment(self, reviews: List[str]) -> Dict:
45
- """Analyze sentiment of reviews"""
46
  results = []
47
  sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
48
 
@@ -54,9 +107,8 @@ class ReviewAnalyzer:
54
  result = sentiment_analyzer(clean_review)[0]
55
 
56
  label = result['label'].lower()
57
- score = result['score']
58
 
59
- # Map labels to standard format
60
  if 'pos' in label:
61
  sentiment = 'positive'
62
  elif 'neg' in label:
@@ -74,47 +126,64 @@ class ReviewAnalyzer:
74
  total = len(results)
75
  sentiment_percentages = {k: round(v/total*100, 1) for k, v in sentiments.items()}
76
 
 
 
 
77
  return {
78
  'summary': sentiment_percentages,
79
  'details': results,
80
- 'total_reviews': total
 
81
  }
82
 
83
- def detect_fake_reviews(self, reviews: List[str]) -> Dict:
84
- """Detect potentially fake reviews"""
85
  fake_scores = []
86
 
87
- for review in reviews:
 
 
 
 
 
88
  if not review.strip():
89
  continue
90
 
91
- # Simple fake detection heuristics
92
  score = 0
 
93
 
94
- # Length check
95
  if len(review) < 20:
96
  score += 0.3
 
97
 
98
- # Repetitive words
99
  words = review.lower().split()
100
  unique_ratio = len(set(words)) / len(words) if words else 0
101
  if unique_ratio < 0.5:
102
  score += 0.4
 
103
 
104
- # Excessive punctuation
105
  punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
106
  if punct_ratio > 0.1:
107
  score += 0.2
 
108
 
109
- # Generic phrases
110
  generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
111
  if any(phrase in review.lower() for phrase in generic_phrases):
112
  score += 0.1
 
 
 
 
 
 
 
113
 
114
  fake_scores.append({
115
  'text': review[:100] + '...' if len(review) > 100 else review,
116
  'fake_probability': min(round(score, 3), 1.0),
117
- 'status': 'suspicious' if score > 0.5 else 'authentic'
 
118
  })
119
 
120
  suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)
@@ -125,81 +194,141 @@ class ReviewAnalyzer:
125
  'suspicious_reviews': suspicious_count,
126
  'authenticity_rate': round((len(fake_scores) - suspicious_count) / len(fake_scores) * 100, 1) if fake_scores else 0
127
  },
128
- 'details': fake_scores
 
129
  }
130
 
131
- def assess_quality(self, reviews: List[str]) -> Dict:
132
- """Assess review quality"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  quality_scores = []
134
 
135
  for review in reviews:
136
  if not review.strip():
137
  continue
138
 
139
- score = 0
140
  factors = {}
141
 
142
  # Length factor
143
  length_score = min(len(review) / 200, 1.0)
144
  factors['length'] = round(length_score, 2)
145
- score += length_score * 0.3
146
 
147
- # Detail factor (specific words)
148
  detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
149
  detail_score = min(sum(1 for word in detail_words if word in review.lower()) / 3, 1.0)
150
  factors['detail'] = round(detail_score, 2)
151
- score += detail_score * 0.3
152
 
153
  # Structure factor
154
  sentences = len(re.split(r'[.!?]', review))
155
  structure_score = min(sentences / 5, 1.0)
156
  factors['structure'] = round(structure_score, 2)
157
- score += structure_score * 0.2
158
 
159
  # Helpfulness factor
160
  helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']
161
  helpful_score = min(sum(1 for word in helpful_words if word in review.lower()) / 2, 1.0)
162
  factors['helpfulness'] = round(helpful_score, 2)
163
- score += helpful_score * 0.2
 
 
164
 
165
  quality_scores.append({
166
  'text': review[:100] + '...' if len(review) > 100 else review,
167
- 'quality_score': round(score, 3),
168
  'factors': factors,
169
- 'grade': 'A' if score > 0.8 else 'B' if score > 0.6 else 'C' if score > 0.4 else 'D'
170
  })
171
 
172
  avg_quality = sum(item['quality_score'] for item in quality_scores) / len(quality_scores) if quality_scores else 0
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  return {
175
  'summary': {
176
  'average_quality': round(avg_quality, 3),
177
  'total_reviews': len(quality_scores),
178
- 'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7)
 
179
  },
180
- 'details': quality_scores
181
- }
 
182
 
183
  def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
184
  """Compare sentiment between two products"""
185
  analysis_a = self.analyze_sentiment(product_a_reviews)
186
  analysis_b = self.analyze_sentiment(product_b_reviews)
187
 
188
- # Create comparison chart
189
  fig = make_subplots(
190
  rows=1, cols=2,
191
  specs=[[{'type': 'pie'}, {'type': 'pie'}]],
192
  subplot_titles=['Product A', 'Product B']
193
  )
194
 
195
- # Product A pie chart
196
  fig.add_trace(go.Pie(
197
  labels=list(analysis_a['summary'].keys()),
198
  values=list(analysis_a['summary'].values()),
199
  name="Product A"
200
  ), row=1, col=1)
201
 
202
- # Product B pie chart
203
  fig.add_trace(go.Pie(
204
  labels=list(analysis_b['summary'].keys()),
205
  values=list(analysis_b['summary'].values()),
@@ -217,12 +346,15 @@ class ReviewAnalyzer:
217
  return comparison, fig
218
 
219
  def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
220
- """Generate analysis report"""
221
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
222
 
223
  if report_type == "sentiment":
224
- return f"""
225
- # Sentiment Analysis Report
 
 
 
226
  Generated: {timestamp}
227
 
228
  ## Summary
@@ -231,30 +363,42 @@ Generated: {timestamp}
231
  - Negative: {analysis_data.get('summary', {}).get('negative', 0)}%
232
  - Neutral: {analysis_data.get('summary', {}).get('neutral', 0)}%
233
 
 
 
 
 
 
 
234
  ## Key Insights
235
- - Overall sentiment trend: {'Positive' if analysis_data.get('summary', {}).get('positive', 0) > 50 else 'Mixed'}
236
- - Customer satisfaction level: {'High' if analysis_data.get('summary', {}).get('positive', 0) > 70 else 'Moderate' if analysis_data.get('summary', {}).get('positive', 0) > 40 else 'Low'}
 
237
 
238
  ## Recommendations
239
- - Focus on addressing negative feedback themes
240
- - Leverage positive reviews for marketing
241
  - Monitor sentiment trends over time
242
- """
243
 
244
  elif report_type == "fake":
245
- return f"""
246
- # Fake Review Detection Report
247
  Generated: {timestamp}
248
 
249
  ## Summary
250
- - Total Reviews Analyzed: {analysis_data.get('summary', {}).get('total_reviews', 0)}
251
  - Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
252
  - Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%
253
 
254
  ## Risk Assessment
255
- - Review Quality: {'High Risk' if analysis_data.get('summary', {}).get('authenticity_rate', 0) < 70 else 'Low Risk'}
256
- - Recommendation: {'Investigate suspicious reviews' if analysis_data.get('summary', {}).get('suspicious_reviews', 0) > 0 else 'Reviews appear authentic'}
257
- """
 
 
 
 
 
 
258
 
259
  return "Report generated successfully"
260
 
@@ -266,28 +410,69 @@ def process_reviews_input(text: str) -> List[str]:
266
  if not text.strip():
267
  return []
268
 
269
- # Split by lines or by common separators
270
  reviews = []
271
  for line in text.split('\n'):
272
  line = line.strip()
273
- if line and len(line) > 10: # Minimum length check
274
  reviews.append(line)
275
 
276
  return reviews
277
 
278
- def sentiment_analysis_interface(reviews_text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  """Interface for sentiment analysis"""
280
- if not reviews_text.strip():
281
- return "Please enter some reviews to analyze.", None
 
 
 
 
 
 
282
 
283
- reviews = process_reviews_input(reviews_text)
284
  if not reviews:
285
- return "No valid reviews found. Please check your input.", None
286
 
287
  try:
288
  result = analyzer.analyze_sentiment(reviews)
289
 
290
- # Create visualization
291
  fig = go.Figure(data=[
292
  go.Bar(x=list(result['summary'].keys()),
293
  y=list(result['summary'].values()),
@@ -299,35 +484,53 @@ def sentiment_analysis_interface(reviews_text: str):
299
  except Exception as e:
300
  return f"Error: {str(e)}", None
301
 
302
- def fake_detection_interface(reviews_text: str):
303
  """Interface for fake review detection"""
304
- if not reviews_text.strip():
305
- return "Please enter some reviews to analyze."
 
 
 
 
 
 
 
306
 
307
- reviews = process_reviews_input(reviews_text)
308
  if not reviews:
309
- return "No valid reviews found. Please check your input."
310
 
311
  try:
312
- result = analyzer.detect_fake_reviews(reviews)
313
  return json.dumps(result, indent=2)
314
  except Exception as e:
315
  return f"Error: {str(e)}"
316
 
317
- def quality_assessment_interface(reviews_text: str):
318
- """Interface for quality assessment"""
319
- if not reviews_text.strip():
320
- return "Please enter some reviews to analyze."
 
 
 
 
 
 
321
 
322
- reviews = process_reviews_input(reviews_text)
323
  if not reviews:
324
- return "No valid reviews found. Please check your input."
325
 
326
  try:
327
- result = analyzer.assess_quality(reviews)
328
- return json.dumps(result, indent=2)
 
 
 
 
 
 
 
329
  except Exception as e:
330
- return f"Error: {str(e)}"
331
 
332
  def competitor_comparison_interface(product_a_text: str, product_b_text: str):
333
  """Interface for competitor comparison"""
@@ -361,17 +564,21 @@ def generate_report_interface(analysis_result: str, report_type: str):
361
  # Create Gradio interface
362
  with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
363
  gr.Markdown("# πŸ›’ SmartReview Pro")
364
- gr.Markdown("Professional review analysis platform for e-commerce businesses")
365
 
366
  with gr.Tab("πŸ“Š Sentiment Analysis"):
367
- gr.Markdown("### Analyze customer sentiment from reviews")
368
  with gr.Row():
369
  with gr.Column():
370
  sentiment_input = gr.Textbox(
371
- lines=10,
372
- placeholder="Enter reviews (one per line):\nGreat product, love it!\nTerrible quality, waste of money.\nOkay product, nothing special.",
373
  label="Reviews"
374
  )
 
 
 
 
375
  sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
376
  with gr.Column():
377
  sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
@@ -379,46 +586,64 @@ with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
379
 
380
  sentiment_btn.click(
381
  sentiment_analysis_interface,
382
- inputs=[sentiment_input],
383
  outputs=[sentiment_output, sentiment_chart]
384
  )
385
 
386
  with gr.Tab("πŸ” Fake Review Detection"):
387
- gr.Markdown("### Detect potentially fake or suspicious reviews")
388
  with gr.Row():
389
  with gr.Column():
390
  fake_input = gr.Textbox(
391
- lines=10,
392
- placeholder="Enter reviews to check for authenticity...",
393
  label="Reviews"
394
  )
 
 
 
 
395
  fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
396
  with gr.Column():
397
  fake_output = gr.Textbox(label="Detection Results", lines=15)
398
 
399
  fake_btn.click(
400
  fake_detection_interface,
401
- inputs=[fake_input],
402
  outputs=[fake_output]
403
  )
404
 
405
  with gr.Tab("⭐ Quality Assessment"):
406
- gr.Markdown("### Assess the quality and helpfulness of reviews")
407
  with gr.Row():
408
  with gr.Column():
409
  quality_input = gr.Textbox(
410
- lines=10,
411
- placeholder="Enter reviews to assess quality...",
412
  label="Reviews"
413
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  quality_btn = gr.Button("Assess Quality", variant="primary")
415
  with gr.Column():
416
- quality_output = gr.Textbox(label="Quality Assessment", lines=15)
 
417
 
418
  quality_btn.click(
419
  quality_assessment_interface,
420
- inputs=[quality_input],
421
- outputs=[quality_output]
422
  )
423
 
424
  with gr.Tab("πŸ†š Competitor Comparison"):
@@ -469,24 +694,6 @@ with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
469
  inputs=[report_data, report_type],
470
  outputs=[report_output]
471
  )
472
-
473
- with gr.Tab("ℹ️ About"):
474
- gr.Markdown("""
475
- ## SmartReview Pro Features
476
-
477
- - **Sentiment Analysis**: Analyze customer emotions and opinions
478
- - **Fake Review Detection**: Identify suspicious or inauthentic reviews
479
- - **Quality Assessment**: Evaluate review helpfulness and detail
480
- - **Competitor Comparison**: Compare sentiment across products
481
- - **Professional Reports**: Generate detailed analysis reports
482
-
483
- ## Pricing Plans
484
- - **Free**: 10 analyses per day
485
- - **Pro ($299/month)**: 1000 analyses per day + advanced features
486
- - **Enterprise**: Unlimited usage + API access + custom reports
487
-
488
- Contact us for enterprise solutions and custom integrations.
489
- """)
490
 
491
  if __name__ == "__main__":
492
  demo.launch()
 
3
  import numpy as np
4
  import json
5
  import re
6
+ import io
7
  from datetime import datetime
8
  from typing import List, Dict, Tuple
9
+ from transformers import pipeline, AutoTokenizer
10
  import plotly.graph_objects as go
11
  from plotly.subplots import make_subplots
12
  import sqlite3
 
15
 
16
  # Initialize models
17
  sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
18
+ absa_analyzer = pipeline("ner", model="yangheng/deberta-v3-base-absa-v1.1", aggregation_strategy="simple")
19
 
20
  class ReviewAnalyzer:
21
  def __init__(self):
 
37
 
38
  def preprocess_text(self, text: str) -> str:
39
  """Clean and preprocess review text"""
40
+ text = re.sub(r'http\S+', '', text)
41
+ text = re.sub(r'[^\w\s]', '', text)
42
  text = text.strip().lower()
43
  return text
44
 
45
def extract_aspect_keywords(self, reviews: List[str]) -> Dict:
    """Collect positively and negatively labelled aspect terms from reviews.

    Each review of at least 10 characters is passed to the module-level
    ``absa_analyzer`` pipeline. Every returned item is read through its
    ``word``, ``entity_group`` and ``score`` fields; items whose label is
    neither positive nor negative are ignored.

    Args:
        reviews: Raw review strings. Blank or very short entries are skipped.

    Returns:
        A dict with:
          * ``top_positive_aspects`` / ``top_negative_aspects``: up to ten
            ``(aspect, count)`` tuples, most frequent first.
          * ``detailed_aspects``: one record per detected aspect.
          * ``summary``: number of distinct positive / negative aspects.
    """
    all_aspects = {'positive': {}, 'negative': {}}
    detailed_aspects = []

    for review in reviews:
        if not review.strip() or len(review) < 10:
            continue

        # Only add an ellipsis when the preview really was truncated.
        preview = review[:50] + '...' if len(review) > 50 else review

        try:
            for aspect in absa_analyzer(review):
                word = aspect['word'].lower()
                label = aspect['entity_group'].lower()
                confidence = float(aspect['score'])

                # Map the model label onto the two sentiment buckets.
                if 'pos' in label:
                    sentiment = 'positive'
                elif 'neg' in label:
                    sentiment = 'negative'
                else:
                    continue

                bucket = all_aspects[sentiment]
                bucket[word] = bucket.get(word, 0) + 1

                detailed_aspects.append({
                    'review': preview,
                    'aspect': word,
                    'sentiment': sentiment,
                    'confidence': round(confidence, 3)
                })
        except Exception:
            # Best effort: a review the model cannot process is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            continue

    def _top(counts: Dict) -> list:
        """Return the ten most frequent (aspect, count) pairs."""
        return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:10]

    return {
        'top_positive_aspects': _top(all_aspects['positive']),
        'top_negative_aspects': _top(all_aspects['negative']),
        'detailed_aspects': detailed_aspects,
        'summary': {
            'total_positive_aspects': len(all_aspects['positive']),
            'total_negative_aspects': len(all_aspects['negative'])
        }
    }
96
+
97
  def analyze_sentiment(self, reviews: List[str]) -> Dict:
98
+ """Analyze sentiment of reviews with keyword extraction"""
99
  results = []
100
  sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
101
 
 
107
  result = sentiment_analyzer(clean_review)[0]
108
 
109
  label = result['label'].lower()
110
+ score = float(result['score'])
111
 
 
112
  if 'pos' in label:
113
  sentiment = 'positive'
114
  elif 'neg' in label:
 
126
  total = len(results)
127
  sentiment_percentages = {k: round(v/total*100, 1) for k, v in sentiments.items()}
128
 
129
+ # Extract keywords
130
+ keywords = self.extract_aspect_keywords(reviews)
131
+
132
  return {
133
  'summary': sentiment_percentages,
134
  'details': results,
135
+ 'total_reviews': total,
136
+ 'keywords': keywords
137
  }
138
 
139
+ def detect_fake_reviews(self, reviews: List[str], metadata: Dict = None) -> Dict:
140
+ """Detect potentially fake reviews with optional metadata"""
141
  fake_scores = []
142
 
143
+ # Process metadata if provided
144
+ metadata_flags = []
145
+ if metadata and 'timestamps' in metadata and 'usernames' in metadata:
146
+ metadata_flags = self._analyze_metadata(metadata['timestamps'], metadata['usernames'])
147
+
148
+ for i, review in enumerate(reviews):
149
  if not review.strip():
150
  continue
151
 
 
152
  score = 0
153
+ flags = []
154
 
155
+ # Text-based checks
156
  if len(review) < 20:
157
  score += 0.3
158
+ flags.append("too_short")
159
 
 
160
  words = review.lower().split()
161
  unique_ratio = len(set(words)) / len(words) if words else 0
162
  if unique_ratio < 0.5:
163
  score += 0.4
164
+ flags.append("repetitive")
165
 
 
166
  punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
167
  if punct_ratio > 0.1:
168
  score += 0.2
169
+ flags.append("excessive_punctuation")
170
 
 
171
  generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
172
  if any(phrase in review.lower() for phrase in generic_phrases):
173
  score += 0.1
174
+ flags.append("generic_language")
175
+
176
+ # Add metadata flags if available
177
+ if i < len(metadata_flags):
178
+ if metadata_flags[i]:
179
+ score += 0.3
180
+ flags.extend(metadata_flags[i])
181
 
182
  fake_scores.append({
183
  'text': review[:100] + '...' if len(review) > 100 else review,
184
  'fake_probability': min(round(score, 3), 1.0),
185
+ 'status': 'suspicious' if score > 0.5 else 'authentic',
186
+ 'flags': flags
187
  })
188
 
189
  suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)
 
194
  'suspicious_reviews': suspicious_count,
195
  'authenticity_rate': round((len(fake_scores) - suspicious_count) / len(fake_scores) * 100, 1) if fake_scores else 0
196
  },
197
+ 'details': fake_scores,
198
+ 'metadata_analysis': metadata_flags if metadata_flags else None
199
  }
200
 
201
def _analyze_metadata(self, timestamps: List[str], usernames: List[str]) -> List[List[str]]:
    """Flag reviews whose timestamp or username looks suspicious.

    Args:
        timestamps: One ``"%Y-%m-%d %H:%M:%S"`` string per review. Entries
            that do not parse are ignored by the time analysis.
        usernames: One username per review.

    Returns:
        A list of flag lists indexed like the inputs. It is as long as the
        longer of the two inputs, so neither analysis can index past the
        end when the lists differ in length. Possible flags are
        ``"time_cluster"``, ``"suspicious_username"`` and
        ``"short_username"``.
    """
    window = 6            # number of reviews that form a burst
    max_span_seconds = 300  # ...when posted within 5 minutes

    flags_per_review: List[List[str]] = [
        [] for _ in range(max(len(timestamps), len(usernames)))
    ]

    # --- Time density analysis -------------------------------------------
    parsed = []
    for index, raw in enumerate(timestamps):
        try:
            parsed.append((index, datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")))
        except (TypeError, ValueError):
            continue  # unparseable timestamp: leave this review out
    parsed.sort(key=lambda pair: pair[1])

    # Slide a window over the chronologically sorted reviews. Members are
    # collected in a set so overlapping windows flag each review only once.
    clustered = set()
    for start in range(len(parsed) - window + 1):
        first = parsed[start][1]
        last = parsed[start + window - 1][1]
        if (last - first).total_seconds() < max_span_seconds:
            clustered.update(idx for idx, _ in parsed[start:start + window])
    for idx in sorted(clustered):
        flags_per_review[idx].append("time_cluster")

    # --- Username pattern analysis ---------------------------------------
    for index, username in enumerate(usernames):
        if re.match(r"user_\d{4,}", username):
            flags_per_review[index].append("suspicious_username")
        if len(username) < 4:
            flags_per_review[index].append("short_username")

    return flags_per_review
231
+
232
def assess_quality(self, reviews: List[str], custom_weights: Dict = None) -> Tuple[Dict, go.Figure]:
    """Score each review on four factors and chart the factor averages.

    Factors (each in the range 0..1):
      * ``length``: characters / 200, capped at 1.
      * ``detail``: number of connective keywords present / 3, capped at 1.
      * ``structure``: non-empty sentence segments / 5, capped at 1.
      * ``helpfulness``: number of advice keywords present / 2, capped at 1.
    Keyword checks are substring matches on the lower-cased review.

    Args:
        reviews: Raw review strings; blank entries are skipped.
        custom_weights: Optional mapping from factor name to weight. Missing
            factors fall back to 0.25, negative values are treated as 0, and
            the weights are rescaled to sum to 1 so the total score stays in
            0..1 and the grade thresholds keep their meaning. If every
            weight is 0 the default equal weights are used.

    Returns:
        ``(result, figure)`` where ``result`` holds ``summary``, ``details``
        and ``factor_averages``, and ``figure`` is a radar chart of the
        average factor values.
    """
    factor_names = ('length', 'detail', 'structure', 'helpfulness')
    default_weights = {name: 0.25 for name in factor_names}

    supplied = custom_weights or {}
    raw_weights = {
        name: max(float(supplied.get(name, default_weights[name])), 0.0)
        for name in factor_names
    }
    weight_sum = sum(raw_weights.values())
    if weight_sum > 0:
        weights = {name: value / weight_sum for name, value in raw_weights.items()}
    else:
        weights = dict(default_weights)

    detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
    helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']

    quality_scores = []
    for review in reviews:
        if not review.strip():
            continue

        lowered = review.lower()
        factors = {}

        # Length factor
        factors['length'] = round(min(len(review) / 200, 1.0), 2)

        # Detail factor
        detail_hits = sum(1 for word in detail_words if word in lowered)
        factors['detail'] = round(min(detail_hits / 3, 1.0), 2)

        # Structure factor: count only non-empty segments, so a trailing
        # full stop or "..." does not inflate the sentence count.
        sentences = sum(1 for part in re.split(r'[.!?]', review) if part.strip())
        factors['structure'] = round(min(sentences / 5, 1.0), 2)

        # Helpfulness factor
        helpful_hits = sum(1 for word in helpful_words if word in lowered)
        factors['helpfulness'] = round(min(helpful_hits / 2, 1.0), 2)

        # Weighted score
        total_score = sum(factors[name] * weights[name] for name in factor_names)

        if total_score > 0.8:
            grade = 'A'
        elif total_score > 0.6:
            grade = 'B'
        elif total_score > 0.4:
            grade = 'C'
        else:
            grade = 'D'

        quality_scores.append({
            'text': review[:100] + '...' if len(review) > 100 else review,
            'quality_score': round(total_score, 3),
            'factors': factors,
            'grade': grade
        })

    count = len(quality_scores)
    avg_quality = sum(item['quality_score'] for item in quality_scores) / count if count else 0

    # Average of each factor across all scored reviews (0.0 when none).
    avg_factors = {
        name: float(sum(item['factors'][name] for item in quality_scores) / count) if count else 0.0
        for name in factor_names
    }

    # Radar chart of the factor averages.
    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=list(avg_factors.values()),
        theta=list(avg_factors.keys()),
        fill='toself',
        name='Quality Factors'
    ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1]
            )),
        showlegend=True,
        title="Average Quality Factors"
    )

    return {
        'summary': {
            'average_quality': round(avg_quality, 3),
            'total_reviews': count,
            'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7),
            'weights_used': weights
        },
        'details': quality_scores,
        'factor_averages': avg_factors
    }, fig
314
 
315
  def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
316
  """Compare sentiment between two products"""
317
  analysis_a = self.analyze_sentiment(product_a_reviews)
318
  analysis_b = self.analyze_sentiment(product_b_reviews)
319
 
 
320
  fig = make_subplots(
321
  rows=1, cols=2,
322
  specs=[[{'type': 'pie'}, {'type': 'pie'}]],
323
  subplot_titles=['Product A', 'Product B']
324
  )
325
 
 
326
  fig.add_trace(go.Pie(
327
  labels=list(analysis_a['summary'].keys()),
328
  values=list(analysis_a['summary'].values()),
329
  name="Product A"
330
  ), row=1, col=1)
331
 
 
332
  fig.add_trace(go.Pie(
333
  labels=list(analysis_b['summary'].keys()),
334
  values=list(analysis_b['summary'].values()),
 
346
  return comparison, fig
347
 
348
  def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
349
+ """Generate analysis report with export capability"""
350
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
351
 
352
  if report_type == "sentiment":
353
+ keywords = analysis_data.get('keywords', {})
354
+ top_pos = keywords.get('top_positive_aspects', [])[:5]
355
+ top_neg = keywords.get('top_negative_aspects', [])[:5]
356
+
357
+ return f"""# Sentiment Analysis Report
358
  Generated: {timestamp}
359
 
360
  ## Summary
 
363
  - Negative: {analysis_data.get('summary', {}).get('negative', 0)}%
364
  - Neutral: {analysis_data.get('summary', {}).get('neutral', 0)}%
365
 
366
+ ## Top Positive Aspects
367
+ {chr(10).join([f"- {aspect[0]} (mentioned {aspect[1]} times)" for aspect in top_pos])}
368
+
369
+ ## Top Negative Aspects
370
+ {chr(10).join([f"- {aspect[0]} (mentioned {aspect[1]} times)" for aspect in top_neg])}
371
+
372
  ## Key Insights
373
+ - Overall sentiment: {'Positive' if analysis_data.get('summary', {}).get('positive', 0) > 50 else 'Mixed'}
374
+ - Main complaints: {', '.join([aspect[0] for aspect in top_neg[:3]])}
375
+ - Key strengths: {', '.join([aspect[0] for aspect in top_pos[:3]])}
376
 
377
  ## Recommendations
378
+ - Address negative aspects: {', '.join([aspect[0] for aspect in top_neg[:2]])}
379
+ - Leverage positive aspects in marketing
380
  - Monitor sentiment trends over time
381
+ """
382
 
383
  elif report_type == "fake":
384
+ return f"""# Fake Review Detection Report
 
385
  Generated: {timestamp}
386
 
387
  ## Summary
388
+ - Total Reviews: {analysis_data.get('summary', {}).get('total_reviews', 0)}
389
  - Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
390
  - Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%
391
 
392
  ## Risk Assessment
393
+ - Overall Risk: {'High' if analysis_data.get('summary', {}).get('authenticity_rate', 0) < 70 else 'Low'}
394
+ - Action Required: {'Yes' if analysis_data.get('summary', {}).get('suspicious_reviews', 0) > 0 else 'No'}
395
+
396
+ ## Common Fraud Indicators
397
+ - Short reviews with generic language
398
+ - Repetitive content patterns
399
+ - Suspicious timing clusters
400
+ - Unusual username patterns
401
+ """
402
 
403
  return "Report generated successfully"
404
 
 
410
  if not text.strip():
411
  return []
412
 
 
413
  reviews = []
414
  for line in text.split('\n'):
415
  line = line.strip()
416
+ if line and len(line) > 10:
417
  reviews.append(line)
418
 
419
  return reviews
420
 
421
def process_csv_upload(file) -> Tuple[List[str], Dict]:
    """Read reviews and optional metadata from an uploaded CSV file.

    Args:
        file: Either a path string or an object exposing the path as
            ``.name``. ``None`` means nothing was uploaded.

    Returns:
        ``(reviews, metadata)``. ``metadata`` may contain ``timestamps``
        and/or ``usernames``, each index-aligned with ``reviews``; a missing
        cell becomes an empty string. On failure ``reviews`` is empty and
        ``metadata`` is ``{"error": message}``.
    """
    if file is None:
        return [], {}

    try:
        # Accept both a plain path and a file-like wrapper with ``.name``.
        path = getattr(file, "name", file)
        df = pd.read_csv(path)

        # Look for common column names (the last matching column wins).
        review_col = None
        time_col = None
        user_col = None

        for col in df.columns:
            col_lower = col.lower()
            if 'review' in col_lower or 'comment' in col_lower or 'text' in col_lower:
                review_col = col
            elif 'time' in col_lower or 'date' in col_lower:
                time_col = col
            elif 'user' in col_lower or 'name' in col_lower:
                user_col = col

        if review_col is None:
            return [], {"error": "No review column found. Expected columns: 'review', 'comment', or 'text'"}

        # Drop rows without a review once, then read every column from the
        # same rows. Dropping NaNs per column would shift timestamps and
        # usernames onto the wrong reviews.
        rows = df[df[review_col].notna()]
        reviews = rows[review_col].astype(str).tolist()

        metadata = {}
        if time_col:
            metadata['timestamps'] = rows[time_col].fillna('').astype(str).tolist()
        if user_col:
            metadata['usernames'] = rows[user_col].fillna('').astype(str).tolist()

        return reviews, metadata

    except Exception as e:
        # UI boundary: report the failure to the caller instead of raising.
        return [], {"error": f"Failed to process CSV: {str(e)}"}
458
+
459
+ def sentiment_analysis_interface(reviews_text: str, csv_file):
460
  """Interface for sentiment analysis"""
461
+ reviews = []
462
+
463
+ if csv_file is not None:
464
+ reviews, metadata = process_csv_upload(csv_file)
465
+ if 'error' in metadata:
466
+ return metadata['error'], None
467
+ else:
468
+ reviews = process_reviews_input(reviews_text)
469
 
 
470
  if not reviews:
471
+ return "Please enter reviews or upload a CSV file.", None
472
 
473
  try:
474
  result = analyzer.analyze_sentiment(reviews)
475
 
 
476
  fig = go.Figure(data=[
477
  go.Bar(x=list(result['summary'].keys()),
478
  y=list(result['summary'].values()),
 
484
  except Exception as e:
485
  return f"Error: {str(e)}", None
486
 
487
def fake_detection_interface(reviews_text: str, csv_file):
    """Run fake-review detection for the UI and return a display string.

    An uploaded CSV takes precedence over the text box. The return value is
    the JSON-formatted detection result, or a plain message when the CSV
    cannot be used, no reviews are available, or detection raises.
    """
    if csv_file is None:
        review_list, extra = process_reviews_input(reviews_text), {}
    else:
        review_list, extra = process_csv_upload(csv_file)
        if 'error' in extra:
            return extra['error']

    if not review_list:
        return "Please enter reviews or upload a CSV file."

    try:
        # An empty metadata dict is passed on as None.
        detection = analyzer.detect_fake_reviews(review_list, extra or None)
        rendered = json.dumps(detection, indent=2)
    except Exception as exc:
        return f"Error: {str(exc)}"
    return rendered
507
 
508
def quality_assessment_interface(reviews_text: str, csv_file, length_weight: float, detail_weight: float, structure_weight: float, help_weight: float):
    """Run quality assessment for the UI with caller-supplied weights.

    An uploaded CSV takes precedence over the text box. Returns a
    ``(text, figure)`` pair: the JSON-formatted assessment and its radar
    chart, or a message and ``None`` when the CSV cannot be used, no
    reviews are available, or the assessment raises.
    """
    if csv_file is None:
        review_list = process_reviews_input(reviews_text)
    else:
        review_list, csv_meta = process_csv_upload(csv_file)
        if 'error' in csv_meta:
            return csv_meta['error'], None

    if not review_list:
        return "Please enter reviews or upload a CSV file.", None

    try:
        assessment, radar_chart = analyzer.assess_quality(
            review_list,
            {
                'length': length_weight,
                'detail': detail_weight,
                'structure': structure_weight,
                'helpfulness': help_weight,
            },
        )
        return json.dumps(assessment, indent=2), radar_chart
    except Exception as exc:
        return f"Error: {str(exc)}", None
534
 
535
  def competitor_comparison_interface(product_a_text: str, product_b_text: str):
536
  """Interface for competitor comparison"""
 
564
  # Create Gradio interface
565
  with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
566
  gr.Markdown("# πŸ›’ SmartReview Pro")
567
+ gr.Markdown("Advanced review analysis platform with AI-powered insights")
568
 
569
  with gr.Tab("πŸ“Š Sentiment Analysis"):
570
+ gr.Markdown("### Analyze customer sentiment and extract key aspects")
571
  with gr.Row():
572
  with gr.Column():
573
  sentiment_input = gr.Textbox(
574
+ lines=8,
575
+ placeholder="Enter reviews (one per line) or upload CSV...",
576
  label="Reviews"
577
  )
578
+ sentiment_csv = gr.File(
579
+ label="Upload CSV (columns: review/comment/text, optional: timestamp, username)",
580
+ file_types=[".csv"]
581
+ )
582
  sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
583
  with gr.Column():
584
  sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
 
586
 
587
  sentiment_btn.click(
588
  sentiment_analysis_interface,
589
+ inputs=[sentiment_input, sentiment_csv],
590
  outputs=[sentiment_output, sentiment_chart]
591
  )
592
 
593
  with gr.Tab("πŸ” Fake Review Detection"):
594
+ gr.Markdown("### Detect suspicious reviews using text analysis and metadata")
595
  with gr.Row():
596
  with gr.Column():
597
  fake_input = gr.Textbox(
598
+ lines=8,
599
+ placeholder="Enter reviews to analyze...",
600
  label="Reviews"
601
  )
602
+ fake_csv = gr.File(
603
+ label="Upload CSV (supports timestamp & username analysis)",
604
+ file_types=[".csv"]
605
+ )
606
  fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
607
  with gr.Column():
608
  fake_output = gr.Textbox(label="Detection Results", lines=15)
609
 
610
  fake_btn.click(
611
  fake_detection_interface,
612
+ inputs=[fake_input, fake_csv],
613
  outputs=[fake_output]
614
  )
615
 
616
  with gr.Tab("⭐ Quality Assessment"):
617
+ gr.Markdown("### Assess review quality with customizable weights")
618
  with gr.Row():
619
  with gr.Column():
620
  quality_input = gr.Textbox(
621
+ lines=8,
622
+ placeholder="Enter reviews to assess...",
623
  label="Reviews"
624
  )
625
+ quality_csv = gr.File(
626
+ label="Upload CSV",
627
+ file_types=[".csv"]
628
+ )
629
+
630
+ gr.Markdown("**Customize Quality Weights:**")
631
+ with gr.Row():
632
+ length_weight = gr.Slider(0, 1, 0.25, label="Length Weight")
633
+ detail_weight = gr.Slider(0, 1, 0.25, label="Detail Weight")
634
+ with gr.Row():
635
+ structure_weight = gr.Slider(0, 1, 0.25, label="Structure Weight")
636
+ help_weight = gr.Slider(0, 1, 0.25, label="Helpfulness Weight")
637
+
638
  quality_btn = gr.Button("Assess Quality", variant="primary")
639
  with gr.Column():
640
+ quality_output = gr.Textbox(label="Quality Assessment", lines=12)
641
+ quality_radar = gr.Plot(label="Quality Factors Radar Chart")
642
 
643
  quality_btn.click(
644
  quality_assessment_interface,
645
+ inputs=[quality_input, quality_csv, length_weight, detail_weight, structure_weight, help_weight],
646
+ outputs=[quality_output, quality_radar]
647
  )
648
 
649
  with gr.Tab("πŸ†š Competitor Comparison"):
 
694
  inputs=[report_data, report_type],
695
  outputs=[report_output]
696
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
 
698
  if __name__ == "__main__":
699
  demo.launch()