Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,9 +3,10 @@ import pandas as pd
|
|
3 |
import numpy as np
|
4 |
import json
|
5 |
import re
|
|
|
6 |
from datetime import datetime
|
7 |
from typing import List, Dict, Tuple
|
8 |
-
from transformers import pipeline, AutoTokenizer
|
9 |
import plotly.graph_objects as go
|
10 |
from plotly.subplots import make_subplots
|
11 |
import sqlite3
|
@@ -14,7 +15,7 @@ import time
|
|
14 |
|
15 |
# Initialize models
|
16 |
sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
|
17 |
-
|
18 |
|
19 |
class ReviewAnalyzer:
|
20 |
def __init__(self):
|
@@ -36,13 +37,65 @@ class ReviewAnalyzer:
|
|
36 |
|
37 |
def preprocess_text(self, text: str) -> str:
|
38 |
"""Clean and preprocess review text"""
|
39 |
-
text = re.sub(r'http\S+', '', text)
|
40 |
-
text = re.sub(r'[^\w\s]', '', text)
|
41 |
text = text.strip().lower()
|
42 |
return text
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def analyze_sentiment(self, reviews: List[str]) -> Dict:
|
45 |
-
"""Analyze sentiment of reviews"""
|
46 |
results = []
|
47 |
sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
|
48 |
|
@@ -54,9 +107,8 @@ class ReviewAnalyzer:
|
|
54 |
result = sentiment_analyzer(clean_review)[0]
|
55 |
|
56 |
label = result['label'].lower()
|
57 |
-
score = result['score']
|
58 |
|
59 |
-
# Map labels to standard format
|
60 |
if 'pos' in label:
|
61 |
sentiment = 'positive'
|
62 |
elif 'neg' in label:
|
@@ -74,47 +126,64 @@ class ReviewAnalyzer:
|
|
74 |
total = len(results)
|
75 |
sentiment_percentages = {k: round(v/total*100, 1) for k, v in sentiments.items()}
|
76 |
|
|
|
|
|
|
|
77 |
return {
|
78 |
'summary': sentiment_percentages,
|
79 |
'details': results,
|
80 |
-
'total_reviews': total
|
|
|
81 |
}
|
82 |
|
83 |
-
def detect_fake_reviews(self, reviews: List[str]) -> Dict:
|
84 |
-
"""Detect potentially fake reviews"""
|
85 |
fake_scores = []
|
86 |
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
88 |
if not review.strip():
|
89 |
continue
|
90 |
|
91 |
-
# Simple fake detection heuristics
|
92 |
score = 0
|
|
|
93 |
|
94 |
-
#
|
95 |
if len(review) < 20:
|
96 |
score += 0.3
|
|
|
97 |
|
98 |
-
# Repetitive words
|
99 |
words = review.lower().split()
|
100 |
unique_ratio = len(set(words)) / len(words) if words else 0
|
101 |
if unique_ratio < 0.5:
|
102 |
score += 0.4
|
|
|
103 |
|
104 |
-
# Excessive punctuation
|
105 |
punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
|
106 |
if punct_ratio > 0.1:
|
107 |
score += 0.2
|
|
|
108 |
|
109 |
-
# Generic phrases
|
110 |
generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
|
111 |
if any(phrase in review.lower() for phrase in generic_phrases):
|
112 |
score += 0.1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
fake_scores.append({
|
115 |
'text': review[:100] + '...' if len(review) > 100 else review,
|
116 |
'fake_probability': min(round(score, 3), 1.0),
|
117 |
-
'status': 'suspicious' if score > 0.5 else 'authentic'
|
|
|
118 |
})
|
119 |
|
120 |
suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)
|
@@ -125,81 +194,141 @@ class ReviewAnalyzer:
|
|
125 |
'suspicious_reviews': suspicious_count,
|
126 |
'authenticity_rate': round((len(fake_scores) - suspicious_count) / len(fake_scores) * 100, 1) if fake_scores else 0
|
127 |
},
|
128 |
-
'details': fake_scores
|
|
|
129 |
}
|
130 |
|
131 |
-
def
|
132 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
quality_scores = []
|
134 |
|
135 |
for review in reviews:
|
136 |
if not review.strip():
|
137 |
continue
|
138 |
|
139 |
-
score = 0
|
140 |
factors = {}
|
141 |
|
142 |
# Length factor
|
143 |
length_score = min(len(review) / 200, 1.0)
|
144 |
factors['length'] = round(length_score, 2)
|
145 |
-
score += length_score * 0.3
|
146 |
|
147 |
-
# Detail factor
|
148 |
detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
|
149 |
detail_score = min(sum(1 for word in detail_words if word in review.lower()) / 3, 1.0)
|
150 |
factors['detail'] = round(detail_score, 2)
|
151 |
-
score += detail_score * 0.3
|
152 |
|
153 |
# Structure factor
|
154 |
sentences = len(re.split(r'[.!?]', review))
|
155 |
structure_score = min(sentences / 5, 1.0)
|
156 |
factors['structure'] = round(structure_score, 2)
|
157 |
-
score += structure_score * 0.2
|
158 |
|
159 |
# Helpfulness factor
|
160 |
helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']
|
161 |
helpful_score = min(sum(1 for word in helpful_words if word in review.lower()) / 2, 1.0)
|
162 |
factors['helpfulness'] = round(helpful_score, 2)
|
163 |
-
|
|
|
|
|
164 |
|
165 |
quality_scores.append({
|
166 |
'text': review[:100] + '...' if len(review) > 100 else review,
|
167 |
-
'quality_score': round(
|
168 |
'factors': factors,
|
169 |
-
'grade': 'A' if
|
170 |
})
|
171 |
|
172 |
avg_quality = sum(item['quality_score'] for item in quality_scores) / len(quality_scores) if quality_scores else 0
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
return {
|
175 |
'summary': {
|
176 |
'average_quality': round(avg_quality, 3),
|
177 |
'total_reviews': len(quality_scores),
|
178 |
-
'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7)
|
|
|
179 |
},
|
180 |
-
'details': quality_scores
|
181 |
-
|
|
|
182 |
|
183 |
def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
|
184 |
"""Compare sentiment between two products"""
|
185 |
analysis_a = self.analyze_sentiment(product_a_reviews)
|
186 |
analysis_b = self.analyze_sentiment(product_b_reviews)
|
187 |
|
188 |
-
# Create comparison chart
|
189 |
fig = make_subplots(
|
190 |
rows=1, cols=2,
|
191 |
specs=[[{'type': 'pie'}, {'type': 'pie'}]],
|
192 |
subplot_titles=['Product A', 'Product B']
|
193 |
)
|
194 |
|
195 |
-
# Product A pie chart
|
196 |
fig.add_trace(go.Pie(
|
197 |
labels=list(analysis_a['summary'].keys()),
|
198 |
values=list(analysis_a['summary'].values()),
|
199 |
name="Product A"
|
200 |
), row=1, col=1)
|
201 |
|
202 |
-
# Product B pie chart
|
203 |
fig.add_trace(go.Pie(
|
204 |
labels=list(analysis_b['summary'].keys()),
|
205 |
values=list(analysis_b['summary'].values()),
|
@@ -217,12 +346,15 @@ class ReviewAnalyzer:
|
|
217 |
return comparison, fig
|
218 |
|
219 |
def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
|
220 |
-
"""Generate analysis report"""
|
221 |
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
222 |
|
223 |
if report_type == "sentiment":
|
224 |
-
|
225 |
-
|
|
|
|
|
|
|
226 |
Generated: {timestamp}
|
227 |
|
228 |
## Summary
|
@@ -231,30 +363,42 @@ Generated: {timestamp}
|
|
231 |
- Negative: {analysis_data.get('summary', {}).get('negative', 0)}%
|
232 |
- Neutral: {analysis_data.get('summary', {}).get('neutral', 0)}%
|
233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
## Key Insights
|
235 |
-
- Overall sentiment
|
236 |
-
-
|
|
|
237 |
|
238 |
## Recommendations
|
239 |
-
-
|
240 |
-
- Leverage positive
|
241 |
- Monitor sentiment trends over time
|
242 |
-
|
243 |
|
244 |
elif report_type == "fake":
|
245 |
-
return f"""
|
246 |
-
# Fake Review Detection Report
|
247 |
Generated: {timestamp}
|
248 |
|
249 |
## Summary
|
250 |
-
- Total Reviews
|
251 |
- Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
|
252 |
- Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%
|
253 |
|
254 |
## Risk Assessment
|
255 |
-
-
|
256 |
-
-
|
257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
return "Report generated successfully"
|
260 |
|
@@ -266,28 +410,69 @@ def process_reviews_input(text: str) -> List[str]:
|
|
266 |
if not text.strip():
|
267 |
return []
|
268 |
|
269 |
-
# Split by lines or by common separators
|
270 |
reviews = []
|
271 |
for line in text.split('\n'):
|
272 |
line = line.strip()
|
273 |
-
if line and len(line) > 10:
|
274 |
reviews.append(line)
|
275 |
|
276 |
return reviews
|
277 |
|
278 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
"""Interface for sentiment analysis"""
|
280 |
-
|
281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
|
283 |
-
reviews = process_reviews_input(reviews_text)
|
284 |
if not reviews:
|
285 |
-
return "
|
286 |
|
287 |
try:
|
288 |
result = analyzer.analyze_sentiment(reviews)
|
289 |
|
290 |
-
# Create visualization
|
291 |
fig = go.Figure(data=[
|
292 |
go.Bar(x=list(result['summary'].keys()),
|
293 |
y=list(result['summary'].values()),
|
@@ -299,35 +484,53 @@ def sentiment_analysis_interface(reviews_text: str):
|
|
299 |
except Exception as e:
|
300 |
return f"Error: {str(e)}", None
|
301 |
|
302 |
-
def fake_detection_interface(reviews_text: str):
|
303 |
"""Interface for fake review detection"""
|
304 |
-
|
305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
-
reviews = process_reviews_input(reviews_text)
|
308 |
if not reviews:
|
309 |
-
return "
|
310 |
|
311 |
try:
|
312 |
-
result = analyzer.detect_fake_reviews(reviews)
|
313 |
return json.dumps(result, indent=2)
|
314 |
except Exception as e:
|
315 |
return f"Error: {str(e)}"
|
316 |
|
317 |
-
def quality_assessment_interface(reviews_text: str):
|
318 |
-
"""Interface for quality assessment"""
|
319 |
-
|
320 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
|
322 |
-
reviews = process_reviews_input(reviews_text)
|
323 |
if not reviews:
|
324 |
-
return "
|
325 |
|
326 |
try:
|
327 |
-
|
328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
except Exception as e:
|
330 |
-
return f"Error: {str(e)}"
|
331 |
|
332 |
def competitor_comparison_interface(product_a_text: str, product_b_text: str):
|
333 |
"""Interface for competitor comparison"""
|
@@ -361,17 +564,21 @@ def generate_report_interface(analysis_result: str, report_type: str):
|
|
361 |
# Create Gradio interface
|
362 |
with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
|
363 |
gr.Markdown("# π SmartReview Pro")
|
364 |
-
gr.Markdown("
|
365 |
|
366 |
with gr.Tab("π Sentiment Analysis"):
|
367 |
-
gr.Markdown("### Analyze customer sentiment
|
368 |
with gr.Row():
|
369 |
with gr.Column():
|
370 |
sentiment_input = gr.Textbox(
|
371 |
-
lines=
|
372 |
-
placeholder="Enter reviews (one per line)
|
373 |
label="Reviews"
|
374 |
)
|
|
|
|
|
|
|
|
|
375 |
sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
|
376 |
with gr.Column():
|
377 |
sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
|
@@ -379,46 +586,64 @@ with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
|
|
379 |
|
380 |
sentiment_btn.click(
|
381 |
sentiment_analysis_interface,
|
382 |
-
inputs=[sentiment_input],
|
383 |
outputs=[sentiment_output, sentiment_chart]
|
384 |
)
|
385 |
|
386 |
with gr.Tab("π Fake Review Detection"):
|
387 |
-
gr.Markdown("### Detect
|
388 |
with gr.Row():
|
389 |
with gr.Column():
|
390 |
fake_input = gr.Textbox(
|
391 |
-
lines=
|
392 |
-
placeholder="Enter reviews to
|
393 |
label="Reviews"
|
394 |
)
|
|
|
|
|
|
|
|
|
395 |
fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
|
396 |
with gr.Column():
|
397 |
fake_output = gr.Textbox(label="Detection Results", lines=15)
|
398 |
|
399 |
fake_btn.click(
|
400 |
fake_detection_interface,
|
401 |
-
inputs=[fake_input],
|
402 |
outputs=[fake_output]
|
403 |
)
|
404 |
|
405 |
with gr.Tab("β Quality Assessment"):
|
406 |
-
gr.Markdown("### Assess
|
407 |
with gr.Row():
|
408 |
with gr.Column():
|
409 |
quality_input = gr.Textbox(
|
410 |
-
lines=
|
411 |
-
placeholder="Enter reviews to assess
|
412 |
label="Reviews"
|
413 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
414 |
quality_btn = gr.Button("Assess Quality", variant="primary")
|
415 |
with gr.Column():
|
416 |
-
quality_output = gr.Textbox(label="Quality Assessment", lines=
|
|
|
417 |
|
418 |
quality_btn.click(
|
419 |
quality_assessment_interface,
|
420 |
-
inputs=[quality_input],
|
421 |
-
outputs=[quality_output]
|
422 |
)
|
423 |
|
424 |
with gr.Tab("π Competitor Comparison"):
|
@@ -469,24 +694,6 @@ with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
|
|
469 |
inputs=[report_data, report_type],
|
470 |
outputs=[report_output]
|
471 |
)
|
472 |
-
|
473 |
-
with gr.Tab("βΉοΈ About"):
|
474 |
-
gr.Markdown("""
|
475 |
-
## SmartReview Pro Features
|
476 |
-
|
477 |
-
- **Sentiment Analysis**: Analyze customer emotions and opinions
|
478 |
-
- **Fake Review Detection**: Identify suspicious or inauthentic reviews
|
479 |
-
- **Quality Assessment**: Evaluate review helpfulness and detail
|
480 |
-
- **Competitor Comparison**: Compare sentiment across products
|
481 |
-
- **Professional Reports**: Generate detailed analysis reports
|
482 |
-
|
483 |
-
## Pricing Plans
|
484 |
-
- **Free**: 10 analyses per day
|
485 |
-
- **Pro ($299/month)**: 1000 analyses per day + advanced features
|
486 |
-
- **Enterprise**: Unlimited usage + API access + custom reports
|
487 |
-
|
488 |
-
Contact us for enterprise solutions and custom integrations.
|
489 |
-
""")
|
490 |
|
491 |
if __name__ == "__main__":
|
492 |
demo.launch()
|
|
|
3 |
import numpy as np
|
4 |
import json
|
5 |
import re
|
6 |
+
import io
|
7 |
from datetime import datetime
|
8 |
from typing import List, Dict, Tuple
|
9 |
+
from transformers import pipeline, AutoTokenizer
|
10 |
import plotly.graph_objects as go
|
11 |
from plotly.subplots import make_subplots
|
12 |
import sqlite3
|
|
|
15 |
|
16 |
# Initialize models
# Module-level pipelines: both are constructed once, when this file is imported,
# and are referenced as globals by the analysis code below.
sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
# NOTE(review): the code that consumes this pipeline reads 'word', 'entity_group'
# and 'score' from each result, i.e. it expects token-classification output.
# This checkpoint appears to be published as an aspect-polarity *sequence*
# classifier -- confirm against the model card that loading it through the
# "ner" task yields meaningful aspect spans before trusting the keywords.
absa_analyzer = pipeline("ner", model="yangheng/deberta-v3-base-absa-v1.1", aggregation_strategy="simple")
|
19 |
|
20 |
class ReviewAnalyzer:
|
21 |
def __init__(self):
|
|
|
37 |
|
38 |
def preprocess_text(self, text: str) -> str:
    """Clean and normalise review text.

    Processing order:

    1. Remove URLs (``http`` followed by any run of non-whitespace).
    2. Remove every character that is neither a word character nor
       whitespace (underscores count as word characters and are kept).
    3. Strip leading/trailing whitespace and lower-case the result.

    Interior whitespace is not collapsed, so deleting a URL from the middle
    of a sentence leaves the spaces that surrounded it.

    Args:
        text: Raw review text.

    Returns:
        The cleaned, lower-cased text.
    """
    without_urls = re.sub(r'http\S+', '', text)
    without_punctuation = re.sub(r'[^\w\s]', '', without_urls)
    return without_punctuation.strip().lower()
|
44 |
|
45 |
+
def extract_aspect_keywords(self, reviews: List[str]) -> Dict:
    """Extract aspect-level sentiment keywords from a batch of reviews.

    Each sufficiently long review is passed to the module-level
    ``absa_analyzer`` pipeline.  Every returned item is expected to expose
    ``'word'``, ``'entity_group'`` and ``'score'``; items whose label is
    neither positive nor negative are ignored.

    Args:
        reviews: Raw review strings.  Blank reviews and reviews shorter
            than 10 characters are skipped.

    Returns:
        A dict with:

        * ``top_positive_aspects`` / ``top_negative_aspects`` -- up to ten
          ``(word, count)`` pairs, most frequent first.
        * ``detailed_aspects`` -- one entry per detected aspect with the
          review snippet, aspect word, sentiment and rounded confidence.
        * ``summary`` -- the number of distinct positive / negative words.
    """
    aspect_counts = {'positive': {}, 'negative': {}}
    detailed_aspects = []

    for review in reviews:
        if not review.strip() or len(review) < 10:
            continue

        # Only mark the snippet as truncated when something was cut off.
        snippet = review[:50] + '...' if len(review) > 50 else review

        try:
            for aspect in absa_analyzer(review):
                word = aspect['word'].lower()
                label = aspect['entity_group'].lower()
                confidence = float(aspect['score'])

                # Map labels to sentiment
                if 'pos' in label:
                    sentiment = 'positive'
                elif 'neg' in label:
                    sentiment = 'negative'
                else:
                    continue

                # Count aspects
                bucket = aspect_counts[sentiment]
                bucket[word] = bucket.get(word, 0) + 1

                detailed_aspects.append({
                    'review': snippet,
                    'aspect': word,
                    'sentiment': sentiment,
                    'confidence': round(confidence, 3)
                })
        except Exception:
            # Best effort: a review the model (or its output format) cannot
            # handle must not abort the whole batch.  Unlike a bare
            # ``except:`` this still lets KeyboardInterrupt/SystemExit through.
            continue

    def _top(counts):
        # Stable sort: words with equal counts keep first-seen order.
        return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:10]

    return {
        'top_positive_aspects': _top(aspect_counts['positive']),
        'top_negative_aspects': _top(aspect_counts['negative']),
        'detailed_aspects': detailed_aspects,
        'summary': {
            'total_positive_aspects': len(aspect_counts['positive']),
            'total_negative_aspects': len(aspect_counts['negative'])
        }
    }
|
96 |
+
|
97 |
def analyze_sentiment(self, reviews: List[str]) -> Dict:
|
98 |
+
"""Analyze sentiment of reviews with keyword extraction"""
|
99 |
results = []
|
100 |
sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
|
101 |
|
|
|
107 |
result = sentiment_analyzer(clean_review)[0]
|
108 |
|
109 |
label = result['label'].lower()
|
110 |
+
score = float(result['score'])
|
111 |
|
|
|
112 |
if 'pos' in label:
|
113 |
sentiment = 'positive'
|
114 |
elif 'neg' in label:
|
|
|
126 |
total = len(results)
|
127 |
sentiment_percentages = {k: round(v/total*100, 1) for k, v in sentiments.items()}
|
128 |
|
129 |
+
# Extract keywords
|
130 |
+
keywords = self.extract_aspect_keywords(reviews)
|
131 |
+
|
132 |
return {
|
133 |
'summary': sentiment_percentages,
|
134 |
'details': results,
|
135 |
+
'total_reviews': total,
|
136 |
+
'keywords': keywords
|
137 |
}
|
138 |
|
139 |
+
def detect_fake_reviews(self, reviews: List[str], metadata: Dict = None) -> Dict:
|
140 |
+
"""Detect potentially fake reviews with optional metadata"""
|
141 |
fake_scores = []
|
142 |
|
143 |
+
# Process metadata if provided
|
144 |
+
metadata_flags = []
|
145 |
+
if metadata and 'timestamps' in metadata and 'usernames' in metadata:
|
146 |
+
metadata_flags = self._analyze_metadata(metadata['timestamps'], metadata['usernames'])
|
147 |
+
|
148 |
+
for i, review in enumerate(reviews):
|
149 |
if not review.strip():
|
150 |
continue
|
151 |
|
|
|
152 |
score = 0
|
153 |
+
flags = []
|
154 |
|
155 |
+
# Text-based checks
|
156 |
if len(review) < 20:
|
157 |
score += 0.3
|
158 |
+
flags.append("too_short")
|
159 |
|
|
|
160 |
words = review.lower().split()
|
161 |
unique_ratio = len(set(words)) / len(words) if words else 0
|
162 |
if unique_ratio < 0.5:
|
163 |
score += 0.4
|
164 |
+
flags.append("repetitive")
|
165 |
|
|
|
166 |
punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
|
167 |
if punct_ratio > 0.1:
|
168 |
score += 0.2
|
169 |
+
flags.append("excessive_punctuation")
|
170 |
|
|
|
171 |
generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
|
172 |
if any(phrase in review.lower() for phrase in generic_phrases):
|
173 |
score += 0.1
|
174 |
+
flags.append("generic_language")
|
175 |
+
|
176 |
+
# Add metadata flags if available
|
177 |
+
if i < len(metadata_flags):
|
178 |
+
if metadata_flags[i]:
|
179 |
+
score += 0.3
|
180 |
+
flags.extend(metadata_flags[i])
|
181 |
|
182 |
fake_scores.append({
|
183 |
'text': review[:100] + '...' if len(review) > 100 else review,
|
184 |
'fake_probability': min(round(score, 3), 1.0),
|
185 |
+
'status': 'suspicious' if score > 0.5 else 'authentic',
|
186 |
+
'flags': flags
|
187 |
})
|
188 |
|
189 |
suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)
|
|
|
194 |
'suspicious_reviews': suspicious_count,
|
195 |
'authenticity_rate': round((len(fake_scores) - suspicious_count) / len(fake_scores) * 100, 1) if fake_scores else 0
|
196 |
},
|
197 |
+
'details': fake_scores,
|
198 |
+
'metadata_analysis': metadata_flags if metadata_flags else None
|
199 |
}
|
200 |
|
201 |
+
def _analyze_metadata(self, timestamps: List[str], usernames: List[str]) -> List[List[str]]:
|
202 |
+
"""Analyze metadata for suspicious patterns"""
|
203 |
+
flags_per_review = [[] for _ in range(len(timestamps))]
|
204 |
+
|
205 |
+
# Time density analysis
|
206 |
+
if len(timestamps) >= 5:
|
207 |
+
times = []
|
208 |
+
for i, ts in enumerate(timestamps):
|
209 |
+
try:
|
210 |
+
dt = datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
|
211 |
+
times.append((i, dt))
|
212 |
+
except:
|
213 |
+
continue
|
214 |
+
|
215 |
+
times.sort(key=lambda x: x[1])
|
216 |
+
|
217 |
+
# Check for clusters
|
218 |
+
for i in range(len(times) - 5):
|
219 |
+
if (times[i + 5][1] - times[i][1]).total_seconds() < 300: # 5 mins
|
220 |
+
for j in range(i, i + 6):
|
221 |
+
flags_per_review[times[j][0]].append("time_cluster")
|
222 |
+
|
223 |
+
# Username pattern analysis
|
224 |
+
for i, username in enumerate(usernames):
|
225 |
+
if re.match(r"user_\d{4,}", username):
|
226 |
+
flags_per_review[i].append("suspicious_username")
|
227 |
+
if len(username) < 4:
|
228 |
+
flags_per_review[i].append("short_username")
|
229 |
+
|
230 |
+
return flags_per_review
|
231 |
+
|
232 |
+
def assess_quality(self, reviews: List[str], custom_weights: Dict = None) -> Tuple[Dict, go.Figure]:
    """Assess review quality with customizable weights and a radar chart.

    Every non-blank review is scored on four factors, each in ``[0, 1]``
    and rounded to two decimals:

    * ``length`` -- characters / 200.
    * ``detail`` -- how many of a fixed list of connective words occur / 3.
    * ``structure`` -- number of non-empty sentences (split on ``.!?``) / 5.
    * ``helpfulness`` -- how many of a fixed list of advice words occur / 2.

    The overall score is the weighted sum of the factors.  Weights are
    normalised to sum to 1 so the score stays in ``[0, 1]`` and the grade
    thresholds remain meaningful whatever the caller passes.

    Args:
        reviews: Raw review strings; blank ones are skipped.
        custom_weights: Optional mapping of factor name to weight.  Missing
            factors default to 0.25; unknown keys are ignored.  If the
            weights do not sum to a positive number, equal weights are used.

    Returns:
        ``(result, figure)`` where ``result`` holds ``summary``, per-review
        ``details`` and ``factor_averages``, and ``figure`` is a radar
        chart of the factor averages.
    """
    factor_names = ('length', 'detail', 'structure', 'helpfulness')
    detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
    helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']

    supplied = custom_weights or {}
    weights = {name: float(supplied.get(name, 0.25)) for name in factor_names}
    weight_total = sum(weights.values())
    if weight_total > 0:
        weights = {name: value / weight_total for name, value in weights.items()}
    else:
        # All-zero (or otherwise unusable) weights would score everything 0.
        weights = dict.fromkeys(factor_names, 0.25)

    quality_scores = []
    for review in reviews:
        if not review.strip():
            continue

        lowered = review.lower()
        # Ignore empty fragments so "Great!!!" counts as one sentence, not four.
        sentence_count = sum(1 for part in re.split(r'[.!?]', review) if part.strip())

        factors = {
            'length': round(min(len(review) / 200, 1.0), 2),
            'detail': round(min(sum(1 for word in detail_words if word in lowered) / 3, 1.0), 2),
            'structure': round(min(sentence_count / 5, 1.0), 2),
            'helpfulness': round(min(sum(1 for word in helpful_words if word in lowered) / 2, 1.0), 2),
        }

        # Calculate weighted score
        total_score = sum(factors[name] * weights[name] for name in factor_names)

        if total_score > 0.8:
            grade = 'A'
        elif total_score > 0.6:
            grade = 'B'
        elif total_score > 0.4:
            grade = 'C'
        else:
            grade = 'D'

        quality_scores.append({
            'text': review[:100] + '...' if len(review) > 100 else review,
            'quality_score': round(total_score, 3),
            'factors': factors,
            'grade': grade
        })

    scored = len(quality_scores)
    avg_quality = sum(item['quality_score'] for item in quality_scores) / scored if scored else 0

    # Create radar chart for average factors
    avg_factors = {
        name: float(sum(item['factors'][name] for item in quality_scores) / scored if scored else 0)
        for name in factor_names
    }

    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=list(avg_factors.values()),
        theta=list(avg_factors.keys()),
        fill='toself',
        name='Quality Factors'
    ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1]
            )),
        showlegend=True,
        title="Average Quality Factors"
    )

    return {
        'summary': {
            'average_quality': round(avg_quality, 3),
            'total_reviews': scored,
            'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7),
            'weights_used': weights
        },
        'details': quality_scores,
        'factor_averages': avg_factors
    }, fig
|
314 |
|
315 |
def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
|
316 |
"""Compare sentiment between two products"""
|
317 |
analysis_a = self.analyze_sentiment(product_a_reviews)
|
318 |
analysis_b = self.analyze_sentiment(product_b_reviews)
|
319 |
|
|
|
320 |
fig = make_subplots(
|
321 |
rows=1, cols=2,
|
322 |
specs=[[{'type': 'pie'}, {'type': 'pie'}]],
|
323 |
subplot_titles=['Product A', 'Product B']
|
324 |
)
|
325 |
|
|
|
326 |
fig.add_trace(go.Pie(
|
327 |
labels=list(analysis_a['summary'].keys()),
|
328 |
values=list(analysis_a['summary'].values()),
|
329 |
name="Product A"
|
330 |
), row=1, col=1)
|
331 |
|
|
|
332 |
fig.add_trace(go.Pie(
|
333 |
labels=list(analysis_b['summary'].keys()),
|
334 |
values=list(analysis_b['summary'].values()),
|
|
|
346 |
return comparison, fig
|
347 |
|
348 |
def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
|
349 |
+
"""Generate analysis report with export capability"""
|
350 |
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
351 |
|
352 |
if report_type == "sentiment":
|
353 |
+
keywords = analysis_data.get('keywords', {})
|
354 |
+
top_pos = keywords.get('top_positive_aspects', [])[:5]
|
355 |
+
top_neg = keywords.get('top_negative_aspects', [])[:5]
|
356 |
+
|
357 |
+
return f"""# Sentiment Analysis Report
|
358 |
Generated: {timestamp}
|
359 |
|
360 |
## Summary
|
|
|
363 |
- Negative: {analysis_data.get('summary', {}).get('negative', 0)}%
|
364 |
- Neutral: {analysis_data.get('summary', {}).get('neutral', 0)}%
|
365 |
|
366 |
+
## Top Positive Aspects
|
367 |
+
{chr(10).join([f"- {aspect[0]} (mentioned {aspect[1]} times)" for aspect in top_pos])}
|
368 |
+
|
369 |
+
## Top Negative Aspects
|
370 |
+
{chr(10).join([f"- {aspect[0]} (mentioned {aspect[1]} times)" for aspect in top_neg])}
|
371 |
+
|
372 |
## Key Insights
|
373 |
+
- Overall sentiment: {'Positive' if analysis_data.get('summary', {}).get('positive', 0) > 50 else 'Mixed'}
|
374 |
+
- Main complaints: {', '.join([aspect[0] for aspect in top_neg[:3]])}
|
375 |
+
- Key strengths: {', '.join([aspect[0] for aspect in top_pos[:3]])}
|
376 |
|
377 |
## Recommendations
|
378 |
+
- Address negative aspects: {', '.join([aspect[0] for aspect in top_neg[:2]])}
|
379 |
+
- Leverage positive aspects in marketing
|
380 |
- Monitor sentiment trends over time
|
381 |
+
"""
|
382 |
|
383 |
elif report_type == "fake":
|
384 |
+
return f"""# Fake Review Detection Report
|
|
|
385 |
Generated: {timestamp}
|
386 |
|
387 |
## Summary
|
388 |
+
- Total Reviews: {analysis_data.get('summary', {}).get('total_reviews', 0)}
|
389 |
- Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
|
390 |
- Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%
|
391 |
|
392 |
## Risk Assessment
|
393 |
+
- Overall Risk: {'High' if analysis_data.get('summary', {}).get('authenticity_rate', 0) < 70 else 'Low'}
|
394 |
+
- Action Required: {'Yes' if analysis_data.get('summary', {}).get('suspicious_reviews', 0) > 0 else 'No'}
|
395 |
+
|
396 |
+
## Common Fraud Indicators
|
397 |
+
- Short reviews with generic language
|
398 |
+
- Repetitive content patterns
|
399 |
+
- Suspicious timing clusters
|
400 |
+
- Unusual username patterns
|
401 |
+
"""
|
402 |
|
403 |
return "Report generated successfully"
|
404 |
|
|
|
410 |
if not text.strip():
|
411 |
return []
|
412 |
|
|
|
413 |
reviews = []
|
414 |
for line in text.split('\n'):
|
415 |
line = line.strip()
|
416 |
+
if line and len(line) > 10:
|
417 |
reviews.append(line)
|
418 |
|
419 |
return reviews
|
420 |
|
421 |
+
def process_csv_upload(file) -> Tuple[List[str], Dict]:
    """Read reviews (and optional metadata) from an uploaded CSV file.

    Columns are matched case-insensitively by substring, and the first
    matching column of each kind wins:

    * review text -- name contains ``review``, ``comment`` or ``text``
    * timestamp   -- name contains ``time`` or ``date``
    * username    -- name contains ``user`` or ``name``

    Rows without review text are dropped.  Timestamps and usernames are
    taken from the *same* remaining rows, so ``metadata['timestamps'][i]``
    and ``metadata['usernames'][i]`` always describe ``reviews[i]``;
    missing metadata cells become empty strings.

    Args:
        file: ``None``, an object exposing the file path as ``.name``, or
            the path itself.

    Returns:
        ``(reviews, metadata)``.  ``metadata`` may contain ``timestamps``
        and/or ``usernames``; on failure it contains a single ``error``
        message and ``reviews`` is empty.
    """
    if file is None:
        return [], {}

    # Accept either an upload object carrying ``.name`` or a plain path.
    path = getattr(file, "name", file)

    try:
        df = pd.read_csv(path)

        # Look for common column names
        review_col = time_col = user_col = None
        for col in df.columns:
            col_lower = str(col).lower()
            if review_col is None and any(key in col_lower for key in ('review', 'comment', 'text')):
                review_col = col
            elif time_col is None and ('time' in col_lower or 'date' in col_lower):
                time_col = col
            elif user_col is None and ('user' in col_lower or 'name' in col_lower):
                user_col = col

        if review_col is None:
            return [], {"error": "No review column found. Expected columns: 'review', 'comment', or 'text'"}

        # Filter whole rows (not individual columns) to keep everything aligned.
        rows = df[df[review_col].notna()]
        reviews = rows[review_col].astype(str).tolist()

        def _as_strings(column):
            return ['' if pd.isna(value) else str(value) for value in rows[column]]

        metadata = {}
        if time_col is not None:
            metadata['timestamps'] = _as_strings(time_col)
        if user_col is not None:
            metadata['usernames'] = _as_strings(user_col)

        return reviews, metadata

    except Exception as e:
        # UI boundary: report the problem to the caller instead of raising.
        return [], {"error": f"Failed to process CSV: {str(e)}"}
|
458 |
+
|
459 |
+
def sentiment_analysis_interface(reviews_text: str, csv_file):
|
460 |
"""Interface for sentiment analysis"""
|
461 |
+
reviews = []
|
462 |
+
|
463 |
+
if csv_file is not None:
|
464 |
+
reviews, metadata = process_csv_upload(csv_file)
|
465 |
+
if 'error' in metadata:
|
466 |
+
return metadata['error'], None
|
467 |
+
else:
|
468 |
+
reviews = process_reviews_input(reviews_text)
|
469 |
|
|
|
470 |
if not reviews:
|
471 |
+
return "Please enter reviews or upload a CSV file.", None
|
472 |
|
473 |
try:
|
474 |
result = analyzer.analyze_sentiment(reviews)
|
475 |
|
|
|
476 |
fig = go.Figure(data=[
|
477 |
go.Bar(x=list(result['summary'].keys()),
|
478 |
y=list(result['summary'].values()),
|
|
|
484 |
except Exception as e:
|
485 |
return f"Error: {str(e)}", None
|
486 |
|
487 |
+
def fake_detection_interface(reviews_text: str, csv_file):
    """Interface for fake review detection.

    An uploaded CSV takes precedence over the text box.  Metadata found in
    the CSV is forwarded to the detector; pasted text carries no metadata.

    Args:
        reviews_text: Reviews typed or pasted by the user.
        csv_file: Optional uploaded CSV, or ``None``.

    Returns:
        The detection result as indented JSON, or a human-readable message
        when there is nothing to analyse or something went wrong.
    """
    try:
        # Input parsing sits inside the try so a malformed input is reported
        # in the output box rather than surfacing as an unhandled error.
        if csv_file is not None:
            reviews, metadata = process_csv_upload(csv_file)
            if 'error' in metadata:
                return metadata['error']
        else:
            reviews = process_reviews_input(reviews_text or "")
            metadata = {}

        if not reviews:
            return "Please enter reviews or upload a CSV file."

        result = analyzer.detect_fake_reviews(reviews, metadata if metadata else None)
        return json.dumps(result, indent=2)
    except Exception as e:
        return f"Error: {str(e)}"
|
507 |
|
508 |
+
def quality_assessment_interface(reviews_text: str, csv_file, length_weight: float, detail_weight: float, structure_weight: float, help_weight: float):
    """Interface for quality assessment with custom weights.

    An uploaded CSV takes precedence over the text box; any metadata in the
    CSV is not used here.

    Args:
        reviews_text: Reviews typed or pasted by the user.
        csv_file: Optional uploaded CSV, or ``None``.
        length_weight: Weight of the ``length`` factor.
        detail_weight: Weight of the ``detail`` factor.
        structure_weight: Weight of the ``structure`` factor.
        help_weight: Weight of the ``helpfulness`` factor.

    Returns:
        ``(text, figure)``: the assessment as indented JSON plus the radar
        chart, or a message and ``None`` when there is nothing to analyse
        or something went wrong.
    """
    try:
        # Input parsing sits inside the try so a malformed input is reported
        # in the output box rather than surfacing as an unhandled error.
        if csv_file is not None:
            reviews, metadata = process_csv_upload(csv_file)
            if 'error' in metadata:
                return metadata['error'], None
        else:
            reviews = process_reviews_input(reviews_text or "")

        if not reviews:
            return "Please enter reviews or upload a CSV file.", None

        custom_weights = {
            'length': length_weight,
            'detail': detail_weight,
            'structure': structure_weight,
            'helpfulness': help_weight
        }

        result, radar_fig = analyzer.assess_quality(reviews, custom_weights)
        return json.dumps(result, indent=2), radar_fig
    except Exception as e:
        return f"Error: {str(e)}", None
|
534 |
|
535 |
def competitor_comparison_interface(product_a_text: str, product_b_text: str):
|
536 |
"""Interface for competitor comparison"""
|
|
|
564 |
# Create Gradio interface
|
565 |
with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
|
566 |
gr.Markdown("# π SmartReview Pro")
|
567 |
+
gr.Markdown("Advanced review analysis platform with AI-powered insights")
|
568 |
|
569 |
with gr.Tab("π Sentiment Analysis"):
|
570 |
+
gr.Markdown("### Analyze customer sentiment and extract key aspects")
|
571 |
with gr.Row():
|
572 |
with gr.Column():
|
573 |
sentiment_input = gr.Textbox(
|
574 |
+
lines=8,
|
575 |
+
placeholder="Enter reviews (one per line) or upload CSV...",
|
576 |
label="Reviews"
|
577 |
)
|
578 |
+
sentiment_csv = gr.File(
|
579 |
+
label="Upload CSV (columns: review/comment/text, optional: timestamp, username)",
|
580 |
+
file_types=[".csv"]
|
581 |
+
)
|
582 |
sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
|
583 |
with gr.Column():
|
584 |
sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
|
|
|
586 |
|
587 |
sentiment_btn.click(
|
588 |
sentiment_analysis_interface,
|
589 |
+
inputs=[sentiment_input, sentiment_csv],
|
590 |
outputs=[sentiment_output, sentiment_chart]
|
591 |
)
|
592 |
|
593 |
with gr.Tab("π Fake Review Detection"):
|
594 |
+
gr.Markdown("### Detect suspicious reviews using text analysis and metadata")
|
595 |
with gr.Row():
|
596 |
with gr.Column():
|
597 |
fake_input = gr.Textbox(
|
598 |
+
lines=8,
|
599 |
+
placeholder="Enter reviews to analyze...",
|
600 |
label="Reviews"
|
601 |
)
|
602 |
+
fake_csv = gr.File(
|
603 |
+
label="Upload CSV (supports timestamp & username analysis)",
|
604 |
+
file_types=[".csv"]
|
605 |
+
)
|
606 |
fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
|
607 |
with gr.Column():
|
608 |
fake_output = gr.Textbox(label="Detection Results", lines=15)
|
609 |
|
610 |
fake_btn.click(
|
611 |
fake_detection_interface,
|
612 |
+
inputs=[fake_input, fake_csv],
|
613 |
outputs=[fake_output]
|
614 |
)
|
615 |
|
616 |
with gr.Tab("β Quality Assessment"):
|
617 |
+
gr.Markdown("### Assess review quality with customizable weights")
|
618 |
with gr.Row():
|
619 |
with gr.Column():
|
620 |
quality_input = gr.Textbox(
|
621 |
+
lines=8,
|
622 |
+
placeholder="Enter reviews to assess...",
|
623 |
label="Reviews"
|
624 |
)
|
625 |
+
quality_csv = gr.File(
|
626 |
+
label="Upload CSV",
|
627 |
+
file_types=[".csv"]
|
628 |
+
)
|
629 |
+
|
630 |
+
gr.Markdown("**Customize Quality Weights:**")
|
631 |
+
with gr.Row():
|
632 |
+
length_weight = gr.Slider(0, 1, 0.25, label="Length Weight")
|
633 |
+
detail_weight = gr.Slider(0, 1, 0.25, label="Detail Weight")
|
634 |
+
with gr.Row():
|
635 |
+
structure_weight = gr.Slider(0, 1, 0.25, label="Structure Weight")
|
636 |
+
help_weight = gr.Slider(0, 1, 0.25, label="Helpfulness Weight")
|
637 |
+
|
638 |
quality_btn = gr.Button("Assess Quality", variant="primary")
|
639 |
with gr.Column():
|
640 |
+
quality_output = gr.Textbox(label="Quality Assessment", lines=12)
|
641 |
+
quality_radar = gr.Plot(label="Quality Factors Radar Chart")
|
642 |
|
643 |
quality_btn.click(
|
644 |
quality_assessment_interface,
|
645 |
+
inputs=[quality_input, quality_csv, length_weight, detail_weight, structure_weight, help_weight],
|
646 |
+
outputs=[quality_output, quality_radar]
|
647 |
)
|
648 |
|
649 |
with gr.Tab("π Competitor Comparison"):
|
|
|
694 |
inputs=[report_data, report_type],
|
695 |
outputs=[report_output]
|
696 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
697 |
|
698 |
if __name__ == "__main__":
|
699 |
demo.launch()
|