# Spaces: Sleeping  (hosting-page status banner captured with this file; not part of the program)
import hashlib
import json
import re
import sqlite3
import time
from contextlib import closing
from datetime import datetime
from typing import List, Dict, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# Load the sentiment model once at import time so every request reuses it.
# The checkpoint id is spelled out a single time and shared by both loaders.
_SENTIMENT_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment-latest"

sentiment_analyzer = pipeline("sentiment-analysis", model=_SENTIMENT_CHECKPOINT)
# NOTE(review): `tokenizer` is not referenced anywhere else in the code visible
# here; it is kept because other modules may import it.
tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT)
class ReviewAnalyzer:
    """Sentiment, authenticity and quality analysis for batches of review text.

    Sentiment labels come from the module-level ``sentiment_analyzer``
    pipeline. The fake-review and quality scores are fixed-weight heuristics
    computed from the text alone. Creating an instance makes sure the
    ``usage_log`` table exists in the SQLite file named by ``db_path``.
    """

    # Reviews longer than this are shortened in the ``text`` field of results.
    _PREVIEW_CHARS = 100
    # Upper bound passed to the tokenizer so long reviews are truncated instead
    # of overflowing the encoder.
    # NOTE(review): 512 is the usual limit for RoBERTa-base checkpoints —
    # confirm against the deployed model's config.
    _MAX_MODEL_TOKENS = 512

    def __init__(self):
        self.db_path = "reviews.db"
        self._init_db()

    def _init_db(self):
        """Create the ``usage_log`` table if it does not exist yet."""
        # closing() releases the handle even when the DDL raises; the inner
        # ``with conn`` commits on success and rolls back on error.
        with closing(sqlite3.connect(self.db_path)) as conn:
            with conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS usage_log (
                        id INTEGER PRIMARY KEY,
                        user_id TEXT,
                        timestamp DATETIME,
                        analysis_type TEXT,
                        items_count INTEGER
                    )
                ''')

    @staticmethod
    def _preview(review: str) -> str:
        """Return ``review`` cut to the preview length plus ``...`` when longer."""
        limit = ReviewAnalyzer._PREVIEW_CHARS
        return review[:limit] + '...' if len(review) > limit else review

    def preprocess_text(self, text: str) -> str:
        """Strip URLs and punctuation, trim the ends and lower-case ``text``."""
        text = re.sub(r'http\S+', '', text)  # Remove URLs
        text = re.sub(r'[^\w\s]', '', text)  # Remove special chars
        return text.strip().lower()

    def analyze_sentiment(self, reviews: List[str]) -> Dict:
        """Classify each non-blank review as positive, negative or neutral.

        Args:
            reviews: Raw review strings; blank entries are skipped.

        Returns:
            A dict with ``summary`` (percentage per sentiment, one decimal),
            ``details`` (one entry per analysed review with a text preview,
            the sentiment and the model confidence) and ``total_reviews``.
            When nothing was analysed every percentage is ``0.0``.
        """
        results = []
        sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
        for review in reviews:
            if not review.strip():
                continue
            clean_review = self.preprocess_text(review)
            # Truncate explicitly: without it an over-long review raises inside
            # the model and takes the whole batch down with it.
            result = sentiment_analyzer(
                clean_review,
                truncation=True,
                max_length=self._MAX_MODEL_TOKENS,
            )[0]
            label = result['label'].lower()
            score = result['score']
            # Map labels to standard format
            if 'pos' in label:
                sentiment = 'positive'
            elif 'neg' in label:
                sentiment = 'negative'
            else:
                sentiment = 'neutral'
            sentiments[sentiment] += 1
            results.append({
                'text': self._preview(review),
                'sentiment': sentiment,
                'confidence': round(score, 3),
            })

        total = len(results)
        if total:
            sentiment_percentages = {
                name: round(count / total * 100, 1)
                for name, count in sentiments.items()
            }
        else:
            # No usable review: report zeros instead of dividing by zero.
            sentiment_percentages = {name: 0.0 for name in sentiments}
        return {
            'summary': sentiment_percentages,
            'details': results,
            'total_reviews': total,
        }

    def detect_fake_reviews(self, reviews: List[str]) -> Dict:
        """Score each non-blank review with simple fake-review heuristics.

        The score is the sum of fixed weights: 0.3 for fewer than 20
        characters, 0.4 when under half of the words are unique, 0.2 when more
        than 10% of the characters are ``!``, ``?`` or ``.``, and 0.1 when a
        generic praise phrase occurs. A review is ``suspicious`` when its
        reported probability exceeds 0.5.

        Returns:
            A dict with ``summary`` (``total_reviews``, ``suspicious_reviews``,
            ``authenticity_rate`` in percent) and ``details`` per review.
        """
        generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
        fake_scores = []
        for review in reviews:
            if not review.strip():
                continue
            lowered = review.lower()
            score = 0
            # Length check
            if len(review) < 20:
                score += 0.3
            # Repetitive words
            words = lowered.split()
            unique_ratio = len(set(words)) / len(words) if words else 0
            if unique_ratio < 0.5:
                score += 0.4
            # Excessive punctuation
            punct_ratio = len(re.findall(r'[!?.]', review)) / len(review)
            if punct_ratio > 0.1:
                score += 0.2
            # Generic phrases
            if any(phrase in lowered for phrase in generic_phrases):
                score += 0.1

            probability = min(round(score, 3), 1.0)
            fake_scores.append({
                'text': self._preview(review),
                'fake_probability': probability,
                # Decide on the reported value so status and summary agree.
                'status': 'suspicious' if probability > 0.5 else 'authentic',
            })

        total = len(fake_scores)
        suspicious_count = sum(
            1 for item in fake_scores if item['status'] == 'suspicious'
        )
        return {
            'summary': {
                'total_reviews': total,
                'suspicious_reviews': suspicious_count,
                'authenticity_rate': round((total - suspicious_count) / total * 100, 1) if total else 0,
            },
            'details': fake_scores,
        }

    def assess_quality(self, reviews: List[str]) -> Dict:
        """Grade each non-blank review on length, detail, structure and helpfulness.

        Each factor is capped at 1.0 and weighted 0.3 (length, relative to 200
        characters), 0.3 (connective "detail" words, relative to 3), 0.2
        (sentence count, relative to 5) and 0.2 (helpfulness keywords,
        relative to 2). Grades: A above 0.8, B above 0.6, C above 0.4, else D.

        Returns:
            A dict with ``summary`` (``average_quality``, ``total_reviews``,
            ``high_quality_count`` for scores above 0.7) and ``details``.
        """
        detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
        helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']
        quality_scores = []
        for review in reviews:
            if not review.strip():
                continue
            lowered = review.lower()
            score = 0
            factors = {}
            # Length factor
            length_score = min(len(review) / 200, 1.0)
            factors['length'] = round(length_score, 2)
            score += length_score * 0.3
            # Detail factor (specific words)
            detail_score = min(sum(1 for word in detail_words if word in lowered) / 3, 1.0)
            factors['detail'] = round(detail_score, 2)
            score += detail_score * 0.3
            # Structure factor: count only fragments that contain text, so a
            # trailing "." or a run of "!!!" does not add phantom sentences.
            sentences = sum(1 for part in re.split(r'[.!?]', review) if part.strip())
            structure_score = min(sentences / 5, 1.0)
            factors['structure'] = round(structure_score, 2)
            score += structure_score * 0.2
            # Helpfulness factor
            helpful_score = min(sum(1 for word in helpful_words if word in lowered) / 2, 1.0)
            factors['helpfulness'] = round(helpful_score, 2)
            score += helpful_score * 0.2

            quality_scores.append({
                'text': self._preview(review),
                'quality_score': round(score, 3),
                'factors': factors,
                'grade': 'A' if score > 0.8 else 'B' if score > 0.6 else 'C' if score > 0.4 else 'D',
            })

        avg_quality = (
            sum(item['quality_score'] for item in quality_scores) / len(quality_scores)
            if quality_scores else 0
        )
        return {
            'summary': {
                'average_quality': round(avg_quality, 3),
                'total_reviews': len(quality_scores),
                'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7),
            },
            'details': quality_scores,
        }

    def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, "go.Figure"]:
        """Compare the sentiment split of two products.

        Returns:
            ``(comparison, fig)`` where ``comparison`` holds both sentiment
            analyses plus ``winner`` (the product with the higher positive
            percentage, or ``'Tie'`` when they are equal) and ``fig`` is a
            figure with one sentiment pie per product.
        """
        analysis_a = self.analyze_sentiment(product_a_reviews)
        analysis_b = self.analyze_sentiment(product_b_reviews)

        # Create comparison chart
        fig = make_subplots(
            rows=1, cols=2,
            specs=[[{'type': 'pie'}, {'type': 'pie'}]],
            subplot_titles=['Product A', 'Product B'],
        )
        for column, (name, analysis) in enumerate(
            [("Product A", analysis_a), ("Product B", analysis_b)], start=1
        ):
            fig.add_trace(go.Pie(
                labels=list(analysis['summary'].keys()),
                values=list(analysis['summary'].values()),
                name=name,
            ), row=1, col=column)
        fig.update_layout(title_text="Sentiment Comparison")

        positive_a = analysis_a['summary']['positive']
        positive_b = analysis_b['summary']['positive']
        if positive_a > positive_b:
            winner = 'Product A'
        elif positive_b > positive_a:
            winner = 'Product B'
        else:
            # Equal shares used to be reported as a Product B win.
            winner = 'Tie'

        comparison = {
            'product_a': analysis_a,
            'product_b': analysis_b,
            'winner': winner,
        }
        return comparison, fig

    def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
        """Render a Markdown report for a previously computed analysis.

        Args:
            analysis_data: Result dict of ``analyze_sentiment``,
                ``detect_fake_reviews`` or ``assess_quality``; missing keys
                are reported as 0.
            report_type: ``"sentiment"``, ``"fake"`` or ``"quality"``.

        Returns:
            The report text, or ``"Report generated successfully"`` for any
            other ``report_type``.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        summary = analysis_data.get('summary', {})

        if report_type == "sentiment":
            positive = summary.get('positive', 0)
            trend = 'Positive' if positive > 50 else 'Mixed'
            satisfaction = 'High' if positive > 70 else 'Moderate' if positive > 40 else 'Low'
            lines = [
                "# Sentiment Analysis Report",
                f"Generated: {timestamp}",
                "## Summary",
                f"- Total Reviews: {analysis_data.get('total_reviews', 0)}",
                f"- Positive: {positive}%",
                f"- Negative: {summary.get('negative', 0)}%",
                f"- Neutral: {summary.get('neutral', 0)}%",
                "## Key Insights",
                f"- Overall sentiment trend: {trend}",
                f"- Customer satisfaction level: {satisfaction}",
                "## Recommendations",
                "- Focus on addressing negative feedback themes",
                "- Leverage positive reviews for marketing",
                "- Monitor sentiment trends over time",
            ]
        elif report_type == "fake":
            authenticity = summary.get('authenticity_rate', 0)
            suspicious = summary.get('suspicious_reviews', 0)
            lines = [
                "# Fake Review Detection Report",
                f"Generated: {timestamp}",
                "## Summary",
                f"- Total Reviews Analyzed: {summary.get('total_reviews', 0)}",
                f"- Suspicious Reviews: {suspicious}",
                f"- Authenticity Rate: {authenticity}%",
                "## Risk Assessment",
                f"- Review Quality: {'High Risk' if authenticity < 70 else 'Low Risk'}",
                f"- Recommendation: {'Investigate suspicious reviews' if suspicious > 0 else 'Reviews appear authentic'}",
            ]
        elif report_type == "quality":
            # The UI offers this type; it previously fell through to the
            # generic success message without producing a report.
            lines = [
                "# Review Quality Report",
                f"Generated: {timestamp}",
                "## Summary",
                f"- Total Reviews Assessed: {summary.get('total_reviews', 0)}",
                f"- Average Quality Score: {summary.get('average_quality', 0)}",
                f"- High-Quality Reviews: {summary.get('high_quality_count', 0)}",
            ]
        else:
            return "Report generated successfully"

        # Reports start and end with a newline, as the original templates did.
        return "\n" + "\n".join(lines) + "\n"
# Global analyzer instance shared by the interface callbacks defined below.
# Constructing it runs ReviewAnalyzer._init_db(), which opens reviews.db and
# creates the usage_log table when it is missing.
analyzer = ReviewAnalyzer()
def process_reviews_input(text: str) -> List[str]:
    """Turn pasted text into a list of reviews, one per input line.

    Each line is trimmed; lines of 10 characters or fewer after trimming
    (including empty ones) are discarded. Blank input yields an empty list.
    """
    trimmed_lines = (raw_line.strip() for raw_line in text.split('\n'))
    # The length floor also filters out empty lines.
    return [entry for entry in trimmed_lines if len(entry) > 10]
def sentiment_analysis_interface(reviews_text: str):
    """Run sentiment analysis on pasted reviews for the Gradio tab.

    Returns a ``(text, figure)`` pair: the JSON-formatted analysis and a bar
    chart of the sentiment percentages, or a message and ``None`` when the
    input is blank, contains no usable review, or the analysis raises.
    """
    if reviews_text.strip() == "":
        return "Please enter some reviews to analyze.", None

    parsed_reviews = process_reviews_input(reviews_text)
    if len(parsed_reviews) == 0:
        return "No valid reviews found. Please check your input.", None

    try:
        analysis = analyzer.analyze_sentiment(parsed_reviews)
        summary = analysis['summary']
        # Colours follow the summary's key order: positive, negative, neutral.
        bars = go.Bar(
            x=list(summary.keys()),
            y=list(summary.values()),
            marker_color=['green', 'red', 'gray'],
        )
        chart = go.Figure(data=[bars])
        chart.update_layout(title="Sentiment Distribution", yaxis_title="Percentage")
        return json.dumps(analysis, indent=2), chart
    except Exception as exc:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(exc)}", None
def fake_detection_interface(reviews_text: str):
    """Run fake-review detection on pasted reviews for the Gradio tab.

    Returns the JSON-formatted detection result, or a plain message when the
    input is blank, holds no usable review, or the detection raises.
    """
    if reviews_text.strip() == "":
        return "Please enter some reviews to analyze."

    candidates = process_reviews_input(reviews_text)
    if len(candidates) == 0:
        return "No valid reviews found. Please check your input."

    try:
        detection = analyzer.detect_fake_reviews(candidates)
        rendered = json.dumps(detection, indent=2)
    except Exception as exc:
        # UI boundary: report the failure as text.
        return f"Error: {str(exc)}"
    return rendered
def quality_assessment_interface(reviews_text: str):
    """Run the quality assessment on pasted reviews for the Gradio tab.

    Returns the JSON-formatted assessment, or a plain message when the input
    is blank, holds no usable review, or the assessment raises.
    """
    has_text = bool(reviews_text.strip())
    if not has_text:
        return "Please enter some reviews to analyze."

    usable_reviews = process_reviews_input(reviews_text)
    if usable_reviews:
        try:
            return json.dumps(analyzer.assess_quality(usable_reviews), indent=2)
        except Exception as exc:
            # UI boundary: report the failure as text.
            return f"Error: {str(exc)}"
    return "No valid reviews found. Please check your input."
def competitor_comparison_interface(product_a_text: str, product_b_text: str):
    """Compare two products' reviews for the Gradio tab.

    Returns a ``(text, figure)`` pair: the JSON-formatted comparison and its
    chart, or a message and ``None`` when either input is blank, either side
    has no usable review, or the comparison raises.
    """
    both_filled = product_a_text.strip() and product_b_text.strip()
    if not both_filled:
        return "Please enter reviews for both products.", None

    reviews_a = process_reviews_input(product_a_text)
    reviews_b = process_reviews_input(product_b_text)
    if not (reviews_a and reviews_b):
        return "Please provide valid reviews for both products.", None

    try:
        comparison, chart = analyzer.compare_competitors(reviews_a, reviews_b)
        rendered = json.dumps(comparison, indent=2)
    except Exception as exc:
        # UI boundary: report the failure as text.
        return f"Error: {str(exc)}", None
    return rendered, chart
def generate_report_interface(analysis_result: str, report_type: str):
    """Build a report from pasted analysis JSON for the Gradio tab.

    ``analysis_result`` is parsed as JSON and handed to the analyzer together
    with the lower-cased ``report_type``. Returns the report text, or a
    message when the input is blank or anything in that chain raises.
    """
    if analysis_result.strip() == "":
        return "No analysis data available. Please run an analysis first."

    try:
        parsed = json.loads(analysis_result)
        kind = report_type.lower()
        return analyzer.generate_report(parsed, kind)
    except Exception as exc:
        # Covers malformed JSON as well as failures inside the generator.
        return f"Error generating report: {str(exc)}"
# Create Gradio interface: one tab per analysis, each wiring a button to the
# matching *_interface callback defined above.
# NOTE(review): the heading and tab labels each began with an icon that reached
# this copy as stray Greek letters (a mis-decoded emoji). The residue is removed
# rather than guessed at; re-add the icons from the original if they are wanted.
with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# SmartReview Pro")
    gr.Markdown("Professional review analysis platform for e-commerce businesses")

    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("### Analyze customer sentiment from reviews")
        with gr.Row():
            with gr.Column():
                sentiment_input = gr.Textbox(
                    lines=10,
                    placeholder="Enter reviews (one per line):\nGreat product, love it!\nTerrible quality, waste of money.\nOkay product, nothing special.",
                    label="Reviews",
                )
                sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
            with gr.Column():
                sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
                sentiment_chart = gr.Plot(label="Sentiment Distribution")
        sentiment_btn.click(
            sentiment_analysis_interface,
            inputs=[sentiment_input],
            outputs=[sentiment_output, sentiment_chart],
        )

    with gr.Tab("Fake Review Detection"):
        gr.Markdown("### Detect potentially fake or suspicious reviews")
        with gr.Row():
            with gr.Column():
                fake_input = gr.Textbox(
                    lines=10,
                    placeholder="Enter reviews to check for authenticity...",
                    label="Reviews",
                )
                fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
            with gr.Column():
                fake_output = gr.Textbox(label="Detection Results", lines=15)
        fake_btn.click(
            fake_detection_interface,
            inputs=[fake_input],
            outputs=[fake_output],
        )

    with gr.Tab("Quality Assessment"):
        gr.Markdown("### Assess the quality and helpfulness of reviews")
        with gr.Row():
            with gr.Column():
                quality_input = gr.Textbox(
                    lines=10,
                    placeholder="Enter reviews to assess quality...",
                    label="Reviews",
                )
                quality_btn = gr.Button("Assess Quality", variant="primary")
            with gr.Column():
                quality_output = gr.Textbox(label="Quality Assessment", lines=15)
        quality_btn.click(
            quality_assessment_interface,
            inputs=[quality_input],
            outputs=[quality_output],
        )

    with gr.Tab("Competitor Comparison"):
        gr.Markdown("### Compare sentiment between competing products")
        with gr.Row():
            with gr.Column():
                comp_product_a = gr.Textbox(
                    lines=8,
                    placeholder="Product A reviews...",
                    label="Product A Reviews",
                )
                comp_product_b = gr.Textbox(
                    lines=8,
                    placeholder="Product B reviews...",
                    label="Product B Reviews",
                )
                comp_btn = gr.Button("Compare Products", variant="primary")
            with gr.Column():
                comp_output = gr.Textbox(label="Comparison Results", lines=15)
                comp_chart = gr.Plot(label="Comparison Chart")
        comp_btn.click(
            competitor_comparison_interface,
            inputs=[comp_product_a, comp_product_b],
            outputs=[comp_output, comp_chart],
        )

    with gr.Tab("Report Generation"):
        gr.Markdown("### Generate professional analysis reports")
        with gr.Row():
            with gr.Column():
                report_data = gr.Textbox(
                    lines=10,
                    placeholder="Paste analysis results here...",
                    label="Analysis Data (JSON)",
                )
                report_type = gr.Dropdown(
                    choices=["sentiment", "fake", "quality"],
                    value="sentiment",
                    label="Report Type",
                )
                report_btn = gr.Button("Generate Report", variant="primary")
            with gr.Column():
                report_output = gr.Textbox(label="Generated Report", lines=15)
        report_btn.click(
            generate_report_interface,
            inputs=[report_data, report_type],
            outputs=[report_output],
        )

    with gr.Tab("About"):
        # Blank lines separate the sections so the closing sentence renders as
        # its own paragraph instead of continuing the last bullet.
        # NOTE(review): no quota enforcement is visible in this file — the
        # usage_log table is created but never written here; confirm the
        # advertised limits are enforced elsewhere.
        gr.Markdown("""
        ## SmartReview Pro Features

        - **Sentiment Analysis**: Analyze customer emotions and opinions
        - **Fake Review Detection**: Identify suspicious or inauthentic reviews
        - **Quality Assessment**: Evaluate review helpfulness and detail
        - **Competitor Comparison**: Compare sentiment across products
        - **Professional Reports**: Generate detailed analysis reports

        ## Pricing Plans

        - **Free**: 10 analyses per day
        - **Pro ($299/month)**: 1000 analyses per day + advanced features
        - **Enterprise**: Unlimited usage + API access + custom reports

        Contact us for enterprise solutions and custom integrations.
        """)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()