Spaces:

entropy25
/

SmartReview

Sleeping

App Files Files Community

entropy25 committed 17 days ago

Commit

baf4a02

verified ·

1 Parent(s): fc0d38a

Update app.py

Browse files

Files changed (1) hide show

app.py +311 -104

app.py CHANGED Viewed

@@ -3,9 +3,10 @@ import pandas as pd
 import numpy as np
 import json
 import re
 from datetime import datetime
 from typing import List, Dict, Tuple
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 import sqlite3
@@ -14,7 +15,7 @@ import time
 # Initialize models
 sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
-tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
 class ReviewAnalyzer:
     def __init__(self):
@@ -36,13 +37,65 @@ class ReviewAnalyzer:
     def preprocess_text(self, text: str) -> str:
         """Clean and preprocess review text"""
-        text = re.sub(r'http\S+', '', text)  # Remove URLs
-        text = re.sub(r'[^\w\s]', '', text)  # Remove special chars
         text = text.strip().lower()
         return text
     def analyze_sentiment(self, reviews: List[str]) -> Dict:
-        """Analyze sentiment of reviews"""
         results = []
         sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
@@ -54,9 +107,8 @@ class ReviewAnalyzer:
             result = sentiment_analyzer(clean_review)[0]
             label = result['label'].lower()
-            score = result['score']
-            # Map labels to standard format
             if 'pos' in label:
                 sentiment = 'positive'
             elif 'neg' in label:
@@ -74,47 +126,64 @@ class ReviewAnalyzer:
         total = len(results)
         sentiment_percentages = {k: round(v/total*100, 1) for k, v in sentiments.items()}
         return {
             'summary': sentiment_percentages,
             'details': results,
-            'total_reviews': total
         }
-    def detect_fake_reviews(self, reviews: List[str]) -> Dict:
-        """Detect potentially fake reviews"""
         fake_scores = []
-        for review in reviews:
             if not review.strip():
                 continue
-            # Simple fake detection heuristics
             score = 0
-            # Length check
             if len(review) < 20:
                 score += 0.3
-            # Repetitive words
             words = review.lower().split()
             unique_ratio = len(set(words)) / len(words) if words else 0
             if unique_ratio < 0.5:
                 score += 0.4
-            # Excessive punctuation
             punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
             if punct_ratio > 0.1:
                 score += 0.2
-            # Generic phrases
             generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
             if any(phrase in review.lower() for phrase in generic_phrases):
                 score += 0.1
             fake_scores.append({
                 'text': review[:100] + '...' if len(review) > 100 else review,
                 'fake_probability': min(round(score, 3), 1.0),
-                'status': 'suspicious' if score > 0.5 else 'authentic'
             })
         suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)
@@ -125,81 +194,141 @@ class ReviewAnalyzer:
                 'suspicious_reviews': suspicious_count,
                 'authenticity_rate': round((len(fake_scores) - suspicious_count) / len(fake_scores) * 100, 1) if fake_scores else 0
             },
-            'details': fake_scores
         }
-    def assess_quality(self, reviews: List[str]) -> Dict:
-        """Assess review quality"""
         quality_scores = []
         for review in reviews:
             if not review.strip():
                 continue
-            score = 0
             factors = {}
             # Length factor
             length_score = min(len(review) / 200, 1.0)
             factors['length'] = round(length_score, 2)
-            score += length_score * 0.3
-            # Detail factor (specific words)
             detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
             detail_score = min(sum(1 for word in detail_words if word in review.lower()) / 3, 1.0)
             factors['detail'] = round(detail_score, 2)
-            score += detail_score * 0.3
             # Structure factor
             sentences = len(re.split(r'[.!?]', review))
             structure_score = min(sentences / 5, 1.0)
             factors['structure'] = round(structure_score, 2)
-            score += structure_score * 0.2
             # Helpfulness factor
             helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']
             helpful_score = min(sum(1 for word in helpful_words if word in review.lower()) / 2, 1.0)
             factors['helpfulness'] = round(helpful_score, 2)
-            score += helpful_score * 0.2
             quality_scores.append({
                 'text': review[:100] + '...' if len(review) > 100 else review,
-                'quality_score': round(score, 3),
                 'factors': factors,
-                'grade': 'A' if score > 0.8 else 'B' if score > 0.6 else 'C' if score > 0.4 else 'D'
             })
         avg_quality = sum(item['quality_score'] for item in quality_scores) / len(quality_scores) if quality_scores else 0
         return {
             'summary': {
                 'average_quality': round(avg_quality, 3),
                 'total_reviews': len(quality_scores),
-                'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7)
             },
-            'details': quality_scores
-        }
     def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
         """Compare sentiment between two products"""
         analysis_a = self.analyze_sentiment(product_a_reviews)
         analysis_b = self.analyze_sentiment(product_b_reviews)
-        # Create comparison chart
         fig = make_subplots(
             rows=1, cols=2,
             specs=[[{'type': 'pie'}, {'type': 'pie'}]],
             subplot_titles=['Product A', 'Product B']
         )
-        # Product A pie chart
         fig.add_trace(go.Pie(
             labels=list(analysis_a['summary'].keys()),
             values=list(analysis_a['summary'].values()),
             name="Product A"
         ), row=1, col=1)
-        # Product B pie chart
         fig.add_trace(go.Pie(
             labels=list(analysis_b['summary'].keys()),
             values=list(analysis_b['summary'].values()),
@@ -217,12 +346,15 @@ class ReviewAnalyzer:
         return comparison, fig
     def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
-        """Generate analysis report"""
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         if report_type == "sentiment":
-            return f"""
-# Sentiment Analysis Report
 Generated: {timestamp}
 ## Summary
@@ -231,30 +363,42 @@ Generated: {timestamp}
 - Negative: {analysis_data.get('summary', {}).get('negative', 0)}%
 - Neutral: {analysis_data.get('summary', {}).get('neutral', 0)}%
 ## Key Insights
-- Overall sentiment trend: {'Positive' if analysis_data.get('summary', {}).get('positive', 0) > 50 else 'Mixed'}
-- Customer satisfaction level: {'High' if analysis_data.get('summary', {}).get('positive', 0) > 70 else 'Moderate' if analysis_data.get('summary', {}).get('positive', 0) > 40 else 'Low'}
 ## Recommendations
-- Focus on addressing negative feedback themes
-- Leverage positive reviews for marketing
 - Monitor sentiment trends over time
-            """
         elif report_type == "fake":
-            return f"""
-# Fake Review Detection Report
 Generated: {timestamp}
 ## Summary
-- Total Reviews Analyzed: {analysis_data.get('summary', {}).get('total_reviews', 0)}
 - Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
 - Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%
 ## Risk Assessment
-- Review Quality: {'High Risk' if analysis_data.get('summary', {}).get('authenticity_rate', 0) < 70 else 'Low Risk'}
-- Recommendation: {'Investigate suspicious reviews' if analysis_data.get('summary', {}).get('suspicious_reviews', 0) > 0 else 'Reviews appear authentic'}
-            """
         return "Report generated successfully"
@@ -266,28 +410,69 @@ def process_reviews_input(text: str) -> List[str]:
     if not text.strip():
         return []
-    # Split by lines or by common separators
     reviews = []
     for line in text.split('\n'):
         line = line.strip()
-        if line and len(line) > 10:  # Minimum length check
             reviews.append(line)
     return reviews
-def sentiment_analysis_interface(reviews_text: str):
     """Interface for sentiment analysis"""
-    if not reviews_text.strip():
-        return "Please enter some reviews to analyze.", None
-    reviews = process_reviews_input(reviews_text)
     if not reviews:
-        return "No valid reviews found. Please check your input.", None
     try:
         result = analyzer.analyze_sentiment(reviews)
-        # Create visualization
         fig = go.Figure(data=[
             go.Bar(x=list(result['summary'].keys()),
                    y=list(result['summary'].values()),
@@ -299,35 +484,53 @@ def sentiment_analysis_interface(reviews_text: str):
     except Exception as e:
         return f"Error: {str(e)}", None
-def fake_detection_interface(reviews_text: str):
     """Interface for fake review detection"""
-    if not reviews_text.strip():
-        return "Please enter some reviews to analyze."
-    reviews = process_reviews_input(reviews_text)
     if not reviews:
-        return "No valid reviews found. Please check your input."
     try:
-        result = analyzer.detect_fake_reviews(reviews)
         return json.dumps(result, indent=2)
     except Exception as e:
         return f"Error: {str(e)}"
-def quality_assessment_interface(reviews_text: str):
-    """Interface for quality assessment"""
-    if not reviews_text.strip():
-        return "Please enter some reviews to analyze."
-    reviews = process_reviews_input(reviews_text)
     if not reviews:
-        return "No valid reviews found. Please check your input."
     try:
-        result = analyzer.assess_quality(reviews)
-        return json.dumps(result, indent=2)
     except Exception as e:
-        return f"Error: {str(e)}"
 def competitor_comparison_interface(product_a_text: str, product_b_text: str):
     """Interface for competitor comparison"""
@@ -361,17 +564,21 @@ def generate_report_interface(analysis_result: str, report_type: str):
 # Create Gradio interface
 with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🛒 SmartReview Pro")
-    gr.Markdown("Professional review analysis platform for e-commerce businesses")
     with gr.Tab("📊 Sentiment Analysis"):
-        gr.Markdown("### Analyze customer sentiment from reviews")
         with gr.Row():
             with gr.Column():
                 sentiment_input = gr.Textbox(
-                    lines=10,
-                    placeholder="Enter reviews (one per line):\nGreat product, love it!\nTerrible quality, waste of money.\nOkay product, nothing special.",
                     label="Reviews"
                 )
                 sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
             with gr.Column():
                 sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
@@ -379,46 +586,64 @@ with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
         sentiment_btn.click(
             sentiment_analysis_interface,
-            inputs=[sentiment_input],
             outputs=[sentiment_output, sentiment_chart]
         )
     with gr.Tab("🔍 Fake Review Detection"):
-        gr.Markdown("### Detect potentially fake or suspicious reviews")
         with gr.Row():
             with gr.Column():
                 fake_input = gr.Textbox(
-                    lines=10,
-                    placeholder="Enter reviews to check for authenticity...",
                     label="Reviews"
                 )
                 fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
             with gr.Column():
                 fake_output = gr.Textbox(label="Detection Results", lines=15)
         fake_btn.click(
             fake_detection_interface,
-            inputs=[fake_input],
             outputs=[fake_output]
         )
     with gr.Tab("⭐ Quality Assessment"):
-        gr.Markdown("### Assess the quality and helpfulness of reviews")
         with gr.Row():
             with gr.Column():
                 quality_input = gr.Textbox(
-                    lines=10,
-                    placeholder="Enter reviews to assess quality...",
                     label="Reviews"
                 )
                 quality_btn = gr.Button("Assess Quality", variant="primary")
             with gr.Column():
-                quality_output = gr.Textbox(label="Quality Assessment", lines=15)
         quality_btn.click(
             quality_assessment_interface,
-            inputs=[quality_input],
-            outputs=[quality_output]
         )
     with gr.Tab("🆚 Competitor Comparison"):
@@ -469,24 +694,6 @@ with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
             inputs=[report_data, report_type],
             outputs=[report_output]
         )
-    with gr.Tab("ℹ️ About"):
-        gr.Markdown("""
-        ## SmartReview Pro Features
-        - **Sentiment Analysis**: Analyze customer emotions and opinions
-        - **Fake Review Detection**: Identify suspicious or inauthentic reviews
-        - **Quality Assessment**: Evaluate review helpfulness and detail
-        - **Competitor Comparison**: Compare sentiment across products
-        - **Professional Reports**: Generate detailed analysis reports
-        ## Pricing Plans
-        - **Free**: 10 analyses per day
-        - **Pro ($299/month)**: 1000 analyses per day + advanced features
-        - **Enterprise**: Unlimited usage + API access + custom reports
-        Contact us for enterprise solutions and custom integrations.
-        """)
 if __name__ == "__main__":
     demo.launch()

 import numpy as np
 import json
 import re
+import io
 from datetime import datetime
 from typing import List, Dict, Tuple
+from transformers import pipeline, AutoTokenizer
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 import sqlite3
 # Initialize models
 sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
+absa_analyzer = pipeline("ner", model="yangheng/deberta-v3-base-absa-v1.1", aggregation_strategy="simple")
 class ReviewAnalyzer:
     def __init__(self):
     def preprocess_text(self, text: str) -> str:
         """Clean and preprocess review text"""
+        text = re.sub(r'http\S+', '', text)
+        text = re.sub(r'[^\w\s]', '', text)
         text = text.strip().lower()
         return text
def extract_aspect_keywords(self, reviews: List[str]) -> Dict:
    """Count positively and negatively labelled aspect terms across reviews.

    Each review of at least 10 characters is passed to the module-level
    ``absa_analyzer``. Every item it returns is read through its ``'word'``,
    ``'entity_group'`` and ``'score'`` keys; items whose label contains
    neither ``'pos'`` nor ``'neg'`` are ignored.

    Args:
        reviews: Raw review strings. Blank or very short entries are skipped.

    Returns:
        A dict with:
            * ``top_positive_aspects`` / ``top_negative_aspects``: up to ten
              ``(word, count)`` pairs, most frequent first.
            * ``detailed_aspects``: one record per detected aspect (review
              snippet, aspect word, sentiment, rounded confidence).
            * ``summary``: number of distinct positive / negative aspects.
    """
    aspect_counts = {'positive': {}, 'negative': {}}
    detailed_aspects = []

    for review in reviews:
        if not review.strip() or len(review) < 10:
            continue

        # Only mark the snippet as truncated when something was cut off.
        snippet = review[:50] + '...' if len(review) > 50 else review

        try:
            for aspect in absa_analyzer(review):
                word = aspect['word'].lower()
                label = aspect['entity_group'].lower()
                confidence = float(aspect['score'])

                if 'pos' in label:
                    sentiment = 'positive'
                elif 'neg' in label:
                    sentiment = 'negative'
                else:
                    continue

                bucket = aspect_counts[sentiment]
                bucket[word] = bucket.get(word, 0) + 1
                detailed_aspects.append({
                    'review': snippet,
                    'aspect': word,
                    'sentiment': sentiment,
                    'confidence': round(confidence, 3)
                })
        except Exception:
            # Best effort: a review the model cannot handle (or a malformed
            # result item) is skipped. Aspects already recorded for that
            # review are kept. KeyboardInterrupt/SystemExit still propagate.
            continue

    def top_ten(counts):
        # sorted() is stable, so ties keep first-seen order.
        return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)[:10]

    return {
        'top_positive_aspects': top_ten(aspect_counts['positive']),
        'top_negative_aspects': top_ten(aspect_counts['negative']),
        'detailed_aspects': detailed_aspects,
        'summary': {
            'total_positive_aspects': len(aspect_counts['positive']),
            'total_negative_aspects': len(aspect_counts['negative'])
        }
    }
     def analyze_sentiment(self, reviews: List[str]) -> Dict:
+        """Analyze sentiment of reviews with keyword extraction"""
         results = []
         sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
             result = sentiment_analyzer(clean_review)[0]
             label = result['label'].lower()
+            score = float(result['score'])
             if 'pos' in label:
                 sentiment = 'positive'
             elif 'neg' in label:
         total = len(results)
         sentiment_percentages = {k: round(v/total*100, 1) for k, v in sentiments.items()}
+        # Extract keywords
+        keywords = self.extract_aspect_keywords(reviews)
         return {
             'summary': sentiment_percentages,
             'details': results,
+            'total_reviews': total,
+            'keywords': keywords
         }
+    def detect_fake_reviews(self, reviews: List[str], metadata: Dict = None) -> Dict:
+        """Detect potentially fake reviews with optional metadata"""
         fake_scores = []
+        # Process metadata if provided
+        metadata_flags = []
+        if metadata and 'timestamps' in metadata and 'usernames' in metadata:
+            metadata_flags = self._analyze_metadata(metadata['timestamps'], metadata['usernames'])
+        for i, review in enumerate(reviews):
             if not review.strip():
                 continue
             score = 0
+            flags = []
+            # Text-based checks
             if len(review) < 20:
                 score += 0.3
+                flags.append("too_short")
             words = review.lower().split()
             unique_ratio = len(set(words)) / len(words) if words else 0
             if unique_ratio < 0.5:
                 score += 0.4
+                flags.append("repetitive")
             punct_ratio = len(re.findall(r'[!?.]', review)) / len(review) if review else 0
             if punct_ratio > 0.1:
                 score += 0.2
+                flags.append("excessive_punctuation")
             generic_phrases = ['amazing', 'perfect', 'best ever', 'highly recommend']
             if any(phrase in review.lower() for phrase in generic_phrases):
                 score += 0.1
+                flags.append("generic_language")
+            # Add metadata flags if available
+            if i < len(metadata_flags):
+                if metadata_flags[i]:
+                    score += 0.3
+                    flags.extend(metadata_flags[i])
             fake_scores.append({
                 'text': review[:100] + '...' if len(review) > 100 else review,
                 'fake_probability': min(round(score, 3), 1.0),
+                'status': 'suspicious' if score > 0.5 else 'authentic',
+                'flags': flags
             })
         suspicious_count = sum(1 for item in fake_scores if item['fake_probability'] > 0.5)
                 'suspicious_reviews': suspicious_count,
                 'authenticity_rate': round((len(fake_scores) - suspicious_count) / len(fake_scores) * 100, 1) if fake_scores else 0
             },
+            'details': fake_scores,
+            'metadata_analysis': metadata_flags if metadata_flags else None
         }
def _analyze_metadata(self, timestamps: List[str], usernames: List[str]) -> List[List[str]]:
    """Flag reviews whose timestamp or username looks suspicious.

    Position ``i`` of the result holds the flags for the review at position
    ``i`` of the inputs. Possible flags:

    * ``"time_cluster"``: the review belongs to a run of six chronologically
      consecutive reviews spanning less than five minutes.
    * ``"suspicious_username"``: the username starts with ``user_`` followed
      by four or more digits.
    * ``"short_username"``: the username has fewer than four characters.

    Args:
        timestamps: Strings in ``%Y-%m-%d %H:%M:%S`` format. Entries that do
            not parse are ignored for cluster detection.
        usernames: Usernames, index-aligned with ``timestamps``.

    Returns:
        One list of flags per review. Its length is the longer of the two
        inputs, so unequal lengths never raise ``IndexError``.
    """
    cluster_size = 6      # reviews that must fall inside one window
    window_seconds = 300  # five minutes

    flags_per_review = [[] for _ in range(max(len(timestamps), len(usernames)))]

    # Keep the original position with each parsed time so flags can be
    # written back after sorting chronologically.
    parsed = []
    for idx, ts in enumerate(timestamps):
        try:
            parsed.append((idx, datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")))
        except (ValueError, TypeError):
            continue
    parsed.sort(key=lambda item: item[1])

    # Collect positions in a set: overlapping windows must not append the
    # same flag to a review more than once.
    clustered = set()
    for start in range(len(parsed) - cluster_size + 1):
        window = parsed[start:start + cluster_size]
        if (window[-1][1] - window[0][1]).total_seconds() < window_seconds:
            clustered.update(idx for idx, _ in window)
    for idx in sorted(clustered):
        flags_per_review[idx].append("time_cluster")

    for idx, username in enumerate(usernames):
        if re.match(r"user_\d{4,}", username):
            flags_per_review[idx].append("suspicious_username")
        if len(username) < 4:
            flags_per_review[idx].append("short_username")

    return flags_per_review
+    def assess_quality(self, reviews: List[str], custom_weights: Dict = None) -> Tuple[Dict, go.Figure]:
+        """Assess review quality with customizable weights and radar chart"""
+        default_weights = {
+            'length': 0.25,
+            'detail': 0.25,
+            'structure': 0.25,
+            'helpfulness': 0.25
+        }
+        weights = custom_weights if custom_weights else default_weights
         quality_scores = []
         for review in reviews:
             if not review.strip():
                 continue
             factors = {}
             # Length factor
             length_score = min(len(review) / 200, 1.0)
             factors['length'] = round(length_score, 2)
+            # Detail factor
             detail_words = ['because', 'however', 'although', 'specifically', 'particularly']
             detail_score = min(sum(1 for word in detail_words if word in review.lower()) / 3, 1.0)
             factors['detail'] = round(detail_score, 2)
             # Structure factor
             sentences = len(re.split(r'[.!?]', review))
             structure_score = min(sentences / 5, 1.0)
             factors['structure'] = round(structure_score, 2)
             # Helpfulness factor
             helpful_words = ['pros', 'cons', 'recommend', 'suggest', 'tip', 'advice']
             helpful_score = min(sum(1 for word in helpful_words if word in review.lower()) / 2, 1.0)
             factors['helpfulness'] = round(helpful_score, 2)
+            # Calculate weighted score
+            total_score = sum(factors[k] * weights[k] for k in factors.keys())
             quality_scores.append({
                 'text': review[:100] + '...' if len(review) > 100 else review,
+                'quality_score': round(total_score, 3),
                 'factors': factors,
+                'grade': 'A' if total_score > 0.8 else 'B' if total_score > 0.6 else 'C' if total_score > 0.4 else 'D'
             })
         avg_quality = sum(item['quality_score'] for item in quality_scores) / len(quality_scores) if quality_scores else 0
+        # Create radar chart for average factors
+        avg_factors = {}
+        for factor in ['length', 'detail', 'structure', 'helpfulness']:
+            avg_factors[factor] = float(sum(item['factors'][factor] for item in quality_scores) / len(quality_scores) if quality_scores else 0)
+        fig = go.Figure()
+        fig.add_trace(go.Scatterpolar(
+            r=list(avg_factors.values()),
+            theta=list(avg_factors.keys()),
+            fill='toself',
+            name='Quality Factors'
+        ))
+        fig.update_layout(
+            polar=dict(
+                radialaxis=dict(
+                    visible=True,
+                    range=[0, 1]
+                )),
+            showlegend=True,
+            title="Average Quality Factors"
+        )
         return {
             'summary': {
                 'average_quality': round(avg_quality, 3),
                 'total_reviews': len(quality_scores),
+                'high_quality_count': sum(1 for item in quality_scores if item['quality_score'] > 0.7),
+                'weights_used': weights
             },
+            'details': quality_scores,
+            'factor_averages': avg_factors
+        }, fig
     def compare_competitors(self, product_a_reviews: List[str], product_b_reviews: List[str]) -> Tuple[Dict, go.Figure]:
         """Compare sentiment between two products"""
         analysis_a = self.analyze_sentiment(product_a_reviews)
         analysis_b = self.analyze_sentiment(product_b_reviews)
         fig = make_subplots(
             rows=1, cols=2,
             specs=[[{'type': 'pie'}, {'type': 'pie'}]],
             subplot_titles=['Product A', 'Product B']
         )
         fig.add_trace(go.Pie(
             labels=list(analysis_a['summary'].keys()),
             values=list(analysis_a['summary'].values()),
             name="Product A"
         ), row=1, col=1)
         fig.add_trace(go.Pie(
             labels=list(analysis_b['summary'].keys()),
             values=list(analysis_b['summary'].values()),
         return comparison, fig
     def generate_report(self, analysis_data: Dict, report_type: str = "basic") -> str:
+        """Generate analysis report with export capability"""
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         if report_type == "sentiment":
+            keywords = analysis_data.get('keywords', {})
+            top_pos = keywords.get('top_positive_aspects', [])[:5]
+            top_neg = keywords.get('top_negative_aspects', [])[:5]
+            return f"""# Sentiment Analysis Report
 Generated: {timestamp}
 ## Summary
 - Negative: {analysis_data.get('summary', {}).get('negative', 0)}%
 - Neutral: {analysis_data.get('summary', {}).get('neutral', 0)}%
+## Top Positive Aspects
+{chr(10).join([f"- {aspect[0]} (mentioned {aspect[1]} times)" for aspect in top_pos])}
+## Top Negative Aspects
+{chr(10).join([f"- {aspect[0]} (mentioned {aspect[1]} times)" for aspect in top_neg])}
 ## Key Insights
+- Overall sentiment: {'Positive' if analysis_data.get('summary', {}).get('positive', 0) > 50 else 'Mixed'}
+- Main complaints: {', '.join([aspect[0] for aspect in top_neg[:3]])}
+- Key strengths: {', '.join([aspect[0] for aspect in top_pos[:3]])}
 ## Recommendations
+- Address negative aspects: {', '.join([aspect[0] for aspect in top_neg[:2]])}
+- Leverage positive aspects in marketing
 - Monitor sentiment trends over time
+"""
         elif report_type == "fake":
+            return f"""# Fake Review Detection Report
 Generated: {timestamp}
 ## Summary
+- Total Reviews: {analysis_data.get('summary', {}).get('total_reviews', 0)}
 - Suspicious Reviews: {analysis_data.get('summary', {}).get('suspicious_reviews', 0)}
 - Authenticity Rate: {analysis_data.get('summary', {}).get('authenticity_rate', 0)}%
 ## Risk Assessment
+- Overall Risk: {'High' if analysis_data.get('summary', {}).get('authenticity_rate', 0) < 70 else 'Low'}
+- Action Required: {'Yes' if analysis_data.get('summary', {}).get('suspicious_reviews', 0) > 0 else 'No'}
+## Common Fraud Indicators
+- Short reviews with generic language
+- Repetitive content patterns
+- Suspicious timing clusters
+- Unusual username patterns
+"""
         return "Report generated successfully"
     if not text.strip():
         return []
     reviews = []
     for line in text.split('\n'):
         line = line.strip()
+        if line and len(line) > 10:
             reviews.append(line)
     return reviews
def process_csv_upload(file) -> Tuple[List[str], Dict]:
    """Read reviews, and optional metadata, from an uploaded CSV file.

    Columns are recognised by case-insensitive substring match on the header:
    ``review``/``comment``/``text`` for the review body, ``time``/``date``
    for timestamps, ``user``/``name`` for usernames. The first matching
    column is used for each role.

    Rows with no review text are dropped as a whole, so the metadata lists
    stay index-aligned with the returned reviews. A missing metadata cell
    becomes an empty string rather than being removed.

    Args:
        file: ``None``, a filesystem path, or an object whose ``name``
            attribute is the path of the uploaded file.

    Returns:
        ``(reviews, metadata)``. ``metadata`` may contain ``'timestamps'``
        and/or ``'usernames'`` (same length as ``reviews``). On failure
        ``reviews`` is empty and ``metadata`` is ``{'error': <message>}``.
    """
    if file is None:
        return [], {}

    # Accept both an upload object exposing ``.name`` and a plain path.
    path = getattr(file, "name", file)

    role_keywords = (
        ("review", ("review", "comment", "text")),
        ("time", ("time", "date")),
        ("user", ("user", "name")),
    )

    def as_strings(series):
        # Missing cells become "" so positions keep matching the reviews.
        return ["" if pd.isna(value) else str(value) for value in series]

    try:
        df = pd.read_csv(path)

        # Assign each column to the first still-unfilled role it matches, so
        # e.g. "review_date" becomes the timestamp column once "review" has
        # already been taken.
        columns = {}
        for col in df.columns:
            col_lower = str(col).lower()
            for role, keywords in role_keywords:
                if role not in columns and any(k in col_lower for k in keywords):
                    columns[role] = col
                    break

        if "review" not in columns:
            return [], {"error": "No review column found. Expected columns: 'review', 'comment', or 'text'"}

        # Drop whole rows, not individual cells, to keep columns aligned.
        df = df.dropna(subset=[columns["review"]])
        reviews = df[columns["review"]].astype(str).tolist()

        metadata = {}
        if "time" in columns:
            metadata['timestamps'] = as_strings(df[columns["time"]])
        if "user" in columns:
            metadata['usernames'] = as_strings(df[columns["user"]])

        return reviews, metadata
    except Exception as e:
        # Interface boundary: report the problem to the UI instead of raising.
        return [], {"error": f"Failed to process CSV: {str(e)}"}
+def sentiment_analysis_interface(reviews_text: str, csv_file):
     """Interface for sentiment analysis"""
+    reviews = []
+    if csv_file is not None:
+        reviews, metadata = process_csv_upload(csv_file)
+        if 'error' in metadata:
+            return metadata['error'], None
+    else:
+        reviews = process_reviews_input(reviews_text)
     if not reviews:
+        return "Please enter reviews or upload a CSV file.", None
     try:
         result = analyzer.analyze_sentiment(reviews)
         fig = go.Figure(data=[
             go.Bar(x=list(result['summary'].keys()),
                    y=list(result['summary'].values()),
     except Exception as e:
         return f"Error: {str(e)}", None
+def fake_detection_interface(reviews_text: str, csv_file):
     """Interface for fake review detection"""
+    reviews = []
+    metadata = {}
+    if csv_file is not None:
+        reviews, metadata = process_csv_upload(csv_file)
+        if 'error' in metadata:
+            return metadata['error']
+    else:
+        reviews = process_reviews_input(reviews_text)
     if not reviews:
+        return "Please enter reviews or upload a CSV file."
     try:
+        result = analyzer.detect_fake_reviews(reviews, metadata if metadata else None)
         return json.dumps(result, indent=2)
     except Exception as e:
         return f"Error: {str(e)}"
+def quality_assessment_interface(reviews_text: str, csv_file, length_weight: float, detail_weight: float, structure_weight: float, help_weight: float):
+    """Interface for quality assessment with custom weights"""
+    reviews = []
+    if csv_file is not None:
+        reviews, metadata = process_csv_upload(csv_file)
+        if 'error' in metadata:
+            return metadata['error'], None
+    else:
+        reviews = process_reviews_input(reviews_text)
     if not reviews:
+        return "Please enter reviews or upload a CSV file.", None
     try:
+        custom_weights = {
+            'length': length_weight,
+            'detail': detail_weight,
+            'structure': structure_weight,
+            'helpfulness': help_weight
+        }
+        result, radar_fig = analyzer.assess_quality(reviews, custom_weights)
+        return json.dumps(result, indent=2), radar_fig
     except Exception as e:
+        return f"Error: {str(e)}", None
 def competitor_comparison_interface(product_a_text: str, product_b_text: str):
     """Interface for competitor comparison"""
 # Create Gradio interface
 with gr.Blocks(title="SmartReview Pro", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🛒 SmartReview Pro")
+    gr.Markdown("Advanced review analysis platform with AI-powered insights")
     with gr.Tab("📊 Sentiment Analysis"):
+        gr.Markdown("### Analyze customer sentiment and extract key aspects")
         with gr.Row():
             with gr.Column():
                 sentiment_input = gr.Textbox(
+                    lines=8,
+                    placeholder="Enter reviews (one per line) or upload CSV...",
                     label="Reviews"
                 )
+                sentiment_csv = gr.File(
+                    label="Upload CSV (columns: review/comment/text, optional: timestamp, username)",
+                    file_types=[".csv"]
+                )
                 sentiment_btn = gr.Button("Analyze Sentiment", variant="primary")
             with gr.Column():
                 sentiment_output = gr.Textbox(label="Analysis Results", lines=15)
         sentiment_btn.click(
             sentiment_analysis_interface,
+            inputs=[sentiment_input, sentiment_csv],
             outputs=[sentiment_output, sentiment_chart]
         )
     with gr.Tab("🔍 Fake Review Detection"):
+        gr.Markdown("### Detect suspicious reviews using text analysis and metadata")
         with gr.Row():
             with gr.Column():
                 fake_input = gr.Textbox(
+                    lines=8,
+                    placeholder="Enter reviews to analyze...",
                     label="Reviews"
                 )
+                fake_csv = gr.File(
+                    label="Upload CSV (supports timestamp & username analysis)",
+                    file_types=[".csv"]
+                )
                 fake_btn = gr.Button("Detect Fake Reviews", variant="primary")
             with gr.Column():
                 fake_output = gr.Textbox(label="Detection Results", lines=15)
         fake_btn.click(
             fake_detection_interface,
+            inputs=[fake_input, fake_csv],
             outputs=[fake_output]
         )
     with gr.Tab("⭐ Quality Assessment"):
+        gr.Markdown("### Assess review quality with customizable weights")
         with gr.Row():
             with gr.Column():
                 quality_input = gr.Textbox(
+                    lines=8,
+                    placeholder="Enter reviews to assess...",
                     label="Reviews"
                 )
+                quality_csv = gr.File(
+                    label="Upload CSV",
+                    file_types=[".csv"]
+                )
+                gr.Markdown("**Customize Quality Weights:**")
+                with gr.Row():
+                    length_weight = gr.Slider(0, 1, 0.25, label="Length Weight")
+                    detail_weight = gr.Slider(0, 1, 0.25, label="Detail Weight")
+                with gr.Row():
+                    structure_weight = gr.Slider(0, 1, 0.25, label="Structure Weight")
+                    help_weight = gr.Slider(0, 1, 0.25, label="Helpfulness Weight")
                 quality_btn = gr.Button("Assess Quality", variant="primary")
             with gr.Column():
+                quality_output = gr.Textbox(label="Quality Assessment", lines=12)
+                quality_radar = gr.Plot(label="Quality Factors Radar Chart")
         quality_btn.click(
             quality_assessment_interface,
+            inputs=[quality_input, quality_csv, length_weight, detail_weight, structure_weight, help_weight],
+            outputs=[quality_output, quality_radar]
         )
     with gr.Tab("🆚 Competitor Comparison"):
             inputs=[report_data, report_type],
             outputs=[report_output]
         )
 if __name__ == "__main__":
     demo.launch()