Spaces:

mfoud444
/

oop

Running

App Files Community

Mohammed Foud committed on Apr 14

Commit

4ae3359

1 Parent(s): 31f3e54

first commit

Browse files

Files changed (1) hide show

app.py +39 -12

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
 import io
@@ -11,16 +11,37 @@ import base64
 from textblob import TextBlob
 from collections import defaultdict
 from tabulate import tabulate
-from transformers import pipeline
-# Load the model and tokenizer
 model_path = "./final_model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
-# Initialize the summarizer
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 def predict_sentiment(text):
     # Preprocess text
     text = text.lower()
@@ -57,10 +78,10 @@ def analyze_sentiment(reviews):
             words = [word for word, tag in blob.tags
                     if tag in ('NN', 'NNS', 'JJ', 'JJR', 'JJS')]
-            if polarity > 0.3:  # Positive
                 for word in words:
                     pros[word] += 1
-            elif polarity < -0.3:  # Negative
                 for word in words:
                     cons[word] += 1
@@ -118,9 +139,7 @@ def analyze_reviews(reviews_text):
     return df, plot_html, summary
-# Rename original analyze_reviews to analyze_reviews_sentiment
 def analyze_reviews_sentiment(reviews_text):
-    # Original implementation
     reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
     if not reviews:
@@ -152,12 +171,19 @@ def analyze_reviews_sentiment(reviews_text):
     return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
-# Create Gradio interface
 def create_interface():
     with gr.Blocks() as demo:
         gr.Markdown("# Review Analysis System")
         with gr.Tab("Review Analysis"):
             reviews_input = gr.Textbox(
                 label="Enter reviews (one per line)",
                 placeholder="Enter product reviews here...",
@@ -187,5 +213,6 @@ def create_interface():
     return demo
 # Create and launch the interface
-demo = create_interface()
-demo.launch()

 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import torch
 from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
 import io
 from textblob import TextBlob
 from collections import defaultdict
 from tabulate import tabulate
+# Load models and initialize components
 model_path = "./final_model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+# Load dataset
+def load_dataset():
+    try:
+        df = pd.read_csv("dataset.csv")
+        # Ensure required columns exist
+        required_columns = ['reviews.text', 'reviews.rating', 'name', 'categories']
+        if not all(col in df.columns for col in required_columns):
+            raise ValueError("Missing required columns in dataset.csv")
+        return df
+    except Exception as e:
+        print(f"Error loading dataset: {str(e)}")
+        return None
+# Get initial summary
+def get_initial_summary():
+    df = load_dataset()
+    if df is None:
+        return "Error: Could not load dataset.csv"
+    # Take a sample of reviews for initial summary
+    sample_reviews = df['reviews.text'].sample(n=min(100, len(df))).fillna('').tolist()
+    sample_text = '\n'.join(sample_reviews)
+    return generate_category_summary(sample_text)
 def predict_sentiment(text):
     # Preprocess text
     text = text.lower()
             words = [word for word, tag in blob.tags
                     if tag in ('NN', 'NNS', 'JJ', 'JJR', 'JJS')]
+            if polarity > 0.3:
                 for word in words:
                     pros[word] += 1
+            elif polarity < -0.3:
                 for word in words:
                     cons[word] += 1
     return df, plot_html, summary
 def analyze_reviews_sentiment(reviews_text):
     reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
     if not reviews:
     return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
 def create_interface():
+    # Get initial summary
+    initial_summary = get_initial_summary()
     with gr.Blocks() as demo:
         gr.Markdown("# Review Analysis System")
         with gr.Tab("Review Analysis"):
+            # Add initial dataset summary
+            gr.Markdown("## Dataset Overview")
+            gr.Markdown(initial_summary)
+            gr.Markdown("## Analyze New Reviews")
             reviews_input = gr.Textbox(
                 label="Enter reviews (one per line)",
                 placeholder="Enter product reviews here...",
     return demo
 # Create and launch the interface
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()