Mohammed Foud
committed on
Commit
·
4ae3359
1
Parent(s):
31f3e54
first commit
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
|
|
3 |
import numpy as np
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
6 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
7 |
import torch
|
8 |
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
|
9 |
import io
|
@@ -11,16 +11,37 @@ import base64
|
|
11 |
from textblob import TextBlob
|
12 |
from collections import defaultdict
|
13 |
from tabulate import tabulate
|
14 |
-
from transformers import pipeline
|
15 |
|
16 |
-
# Load
|
17 |
model_path = "./final_model"
|
18 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
19 |
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
20 |
-
|
21 |
-
# Initialize the summarizer
|
22 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def predict_sentiment(text):
|
25 |
# Preprocess text
|
26 |
text = text.lower()
|
@@ -57,10 +78,10 @@ def analyze_sentiment(reviews):
|
|
57 |
words = [word for word, tag in blob.tags
|
58 |
if tag in ('NN', 'NNS', 'JJ', 'JJR', 'JJS')]
|
59 |
|
60 |
-
if polarity > 0.3:
|
61 |
for word in words:
|
62 |
pros[word] += 1
|
63 |
-
elif polarity < -0.3:
|
64 |
for word in words:
|
65 |
cons[word] += 1
|
66 |
|
@@ -118,9 +139,7 @@ def analyze_reviews(reviews_text):
|
|
118 |
|
119 |
return df, plot_html, summary
|
120 |
|
121 |
-
# Rename original analyze_reviews to analyze_reviews_sentiment
|
122 |
def analyze_reviews_sentiment(reviews_text):
|
123 |
-
# Original implementation
|
124 |
reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
|
125 |
|
126 |
if not reviews:
|
@@ -152,12 +171,19 @@ def analyze_reviews_sentiment(reviews_text):
|
|
152 |
|
153 |
return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
|
154 |
|
155 |
-
# Create Gradio interface
|
156 |
def create_interface():
|
|
|
|
|
|
|
157 |
with gr.Blocks() as demo:
|
158 |
gr.Markdown("# Review Analysis System")
|
159 |
|
160 |
with gr.Tab("Review Analysis"):
|
|
|
|
|
|
|
|
|
|
|
161 |
reviews_input = gr.Textbox(
|
162 |
label="Enter reviews (one per line)",
|
163 |
placeholder="Enter product reviews here...",
|
@@ -187,5 +213,6 @@ def create_interface():
|
|
187 |
return demo
|
188 |
|
189 |
# Create and launch the interface
|
190 |
-
|
191 |
-
demo
|
|
|
|
3 |
import numpy as np
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
6 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
7 |
import torch
|
8 |
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
|
9 |
import io
|
|
|
11 |
from textblob import TextBlob
|
12 |
from collections import defaultdict
|
13 |
from tabulate import tabulate
|
|
|
14 |
|
15 |
+
# Load models and initialize components
|
16 |
model_path = "./final_model"
|
17 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
18 |
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
|
|
|
|
19 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
20 |
|
21 |
+
# Load dataset
|
22 |
+
def load_dataset(path="dataset.csv"):
    """Load the review dataset from a CSV file and validate its columns.

    Args:
        path: Location of the CSV file to read. Defaults to "dataset.csv",
            which is the file the application originally hard-coded.

    Returns:
        The loaded pandas DataFrame when the file can be read and contains
        every required column; otherwise None (the reason is printed).
    """
    required_columns = ['reviews.text', 'reviews.rating', 'name', 'categories']
    try:
        df = pd.read_csv(path)
        # Report exactly which columns are absent so the problem can be
        # fixed without opening the CSV by hand.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise ValueError(f"Missing required columns in {path}: {missing}")
        return df
    except Exception as e:
        # Deliberate best-effort: callers treat None as "dataset unavailable"
        # rather than handling an exception, so report the error and go on.
        print(f"Error loading dataset: {e}")
        return None
|
33 |
+
|
34 |
+
# Get initial summary
|
35 |
+
def get_initial_summary():
    """Build the summary shown when the interface is first created.

    Loads the dataset via load_dataset(), draws a random sample of at most
    100 entries from the 'reviews.text' column, joins them one per line and
    passes the text to generate_category_summary().

    Returns:
        The result of generate_category_summary() for the sampled text, or
        a fixed error string when the dataset could not be loaded.
    """
    dataset = load_dataset()
    if dataset is None:
        return "Error: Could not load dataset.csv"

    # Cap the sample at the dataset size so sampling never asks for more
    # rows than exist.
    sample_size = min(100, len(dataset))
    sampled = dataset['reviews.text'].sample(n=sample_size)

    # Missing review texts become empty strings so the join below only ever
    # sees string values for absent entries.
    review_lines = sampled.fillna('').tolist()
    return generate_category_summary('\n'.join(review_lines))
|
44 |
+
|
45 |
def predict_sentiment(text):
|
46 |
# Preprocess text
|
47 |
text = text.lower()
|
|
|
78 |
words = [word for word, tag in blob.tags
|
79 |
if tag in ('NN', 'NNS', 'JJ', 'JJR', 'JJS')]
|
80 |
|
81 |
+
if polarity > 0.3:
|
82 |
for word in words:
|
83 |
pros[word] += 1
|
84 |
+
elif polarity < -0.3:
|
85 |
for word in words:
|
86 |
cons[word] += 1
|
87 |
|
|
|
139 |
|
140 |
return df, plot_html, summary
|
141 |
|
|
|
142 |
def analyze_reviews_sentiment(reviews_text):
|
|
|
143 |
reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
|
144 |
|
145 |
if not reviews:
|
|
|
171 |
|
172 |
return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
|
173 |
|
|
|
174 |
def create_interface():
|
175 |
+
# Get initial summary
|
176 |
+
initial_summary = get_initial_summary()
|
177 |
+
|
178 |
with gr.Blocks() as demo:
|
179 |
gr.Markdown("# Review Analysis System")
|
180 |
|
181 |
with gr.Tab("Review Analysis"):
|
182 |
+
# Add initial dataset summary
|
183 |
+
gr.Markdown("## Dataset Overview")
|
184 |
+
gr.Markdown(initial_summary)
|
185 |
+
|
186 |
+
gr.Markdown("## Analyze New Reviews")
|
187 |
reviews_input = gr.Textbox(
|
188 |
label="Enter reviews (one per line)",
|
189 |
placeholder="Enter product reviews here...",
|
|
|
213 |
return demo
|
214 |
|
215 |
# Create and launch the interface
|
216 |
+
if __name__ == "__main__":
    # Only build and serve the UI when run as a script, so importing this
    # module does not start the interface.
    demo = create_interface()
    demo.launch()
|