Mohammed Foud committed on
Commit
4ae3359
·
1 Parent(s): 31f3e54

first commit

Browse files
Files changed (1) hide show
  1. app.py +39 -12
app.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  import torch
8
  from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
9
  import io
@@ -11,16 +11,37 @@ import base64
11
  from textblob import TextBlob
12
  from collections import defaultdict
13
  from tabulate import tabulate
14
- from transformers import pipeline
15
 
16
- # Load the model and tokenizer
17
  model_path = "./final_model"
18
  tokenizer = AutoTokenizer.from_pretrained(model_path)
19
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
20
-
21
- # Initialize the summarizer
22
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def predict_sentiment(text):
25
  # Preprocess text
26
  text = text.lower()
@@ -57,10 +78,10 @@ def analyze_sentiment(reviews):
57
  words = [word for word, tag in blob.tags
58
  if tag in ('NN', 'NNS', 'JJ', 'JJR', 'JJS')]
59
 
60
- if polarity > 0.3: # Positive
61
  for word in words:
62
  pros[word] += 1
63
- elif polarity < -0.3: # Negative
64
  for word in words:
65
  cons[word] += 1
66
 
@@ -118,9 +139,7 @@ def analyze_reviews(reviews_text):
118
 
119
  return df, plot_html, summary
120
 
121
- # Rename original analyze_reviews to analyze_reviews_sentiment
122
  def analyze_reviews_sentiment(reviews_text):
123
- # Original implementation
124
  reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
125
 
126
  if not reviews:
@@ -152,12 +171,19 @@ def analyze_reviews_sentiment(reviews_text):
152
 
153
  return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
154
 
155
- # Create Gradio interface
156
  def create_interface():
 
 
 
157
  with gr.Blocks() as demo:
158
  gr.Markdown("# Review Analysis System")
159
 
160
  with gr.Tab("Review Analysis"):
 
 
 
 
 
161
  reviews_input = gr.Textbox(
162
  label="Enter reviews (one per line)",
163
  placeholder="Enter product reviews here...",
@@ -187,5 +213,6 @@ def create_interface():
187
  return demo
188
 
189
  # Create and launch the interface
190
- demo = create_interface()
191
- demo.launch()
 
 
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
7
  import torch
8
  from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
9
  import io
 
11
  from textblob import TextBlob
12
  from collections import defaultdict
13
  from tabulate import tabulate
 
14
 
15
+ # Load models and initialize components
16
  model_path = "./final_model"
17
  tokenizer = AutoTokenizer.from_pretrained(model_path)
18
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
 
 
19
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
20
 
21
+ # Load dataset
22
def load_dataset(path="dataset.csv"):
    """Load the review dataset from a CSV file and validate its columns.

    Args:
        path: Location of the CSV file to read. Defaults to ``"dataset.csv"``
            (resolved against the current working directory), so existing
            zero-argument callers keep their behavior.

    Returns:
        The loaded ``pandas.DataFrame`` when the file is readable and holds
        every required column; ``None`` otherwise. Failures are printed to
        stdout instead of being raised, so callers must check for ``None``.
    """
    required_columns = ('reviews.text', 'reviews.rating', 'name', 'categories')
    try:
        df = pd.read_csv(path)
        # Collect the absent columns so the error message says what to fix.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise ValueError(
                f"Missing required columns in {path}: {', '.join(missing)}"
            )
        return df
    except Exception as e:
        # Deliberate best-effort boundary: any read, parse or validation
        # failure is reported and turned into the None sentinel.
        print(f"Error loading dataset: {e}")
        return None
33
+
34
+ # Get initial summary
35
def get_initial_summary():
    """Build the summary shown before the user submits any reviews.

    Loads the dataset through ``load_dataset()``. If that returns ``None``,
    a fixed error string is returned. Otherwise up to 100 randomly sampled
    ``reviews.text`` entries (missing values replaced by empty strings) are
    joined with newlines and passed to ``generate_category_summary``, whose
    result is returned unchanged.
    """
    dataset = load_dataset()
    if dataset is None:
        return "Error: Could not load dataset.csv"

    # Cap the sample at 100 rows; smaller datasets are used in full.
    sample_size = min(100, len(dataset))
    sampled = dataset['reviews.text'].sample(n=sample_size)
    review_lines = sampled.fillna('').tolist()
    return generate_category_summary('\n'.join(review_lines))
44
+
45
  def predict_sentiment(text):
46
  # Preprocess text
47
  text = text.lower()
 
78
  words = [word for word, tag in blob.tags
79
  if tag in ('NN', 'NNS', 'JJ', 'JJR', 'JJS')]
80
 
81
+ if polarity > 0.3:
82
  for word in words:
83
  pros[word] += 1
84
+ elif polarity < -0.3:
85
  for word in words:
86
  cons[word] += 1
87
 
 
139
 
140
  return df, plot_html, summary
141
 
 
142
  def analyze_reviews_sentiment(reviews_text):
 
143
  reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
144
 
145
  if not reviews:
 
171
 
172
  return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
173
 
 
174
  def create_interface():
175
+ # Get initial summary
176
+ initial_summary = get_initial_summary()
177
+
178
  with gr.Blocks() as demo:
179
  gr.Markdown("# Review Analysis System")
180
 
181
  with gr.Tab("Review Analysis"):
182
+ # Add initial dataset summary
183
+ gr.Markdown("## Dataset Overview")
184
+ gr.Markdown(initial_summary)
185
+
186
+ gr.Markdown("## Analyze New Reviews")
187
  reviews_input = gr.Textbox(
188
  label="Enter reviews (one per line)",
189
  placeholder="Enter product reviews here...",
 
213
  return demo
214
 
215
  # Create and launch the interface
216
if __name__ == "__main__":
    # Build and serve the UI only when run as a script, so importing this
    # module does not start a server.
    demo = create_interface()
    demo.launch()