|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline |
|
import torch |
|
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score |
|
import io |
|
import base64 |
|
from textblob import TextBlob |
|
from collections import defaultdict |
|
from tabulate import tabulate |
|
|
|
|
|
# Directory holding the sequence-classification checkpoint.
model_path = "./final_model"

# The tokenizer and the classifier are restored from the same checkpoint
# directory; predict_sentiment reads both module-level objects.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Summarization pipeline used by generate_category_summary.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
|
|
|
|
|
def load_dataset():
    """Read ``dataset.csv`` from the working directory and check its columns.

    Returns:
        The loaded ``pandas.DataFrame`` when the file can be read and contains
        the ``reviews.text``, ``reviews.rating``, ``name`` and ``categories``
        columns; ``None`` otherwise. The failure reason is printed.
    """
    expected = ('reviews.text', 'reviews.rating', 'name', 'categories')
    try:
        frame = pd.read_csv("dataset.csv")
        absent = [column for column in expected if column not in frame.columns]
        if absent:
            raise ValueError("Missing required columns in dataset.csv")
    except Exception as err:
        # Any failure (missing file, parse error, schema mismatch) is reported
        # on stdout and mapped to None for the caller.
        print(f"Error loading dataset: {err}")
        return None
    return frame
|
|
|
|
|
def get_initial_summary():
    """Summarize a random sample of the reviews stored in the dataset.

    Up to 100 rows of the ``reviews.text`` column are drawn at random, missing
    values are replaced by empty strings, and the texts are joined with
    newlines before being handed to ``generate_category_summary``.

    Returns:
        The summary string, or an error message when the dataset could not be
        loaded.
    """
    dataset = load_dataset()
    if dataset is None:
        return "Error: Could not load dataset.csv"

    sample_size = min(100, len(dataset))
    sampled = dataset['reviews.text'].sample(n=sample_size)
    joined_reviews = '\n'.join(sampled.fillna('').tolist())
    return generate_category_summary(joined_reviews)
|
|
|
def predict_sentiment(text):
    """Classify one piece of text with the module-level tokenizer and model.

    Args:
        text: The text to classify; it is lower-cased before tokenization.

    Returns:
        A ``(label, confidences)`` tuple. ``label`` is the name of the class
        with the highest probability and ``confidences`` maps every class name
        to its probability formatted as a percentage string such as
        ``"12.34%"``.
    """
    # NOTE(review): assumes the checkpoint's class indices follow this order -
    # confirm against the model config.
    labels = {0: "Negative", 1: "Neutral", 2: "Positive"}

    encoded = tokenizer(
        text.lower(),
        return_tensors="pt",
        truncation=True,
        padding=True,
    )

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**encoded).logits
        class_probs = torch.softmax(logits, dim=-1)
        best_index = class_probs.argmax(dim=-1).item()

    label = labels[best_index]

    confidences = {}
    for index, value in enumerate(class_probs[0].tolist()):
        confidences[labels[index]] = f"{value*100:.2f}%"

    return label, confidences
|
|
|
def analyze_sentiment(reviews):
    """Collect the nouns and adjectives used in clearly polarised sentences.

    Every review is split into sentences with TextBlob. A sentence whose
    polarity is above 0.3 adds its nouns/adjectives to the "pros" tally, one
    below -0.3 adds them to the "cons" tally, and anything in between is
    ignored. Words are taken from the sentence being scored, not from the
    whole review, so a mixed review such as "Great screen. Terrible battery."
    does not credit every word to both sides.

    Args:
        reviews: Iterable of review texts; each item is converted with ``str``.

    Returns:
        A ``(pros, cons)`` tuple of word lists ordered from most to least
        frequent. Words with equal counts keep first-seen order.
    """
    keyword_tags = {'NN', 'NNS', 'JJ', 'JJR', 'JJS'}
    pros = defaultdict(int)
    cons = defaultdict(int)

    for review in reviews:
        for sentence in TextBlob(str(review)).sentences:
            polarity = sentence.sentiment.polarity
            if polarity > 0.3:
                tally = pros
            elif polarity < -0.3:
                tally = cons
            else:
                # Weakly polarised sentence: nothing to record, skip tagging.
                continue

            # Tag only this sentence so its words land on the matching side.
            for word, tag in sentence.tags:
                if tag in keyword_tags:
                    tally[word] += 1

    # sorted() is stable, so ties keep insertion (first-seen) order.
    pros_sorted = sorted(pros, key=pros.get, reverse=True)
    cons_sorted = sorted(cons, key=cons.get, reverse=True)

    return pros_sorted, cons_sorted
|
|
|
def generate_category_summary(reviews_text):
    """Generate a pros/cons summary for a set of newline-separated reviews.

    The reviews are analysed with ``analyze_sentiment`` and the top five
    positive and negative keywords are laid out in a short report. Reports
    longer than 100 characters are condensed with the module-level
    ``summarizer`` pipeline; shorter ones are returned unchanged.

    Args:
        reviews_text: Reviews separated by newlines. Blank lines are ignored;
            ``None`` is treated like an empty string.

    Returns:
        The summary text. When no review is supplied a prompt asking for input
        is returned. When the summarizer raises, the error is printed and the
        uncondensed report is returned so the computed analysis is not lost.
    """
    lines = (reviews_text or "").split('\n')
    reviews = [line.strip() for line in lines if line.strip()]
    if not reviews:
        return "Please enter at least one review."

    pros, cons = analyze_sentiment(reviews)
    pros_line = ', '.join(pros[:5]) if pros else 'No significant positive feedback'
    cons_line = ', '.join(cons[:5]) if cons else 'No major complaints'

    # Assembled line by line so the report text does not depend on how this
    # function happens to be indented in the source file.
    summary_text = (
        "\n"
        "Review Analysis Summary:\n"
        "\n"
        "PROS:\n"
        f"{pros_line}\n"
        "\n"
        "CONS:\n"
        f"{cons_line}\n"
        "\n"
        f"Based on {len(reviews)} reviews analyzed.\n"
    )

    if len(summary_text) <= 100:
        return summary_text

    try:
        return summarizer(
            summary_text,
            max_length=150,
            min_length=50,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']
    except Exception as e:
        # Best effort: keep the structured report instead of replacing the
        # whole result with an error message.
        print(f"Error generating summary: {e}")
        return summary_text
|
|
|
def analyze_reviews(reviews_text):
    """Run sentiment classification and summarization for the UI callback.

    Args:
        reviews_text: Reviews separated by newlines.

    Returns:
        A ``(table, plot_html, summary)`` tuple matching the three Gradio
        outputs wired up in ``create_interface``. When
        ``analyze_reviews_sentiment`` reports a message instead of a table
        (it does so for empty input), an empty table with the usual columns is
        returned and the message is placed in the summary slot, so the
        dataframe output never receives a plain string.
    """
    result, plot_html = analyze_reviews_sentiment(reviews_text)

    if not isinstance(result, pd.DataFrame):
        # `result` is a user-facing message here, not tabular data.
        empty_table = pd.DataFrame(columns=['Review', 'Sentiment', 'Confidence'])
        return empty_table, plot_html, result

    summary = generate_category_summary(reviews_text)
    return result, plot_html, summary
|
|
|
def analyze_reviews_sentiment(reviews_text):
    """Classify each review and render the sentiment distribution as an image.

    Args:
        reviews_text: Reviews separated by newlines. Blank lines are ignored;
            ``None`` is treated like an empty string.

    Returns:
        ``(df, img_html)`` where ``df`` has the columns ``Review``,
        ``Sentiment`` and ``Confidence`` (one row per review) and ``img_html``
        is an ``<img>`` tag embedding a base64 PNG bar chart of the sentiment
        counts. When no review is supplied the result is
        ``("Please enter at least one review.", None)``.
    """
    lines = (reviews_text or "").split('\n')
    reviews = [line.strip() for line in lines if line.strip()]
    if not reviews:
        return "Please enter at least one review.", None

    results = []
    for review in reviews:
        sentiment, probs = predict_sentiment(review)
        results.append({
            'Review': review,
            'Sentiment': sentiment,
            'Confidence': probs,
        })
    df = pd.DataFrame(results)

    sentiment_counts = df['Sentiment'].value_counts()

    # Draw on an explicit figure rather than pyplot's implicit "current
    # figure", and always close it so a failure while plotting or saving does
    # not leave the figure registered with pyplot.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.bar(sentiment_counts.index, sentiment_counts.values)
        ax.set_title('Sentiment Distribution')
        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        plt.close(fig)

    plot_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')

    return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'
|
|
|
def create_interface():
    """Assemble the Gradio Blocks UI for the review analysis app.

    The dataset overview is computed once, before the layout is built, and
    shown as static markdown. Below it, a textbox and a button feed
    ``analyze_reviews``, whose three results fill the sentiment table, the
    distribution image and the summary textbox.

    Returns:
        The constructed ``gr.Blocks`` application; the caller launches it.
    """
    dataset_overview = get_initial_summary()

    with gr.Blocks() as demo:
        gr.Markdown("# Review Analysis System")

        with gr.Tab("Review Analysis"):
            # Static section describing the bundled dataset.
            gr.Markdown("## Dataset Overview")
            gr.Markdown(dataset_overview)

            # Interactive section for user-supplied reviews.
            gr.Markdown("## Analyze New Reviews")
            reviews_box = gr.Textbox(
                label="Enter reviews (one per line)",
                placeholder="Enter product reviews here...",
                lines=5,
            )
            run_button = gr.Button("Analyze Reviews")

            with gr.Row():
                # Left column: per-review results and the distribution chart.
                with gr.Column():
                    results_table = gr.Dataframe(label="Sentiment Analysis Results")
                    distribution_html = gr.HTML(label="Sentiment Distribution")

                # Right column: generated summary.
                with gr.Column():
                    summary_box = gr.Textbox(label="Review Summary", lines=5)

            run_button.click(
                fn=analyze_reviews,
                inputs=[reviews_box],
                outputs=[results_table, distribution_html, summary_box],
            )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    demo = create_interface()
    demo.launch()