Spaces:

mfoud444
/

oop

Running

oop / app.py

Mohammed Foud

first commit

4ae3359 4 months ago

6.99 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
	import torch
	from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
	import io
	import base64
	from textblob import TextBlob
	from collections import defaultdict
	from tabulate import tabulate

	# Load models and initialize components.
	# NOTE: these statements run at import time, so importing this module loads
	# both models before any function is called.
	# Directory passed to from_pretrained() for the tokenizer and the
	# sequence-classification model that predict_sentiment() uses.
	model_path = "./final_model"
	tokenizer = AutoTokenizer.from_pretrained(model_path)
	model = AutoModelForSequenceClassification.from_pretrained(model_path)
	# Summarization pipeline (facebook/bart-large-cnn) called by
	# generate_category_summary().
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

	# Load dataset
	def load_dataset(path="dataset.csv"):
	    """Read the review dataset from a CSV file and validate its columns.

	    Args:
	        path: CSV file to read. Defaults to ``"dataset.csv"`` so existing
	            zero-argument callers keep their behavior.

	    Returns:
	        The loaded ``pandas.DataFrame``, or ``None`` when the file cannot be
	        read or lacks any of the required columns. The reason is printed.
	    """
	    required_columns = ['reviews.text', 'reviews.rating', 'name', 'categories']
	    try:
	        df = pd.read_csv(path)
	        # Name the missing columns so a bad dataset can be fixed without
	        # opening the file to compare headers by hand.
	        missing = [col for col in required_columns if col not in df.columns]
	        if missing:
	            raise ValueError(
	                f"Missing required columns in {path}: {', '.join(missing)}"
	            )
	        return df
	    except Exception as e:
	        # Deliberate best-effort boundary: callers only check for None, so
	        # any load failure is reported and converted instead of propagating.
	        print(f"Error loading dataset: {e}")
	        return None

	# Get initial summary
	def get_initial_summary():
	    """Summarize a random sample of up to 100 reviews from the dataset.

	    Returns:
	        The text produced by ``generate_category_summary`` for the sample,
	        or an error message string when the dataset could not be loaded.
	    """
	    df = load_dataset()
	    if df is None:
	        return "Error: Could not load dataset.csv"

	    sample_size = min(100, len(df))
	    sampled = df['reviews.text'].sample(n=sample_size)

	    # fillna('') runs before astype(str) so missing reviews become empty
	    # strings rather than the literal text "nan". astype(str) keeps
	    # '\n'.join from raising TypeError on non-string cells.
	    raw_reviews = sampled.fillna('').astype(str).tolist()

	    # generate_category_summary treats every line as one review, so collapse
	    # line breaks inside a review to keep it from being counted several times.
	    sample_reviews = [' '.join(review.split()) for review in raw_reviews]

	    sample_text = '\n'.join(sample_reviews)
	    return generate_category_summary(sample_text)

	def predict_sentiment(text):
	    """Classify one piece of text and report a confidence for every class.

	    The text is lower-cased, tokenized with truncation, and passed through the
	    module-level ``model`` with gradient tracking disabled.

	    Args:
	        text: The review text to classify.

	    Returns:
	        A ``(sentiment, prob_dict)`` tuple. ``sentiment`` is "Negative",
	        "Neutral" or "Positive" (class indices 0, 1, 2). ``prob_dict`` maps
	        each of those labels to its softmax probability formatted as a
	        percentage string with two decimals.
	    """
	    labels = {0: "Negative", 1: "Neutral", 2: "Positive"}

	    encoded = tokenizer(
	        text.lower(), return_tensors="pt", truncation=True, padding=True
	    )

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        class_scores = torch.softmax(model(**encoded).logits, dim=-1)
	    top_index = class_scores.argmax(dim=-1).item()

	    # Row 0 holds the scores for the single input text.
	    prob_dict = {}
	    for i, prob in enumerate(class_scores[0].tolist()):
	        prob_dict[labels[i]] = f"{prob*100:.2f}%"

	    return labels[top_index], prob_dict

	def analyze_sentiment(reviews):
	    """Collect the words mentioned in clearly positive and negative sentences.

	    Each review is split into sentences with TextBlob. Words tagged as nouns
	    or adjectives (NN, NNS, JJ, JJR, JJS) are counted as "pros" when their
	    sentence has polarity above 0.3 and as "cons" when it is below -0.3.
	    Sentences in between are ignored.

	    Args:
	        reviews: Iterable of reviews; each item is converted with ``str()``.

	    Returns:
	        A ``(pros, cons)`` tuple of word lists ordered by descending count.
	        Words with equal counts keep the order in which they were first seen.
	    """
	    wanted_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')
	    pros = defaultdict(int)
	    cons = defaultdict(int)

	    for review in reviews:
	        for sentence in TextBlob(str(review)).sentences:
	            polarity = sentence.sentiment.polarity
	            if polarity > 0.3:
	                bucket = pros
	            elif polarity < -0.3:
	                bucket = cons
	            else:
	                continue

	            # Tag the sentence itself, not the whole review: the polarity
	            # belongs to this sentence, so only its words should be credited.
	            # Using the review-wide tags would put every word of a mixed
	            # review into both lists.
	            for word, tag in sentence.tags:
	                if tag in wanted_tags:
	                    bucket[word] += 1

	    def _by_count(counts):
	        # sorted() is stable, so ties stay in insertion order.
	        return [word for word, _ in sorted(counts.items(), key=lambda item: -item[1])]

	    return _by_count(pros), _by_count(cons)

	def generate_category_summary(reviews_text):
	    """Build a pros/cons summary for newline-separated reviews.

	    Blank lines are dropped. The top five pros and cons reported by
	    ``analyze_sentiment`` are placed into a text template. When that text is
	    longer than 100 characters it is passed to the module-level ``summarizer``;
	    otherwise the template text itself is returned.

	    Args:
	        reviews_text: Reviews as a single string, one review per line.

	    Returns:
	        The summary string, a prompt asking for input when no non-blank line
	        was given, or an error message string if the summarizer raised.
	    """
	    stripped_lines = (line.strip() for line in reviews_text.split('\n'))
	    reviews = [line for line in stripped_lines if line]

	    if len(reviews) == 0:
	        return "Please enter at least one review."

	    pros, cons = analyze_sentiment(reviews)

	    # The template lines stay at the file's left margin on purpose: any
	    # whitespace inside the triple-quoted literal is part of the runtime text.
	    summary_text = f"""
	Review Analysis Summary:

	PROS:
	{', '.join(pros[:5]) if pros else 'No significant positive feedback'}

	CONS:
	{', '.join(cons[:5]) if cons else 'No major complaints'}

	Based on {len(reviews)} reviews analyzed.
	"""

	    # Short text is returned unchanged; only longer text goes through BART.
	    if not len(summary_text) > 100:
	        return summary_text

	    try:
	        outputs = summarizer(
	            summary_text,
	            max_length=150,
	            min_length=50,
	            do_sample=False,
	            truncation=True
	        )
	        return outputs[0]['summary_text']
	    except Exception as e:
	        return f"Error generating summary: {str(e)}"

	def analyze_reviews(reviews_text):
	    """Run sentiment classification and summarization for the UI callback.

	    Args:
	        reviews_text: Reviews as a single string, one review per line.

	    Returns:
	        A ``(df, plot_html, summary)`` tuple: the per-review results table,
	        the HTML for the sentiment distribution plot, and the summary text.
	        For blank input the table is empty, the plot HTML is an empty string
	        and the summary asks the user to enter a review.
	    """
	    # analyze_reviews_sentiment reports blank input as a message string in the
	    # table position. That value is routed to a Dataframe output, so hand the
	    # UI a real (empty) table and put the message in the summary slot instead.
	    if not reviews_text or not reviews_text.strip():
	        empty_table = pd.DataFrame(columns=['Review', 'Sentiment', 'Confidence'])
	        return empty_table, "", "Please enter at least one review."

	    # Per-review sentiment table and distribution plot
	    df, plot_html = analyze_reviews_sentiment(reviews_text)

	    # Pros/cons summary over the same reviews
	    summary = generate_category_summary(reviews_text)

	    return df, plot_html, summary

	def analyze_reviews_sentiment(reviews_text):
	    """Classify each review and render the sentiment distribution as an image.

	    Args:
	        reviews_text: Reviews as a single string, one review per line. Blank
	            lines are ignored; ``None`` is treated like an empty string.

	    Returns:
	        ``(df, img_html)`` where ``df`` has one row per review with the
	        columns 'Review', 'Sentiment' and 'Confidence', and ``img_html`` is an
	        ``<img>`` tag embedding the bar chart as a base64 PNG. When there is
	        no non-blank line, returns
	        ``("Please enter at least one review.", None)`` instead.
	    """
	    reviews = [r.strip() for r in (reviews_text or '').split('\n') if r.strip()]

	    if not reviews:
	        return "Please enter at least one review.", None

	    results = []
	    for review in reviews:
	        sentiment, probs = predict_sentiment(review)
	        results.append({
	            'Review': review,
	            'Sentiment': sentiment,
	            'Confidence': probs
	        })

	    df = pd.DataFrame(results)
	    sentiment_counts = df['Sentiment'].value_counts()

	    # Draw on an explicit figure rather than pyplot's implicit "current
	    # figure", and always close it so a failure while plotting or saving
	    # does not leave the figure open.
	    fig, ax = plt.subplots(figsize=(10, 6))
	    try:
	        ax.bar(sentiment_counts.index, sentiment_counts.values)
	        ax.set_title('Sentiment Distribution')
	        ax.set_xlabel('Sentiment')
	        ax.set_ylabel('Count')

	        buf = io.BytesIO()
	        fig.savefig(buf, format='png')
	    finally:
	        plt.close(fig)

	    plot_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')

	    return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'

	def create_interface():
	    """Assemble the Gradio Blocks UI for the review analysis app.

	    The dataset overview text is computed once, while the interface is being
	    built, and shown above the input form. Clicking the button calls
	    ``analyze_reviews`` with the textbox content and sends its three results
	    to the table, the plot area and the summary box.

	    Returns:
	        The constructed ``gr.Blocks`` object; it is not launched here.
	    """
	    dataset_overview = get_initial_summary()

	    with gr.Blocks() as demo:
	        gr.Markdown("# Review Analysis System")

	        with gr.Tab("Review Analysis"):
	            # Static overview of the bundled dataset
	            gr.Markdown("## Dataset Overview")
	            gr.Markdown(dataset_overview)

	            # Input form for user-supplied reviews
	            gr.Markdown("## Analyze New Reviews")
	            reviews_box = gr.Textbox(
	                label="Enter reviews (one per line)",
	                placeholder="Enter product reviews here...",
	                lines=5,
	            )
	            run_button = gr.Button("Analyze Reviews")

	            # Results: table and plot on the left, summary on the right
	            with gr.Row():
	                with gr.Column():
	                    results_table = gr.Dataframe(label="Sentiment Analysis Results")
	                    distribution_view = gr.HTML(label="Sentiment Distribution")

	                with gr.Column():
	                    summary_box = gr.Textbox(label="Review Summary", lines=5)

	            run_button.click(
	                fn=analyze_reviews,
	                inputs=[reviews_box],
	                outputs=[results_table, distribution_view, summary_box],
	            )

	    return demo

	# Create and launch the interface.
	# Guarded so the UI is built and launched only when this file is executed
	# as a script, not when it is imported as a module.
	if __name__ == "__main__":
	    demo = create_interface()
	    demo.launch()