Spaces:

mfoud444
/

oop

Running

oop / app.py

Mohammed Foud

first commit

31f3e54 4 months ago

6.02 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
	import io
	import base64
	from textblob import TextBlob
	from collections import defaultdict
	from tabulate import tabulate
	from transformers import pipeline

	# Load the model and tokenizer
	# Both are read from the local "./final_model" path when this module is
	# imported; predict_sentiment() uses them as module-level globals.
	model_path = "./final_model"
	tokenizer = AutoTokenizer.from_pretrained(model_path)
	model = AutoModelForSequenceClassification.from_pretrained(model_path)

	# Initialize the summarizer
	# A transformers "summarization" pipeline using facebook/bart-large-cnn;
	# generate_category_summary() calls it to condense the pros/cons text.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

	def predict_sentiment(text):
	    """Classify a single review and report per-class probabilities.

	    The text is lower-cased, tokenized with the module-level ``tokenizer``
	    and scored by the module-level ``model`` with gradients disabled.

	    Args:
	        text: The review text to classify.

	    Returns:
	        A ``(label, percentages)`` tuple. ``label`` is the name mapped to
	        the highest-probability class index (0 -> "Negative",
	        1 -> "Neutral", 2 -> "Positive"); ``percentages`` maps each label
	        to its softmax probability formatted like ``"12.34%"``.
	    """
	    labels = {0: "Negative", 1: "Neutral", 2: "Positive"}

	    encoded = tokenizer(
	        text.lower(),
	        return_tensors="pt",
	        truncation=True,
	        padding=True,
	    )

	    # Inference only, so autograd bookkeeping is switched off.
	    with torch.no_grad():
	        raw_scores = model(**encoded).logits
	        class_probs = torch.nn.functional.softmax(raw_scores, dim=-1)
	        best_index = torch.argmax(class_probs, dim=-1).item()

	    label = labels[best_index]

	    # Row 0 holds the probabilities of the (single) input text.
	    percentages = {}
	    for index, value in enumerate(class_probs[0].tolist()):
	        percentages[labels[index]] = f"{value*100:.2f}%"

	    return label, percentages

	def analyze_sentiment(reviews):
	    """Collect the nouns and adjectives used in clearly polarised sentences.

	    Every review is split into sentences with TextBlob. For a sentence whose
	    polarity is above 0.3 its nouns/adjectives (POS tags NN, NNS, JJ, JJR,
	    JJS) are tallied as pros; for a sentence whose polarity is below -0.3
	    they are tallied as cons. Sentences in between contribute nothing.

	    Args:
	        reviews: Iterable of reviews; each item is passed through ``str()``
	            before analysis.

	    Returns:
	        A ``(pros, cons)`` tuple of word lists, most frequently counted
	        word first. Words with equal counts keep first-seen order. Both
	        lists are empty when nothing qualified.
	    """
	    keyword_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')
	    pros = defaultdict(int)
	    cons = defaultdict(int)

	    for review in reviews:
	        for sentence in TextBlob(str(review)).sentences:
	            polarity = sentence.sentiment.polarity
	            if polarity > 0.3:  # Positive
	                bucket = pros
	            elif polarity < -0.3:  # Negative
	                bucket = cons
	            else:
	                # Neutral sentence: skip it before paying for POS tagging.
	                continue

	            # Tag this sentence only. Tagging the whole review here would
	            # credit words from its other sentences (possibly of the
	            # opposite polarity) to this sentence's bucket, once per
	            # polarised sentence.
	            for word, tag in sentence.tags:
	                if tag in keyword_tags:
	                    bucket[word] += 1

	    # sorted() is stable, also with reverse=True, so ties keep insertion order.
	    pros_sorted = sorted(pros, key=pros.get, reverse=True)
	    cons_sorted = sorted(cons, key=cons.get, reverse=True)

	    return pros_sorted, cons_sorted

	def generate_category_summary(reviews_text):
	    """Build a short pros/cons summary for newline-separated reviews.

	    Blank lines are dropped. With no reviews left, a prompt message is
	    returned. Otherwise the top five pros and cons from
	    ``analyze_sentiment`` are written into a text template; a template
	    longer than 100 characters is condensed by the module-level
	    ``summarizer``, a shorter one is returned unchanged. If the summarizer
	    raises, the returned string describes the error instead.
	    """
	    review_lines = []
	    for raw_line in reviews_text.split('\n'):
	        cleaned = raw_line.strip()
	        if cleaned:
	            review_lines.append(cleaned)

	    if len(review_lines) == 0:
	        return "Please enter at least one review."

	    # Keyword lists, most frequent first.
	    positives, negatives = analyze_sentiment(review_lines)

	    if positives:
	        pros_line = ', '.join(positives[:5])
	    else:
	        pros_line = 'No significant positive feedback'

	    if negatives:
	        cons_line = ', '.join(negatives[:5])
	    else:
	        cons_line = 'No major complaints'

	    summary_text = f"""
	Review Analysis Summary:

	PROS:
	{pros_line}

	CONS:
	{cons_line}

	Based on {len(review_lines)} reviews analyzed.
	"""

	    # Short templates are returned as they are.
	    if len(summary_text) <= 100:
	        return summary_text

	    # Longer templates are condensed with BART.
	    try:
	        candidates = summarizer(
	            summary_text,
	            max_length=150,
	            min_length=50,
	            do_sample=False,
	            truncation=True
	        )
	        return candidates[0]['summary_text']
	    except Exception as e:
	        return f"Error generating summary: {str(e)}"

	def analyze_reviews(reviews_text):
	    """Gradio callback: classify the reviews, chart them and summarise them.

	    Args:
	        reviews_text: Newline-separated reviews from the input textbox.

	    Returns:
	        A ``(table, plot_html, summary)`` tuple feeding the Dataframe, HTML
	        and Textbox outputs in that order. For blank (or ``None``) input the
	        table is an empty DataFrame with the usual columns, the plot HTML is
	        an empty string and the summary asks for at least one review.
	    """
	    # With no reviews, analyze_reviews_sentiment() puts a message string in
	    # the table slot, which a Dataframe output cannot render as a table.
	    # Handle that case here so every slot gets a value of the right type.
	    if not reviews_text or not reviews_text.strip():
	        empty_table = pd.DataFrame(columns=['Review', 'Sentiment', 'Confidence'])
	        return empty_table, "", "Please enter at least one review."

	    # Per-review sentiment table and distribution chart
	    df, plot_html = analyze_reviews_sentiment(reviews_text)

	    # Pros/cons summary
	    summary = generate_category_summary(reviews_text)

	    return df, plot_html, summary

	# Per-review sentiment classification plus a distribution chart
	def analyze_reviews_sentiment(reviews_text):
	    """Classify every review line and chart the sentiment distribution.

	    Args:
	        reviews_text: Newline-separated reviews; blank lines are ignored.

	    Returns:
	        ``(df, plot_html)``. ``df`` is a DataFrame with one row per review
	        and the columns 'Review', 'Sentiment' and 'Confidence' (the values
	        returned by ``predict_sentiment``). ``plot_html`` is an ``<img>``
	        tag embedding a bar chart of the sentiment counts as a base64 PNG.
	        When there is no non-blank line, the pair
	        ``("Please enter at least one review.", None)`` is returned instead.
	    """
	    reviews = [line.strip() for line in reviews_text.split('\n') if line.strip()]

	    if not reviews:
	        return "Please enter at least one review.", None

	    results = []
	    for review in reviews:
	        sentiment, probs = predict_sentiment(review)
	        results.append({
	            'Review': review,
	            'Sentiment': sentiment,
	            'Confidence': probs
	        })

	    df = pd.DataFrame(results)
	    sentiment_counts = df['Sentiment'].value_counts()

	    # Draw on an explicit figure rather than pyplot's implicit "current
	    # figure", and close it in ``finally`` so a failure while plotting or
	    # saving cannot leave the figure registered with pyplot.
	    fig, ax = plt.subplots(figsize=(10, 6))
	    try:
	        ax.bar(sentiment_counts.index, sentiment_counts.values)
	        ax.set_title('Sentiment Distribution')
	        ax.set_xlabel('Sentiment')
	        ax.set_ylabel('Count')

	        with io.BytesIO() as buf:
	            fig.savefig(buf, format='png')
	            plot_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
	    finally:
	        plt.close(fig)

	    return df, f'<img src="data:image/png;base64,{plot_base64}" style="max-width:100%;">'

	# Build the Gradio UI
	def create_interface():
	    """Assemble the Blocks app and return it without launching it.

	    A single "Review Analysis" tab holds a multi-line review textbox and an
	    "Analyze Reviews" button. Below them a row shows the sentiment table and
	    distribution plot in one column and the review summary in another. The
	    button click runs ``analyze_reviews`` on the textbox content and fills
	    the three outputs.
	    """
	    with gr.Blocks() as app:
	        gr.Markdown("# Review Analysis System")

	        with gr.Tab("Review Analysis"):
	            review_box = gr.Textbox(
	                lines=5,
	                label="Enter reviews (one per line)",
	                placeholder="Enter product reviews here...",
	            )
	            run_button = gr.Button("Analyze Reviews")

	            with gr.Row():
	                with gr.Column():
	                    table_view = gr.Dataframe(label="Sentiment Analysis Results")
	                    chart_view = gr.HTML(label="Sentiment Distribution")

	                with gr.Column():
	                    summary_view = gr.Textbox(label="Review Summary", lines=5)

	            # Output order matches the tuple returned by analyze_reviews.
	            run_button.click(
	                fn=analyze_reviews,
	                inputs=[review_box],
	                outputs=[table_view, chart_view, summary_view],
	            )

	    return app

	# Create and launch the interface
	# Both statements run when the module is executed or imported: the Blocks
	# app is built and bound to the module-level name `demo`, then launched.
	demo = create_interface()
	demo.launch()