Spaces:

shresht8
/

sentiment-analysis-excel

Running

App Files Files Community

sentiment-analysis-excel / app.py

shresht8

new new graphs and summaries

e0b4a17 verified 7 months ago

raw

history blame

10.2 kB

	import gradio as gr
	import pandas as pd
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import plotly.express as px
	import plotly.graph_objects as go
	from collections import defaultdict

	# Load the tokenizer and model once, at import time, so that every call to
	# predict_sentiment() reuses the same objects instead of reloading them.
	# model_name is the identifier handed to both from_pretrained() calls.
	model_name = "tabularisai/multilingual-sentiment-analysis"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSequenceClassification.from_pretrained(model_name)


	def predict_sentiment(texts, batch_size=32):
	    """
	    Predict a sentiment label for each text in a list.

	    Args:
	        texts: Iterable of review texts. ``None`` items are treated as an
	            empty string and any other non-string item is converted with
	            ``str()``, because the tokenizer is only given strings.
	        batch_size: Number of texts sent through the model per forward
	            pass. Bounding this keeps memory use independent of how many
	            reviews the uploaded file contains.

	    Returns:
	        A list with one label per input text, in input order. Labels are
	        "Very Negative", "Negative", "Neutral", "Positive" or
	        "Very Positive". An empty input yields an empty list.

	    Raises:
	        ValueError: If ``batch_size`` is smaller than 1.
	    """
	    if batch_size < 1:
	        raise ValueError("batch_size must be at least 1")

	    # Class index produced by the model -> human readable label
	    sentiment_map = {
	        0: "Very Negative",
	        1: "Negative",
	        2: "Neutral",
	        3: "Positive",
	        4: "Very Positive"
	    }

	    cleaned = ["" if text is None else str(text) for text in texts]
	    if not cleaned:
	        # Nothing to classify; return before touching the tokenizer
	        return []

	    labels = []
	    for start in range(0, len(cleaned), batch_size):
	        batch = cleaned[start:start + batch_size]
	        inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=512)
	        with torch.no_grad():
	            outputs = model(**inputs)
	        # softmax is monotonic, so the argmax of the logits is the same
	        # class the argmax of the probabilities would select
	        predicted = torch.argmax(outputs.logits, dim=-1).tolist()
	        labels.extend(sentiment_map[index] for index in predicted)
	    return labels


	def process_single_sheet(df, product_name):
	    """
	    Run sentiment analysis on the 'Reviews' column of one dataframe.

	    Args:
	        df: DataFrame that must contain a 'Reviews' column. It is not
	            modified; the result is built on a copy.
	        product_name: Name used in the error message when the column is
	            missing.

	    Returns:
	        A tuple ``(result_df, sentiment_counts)`` where ``result_df`` is a
	        copy of ``df`` with an added 'Sentiment' column and
	        ``sentiment_counts`` is a Series counting each predicted label.

	    Raises:
	        ValueError: If ``df`` has no 'Reviews' column.
	    """
	    if 'Reviews' not in df.columns:
	        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")

	    # Work on a copy so the caller's DataFrame does not silently gain a column
	    result_df = df.copy()

	    # Missing cells become "", and non-text cells (numbers, dates, ...) are
	    # turned into text so that only strings reach the tokenizer
	    review_texts = result_df['Reviews'].fillna("").astype(str).tolist()

	    # A sheet without rows has nothing to classify; skip the model call
	    sentiments = predict_sentiment(review_texts) if review_texts else []
	    result_df['Sentiment'] = sentiments

	    # Calculate sentiment distribution (explicit dtype keeps the empty case well-defined)
	    sentiment_counts = pd.Series(sentiments, dtype="object").value_counts()

	    return result_df, sentiment_counts


	def create_comparison_charts(sentiment_results):
	    """
	    Create investment-focused comparison charts for different products.

	    NOTE: a second function with the same name is defined later in this
	    file. Being defined later, it replaces this one when the module is
	    loaded, and process_file() unpacks two return values, which matches
	    that later definition rather than the three values returned here.

	    Args:
	        sentiment_results: Mapping of product name to a pandas Series whose
	            index holds sentiment labels and whose values are review counts.

	    Returns:
	        A tuple ``(score_fig, ratio_fig, summary_df)``: a bar chart of the
	        weighted sentiment score per product, a grouped bar chart of the
	        positive vs. negative percentages, and a summary DataFrame sorted
	        by sentiment score in descending order.
	    """
	    # Weight of each label in the 0-100 sentiment score
	    sentiment_weights = {
	        'Very Negative': 0,
	        'Negative': 25,
	        'Neutral': 50,
	        'Positive': 75,
	        'Very Positive': 100
	    }

	    # Prepare data for plotting
	    plot_data = []
	    for product, sentiment_counts in sentiment_results.items():
	        sentiment_dict = sentiment_counts.to_dict()
	        total = sum(sentiment_dict.values())

	        row = {
	            'Product': product,
	            'Total Reviews': total
	        }
	        # Start every label at 0 so that a label missing for this product
	        # (but present for another one) is 0% rather than NaN
	        row.update(dict.fromkeys(sentiment_weights, 0.0))
	        if total:
	            # Percentage of the product's reviews carrying each label
	            for sentiment, count in sentiment_dict.items():
	                row[sentiment] = (count / total) * 100
	        plot_data.append(row)

	    df = pd.DataFrame(plot_data)

	    # Weighted sentiment score (0 to 100), computed for all products at once
	    score = pd.Series(0.0, index=df.index)
	    for sentiment, weight in sentiment_weights.items():
	        score = score + df[sentiment] * weight / 100
	    df['Sentiment Score'] = score.round(2)

	    # Create sentiment score chart
	    score_fig = go.Figure()
	    score_fig.add_trace(go.Bar(
	        x=df['Product'],
	        y=df['Sentiment Score'],
	        text=df['Sentiment Score'].round(1),
	        textposition='auto',
	        marker_color='rgb(65, 105, 225)'
	    ))
	    score_fig.update_layout(
	        title='Overall Sentiment Score by Product (0-100)',
	        yaxis_title='Weighted Sentiment Score',
	        yaxis_range=[0, 100],
	        showlegend=False
	    )

	    # Calculate Positive-Negative Ratios
	    df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
	    df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)

	    # Create Positive-Negative ratio chart
	    ratio_fig = go.Figure()
	    ratio_fig.add_trace(go.Bar(
	        name='Positive',
	        x=df['Product'],
	        y=df['Positive Ratio'],
	        marker_color='rgb(50, 205, 50)'
	    ))
	    ratio_fig.add_trace(go.Bar(
	        name='Negative',
	        x=df['Product'],
	        y=df['Negative Ratio'],
	        marker_color='rgb(220, 20, 60)'
	    ))
	    ratio_fig.update_layout(
	        barmode='group',
	        title='Positive vs Negative Sentiment Ratio by Product',
	        yaxis_title='Percentage (%)'
	    )

	    # Create summary table with investment-relevant metrics
	    summary_df = pd.DataFrame({
	        'Product': df['Product'],
	        'Total Reviews': df['Total Reviews'],
	        'Sentiment Score (0-100)': df['Sentiment Score'],
	        'Positive Ratio (%)': df['Positive Ratio'].round(2),
	        'Negative Ratio (%)': df['Negative Ratio'].round(2),
	        'Neutral Ratio (%)': df['Neutral'].round(2)
	    })

	    # Confidence Score = opinionated share / neutral share; a neutral share
	    # of 0 is replaced by 0.001 to avoid division by zero
	    opinionated = summary_df['Positive Ratio (%)'] + summary_df['Negative Ratio (%)']
	    neutral = summary_df['Neutral Ratio (%)'].replace(0, 0.001)
	    summary_df['Confidence Score'] = (opinionated / neutral).round(2)

	    # Sort by Sentiment Score for easy comparison
	    summary_df = summary_df.sort_values('Sentiment Score (0-100)', ascending=False)

	    return score_fig, ratio_fig, summary_df


	def process_file(file_obj):
	    """
	    Process the uploaded file and produce the sentiment analysis outputs.

	    A CSV file is treated as a single product named "Product". An Excel
	    workbook is treated as one product per sheet, named after the sheet.

	    Args:
	        file_obj: The uploaded file, either an object exposing its path as
	            ``.name`` or a plain path string.

	    Returns:
	        A tuple ``(distribution_plot, summary_table, output_path)`` where
	        ``output_path`` points to an Excel report containing one sheet per
	        product plus a summary sheet.

	    Raises:
	        gr.Error: For any failure (no file, unsupported format, missing
	            'Reviews' column, ...), carrying the original message so it is
	            shown in the UI.
	    """
	    # Function-scope imports: these names are only needed by this function
	    import os
	    import tempfile

	    try:
	        if file_obj is None:
	            raise ValueError("No file uploaded. Please upload a CSV or Excel file.")

	        # Accept both an object with a .name path and a bare path string
	        file_path = str(getattr(file_obj, "name", file_obj))
	        # Compare the extension case-insensitively so "DATA.CSV" is accepted
	        extension = os.path.splitext(file_path)[1].lower()

	        sentiment_results = {}
	        all_processed_dfs = {}

	        if extension == '.csv':
	            df = pd.read_csv(file_path)
	            product_name = "Product"  # Default name for CSV
	            processed_df, sentiment_counts = process_single_sheet(df, product_name)
	            all_processed_dfs[product_name] = processed_df
	            sentiment_results[product_name] = sentiment_counts

	        elif extension in ('.xlsx', '.xls'):
	            # Open the workbook once and close it deterministically
	            with pd.ExcelFile(file_path) as excel_file:
	                for sheet_name in excel_file.sheet_names:
	                    df = excel_file.parse(sheet_name)
	                    processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
	                    all_processed_dfs[sheet_name] = processed_df
	                    sentiment_results[sheet_name] = sentiment_counts
	        else:
	            raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")

	        # Create visualizations
	        distribution_plot, summary_table = create_comparison_charts(sentiment_results)

	        # Pick a summary sheet name that does not clash with a product sheet,
	        # otherwise the summary would be written on top of that sheet
	        taken = {str(name).lower() for name in all_processed_dfs}
	        summary_sheet = 'Summary'
	        counter = 1
	        while summary_sheet.lower() in taken:
	            counter += 1
	            summary_sheet = f'Summary {counter}'

	        # Write each report into its own temporary directory so concurrent
	        # requests cannot overwrite one another; the file name is unchanged.
	        # The directory is left in place because the caller serves the file.
	        output_dir = tempfile.mkdtemp(prefix="sentiment_analysis_")
	        output_path = os.path.join(output_dir, "sentiment_analysis_results.xlsx")
	        with pd.ExcelWriter(output_path) as writer:
	            for sheet_name, df in all_processed_dfs.items():
	                df.to_excel(writer, sheet_name=sheet_name, index=False)
	            summary_table.to_excel(writer, sheet_name=summary_sheet, index=False)

	        return (
	            distribution_plot,
	            summary_table,
	            output_path
	        )

	    except gr.Error:
	        # Already a UI error; do not wrap it a second time
	        raise
	    except Exception as e:
	        # Top-level boundary: surface the message in the UI, keep the cause
	        raise gr.Error(str(e)) from e


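	# Chart/summary builder actually used by process_file(): this later definition
	# overrides the earlier create_comparison_charts and returns (figure, summary table).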
	def create_comparison_charts(sentiment_results):
	    """
	    Create a simplified, investment-focused comparison chart and summary.

	    Args:
	        sentiment_results: Mapping of product name to a pandas Series whose
	            index holds sentiment labels and whose values are review counts.

	    Returns:
	        A tuple ``(stack_fig, summary_df)``: a stacked bar chart with the
	        percentage of each sentiment per product, and a DataFrame with the
	        total review count and the positive / neutral / negative
	        percentages per product, sorted by positive percentage descending.
	    """
	    # Stacking order of the bars and the colour used for each label
	    sentiments = ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']
	    colors = ['rgb(39, 174, 96)', 'rgb(46, 204, 113)',
	              'rgb(241, 196, 15)', 'rgb(231, 76, 60)',
	              'rgb(192, 57, 43)']

	    # Prepare data
	    plot_data = []
	    for product, sentiment_counts in sentiment_results.items():
	        sentiment_dict = sentiment_counts.to_dict()
	        total = sum(sentiment_dict.values())

	        row = {
	            'Product': product,
	            'Total Reviews': total
	        }
	        # Start every label at 0 so that a label missing for this product
	        # (but present for another one) is 0% rather than NaN
	        row.update(dict.fromkeys(sentiments, 0.0))
	        if total:
	            for sentiment, count in sentiment_dict.items():
	                row[sentiment] = (count / total) * 100
	        plot_data.append(row)

	    df = pd.DataFrame(plot_data)

	    # 1. Simple Stacked Bar Chart showing sentiment distribution
	    stack_fig = go.Figure()
	    for sentiment, color in zip(sentiments, colors):
	        stack_fig.add_trace(go.Bar(
	            name=sentiment,
	            x=df['Product'],
	            y=df[sentiment],
	            marker_color=color
	        ))

	    stack_fig.update_layout(
	        barmode='stack',
	        title='Sentiment Distribution by Product',
	        yaxis_title='Percentage (%)'
	    )

	    # 2. Aggregated Sentiment Ratios for Quick Comparison
	    df['Positive_Total'] = df[['Positive', 'Very Positive']].sum(axis=1)
	    df['Negative_Total'] = df[['Negative', 'Very Negative']].sum(axis=1)

	    summary_df = pd.DataFrame({
	        'Product': df['Product'],
	        'Total Reviews': df['Total Reviews'],
	        'Positive (%)': df['Positive_Total'].round(2),
	        'Neutral (%)': df['Neutral'].round(2),
	        'Negative (%)': df['Negative_Total'].round(2)
	    })

	    # Sort by Positive percentage for easy comparison
	    summary_df = summary_df.sort_values('Positive (%)', ascending=False)

	    return stack_fig, summary_df


	# Build the Gradio UI: a file upload, an analyze button, and three outputs
	# (distribution plot, summary table, downloadable report).
	with gr.Blocks() as interface:
	gr.Markdown("# Product Review Sentiment Analysis")

	# Usage instructions shown above the upload control
	gr.Markdown("""
	### Quick Guide
	1. Excel File (Multiple Products):
	- Create separate sheets for each product
	- Name sheets with product/company names
	- Include "Reviews" column in each sheet

	2. CSV File (Single Product):
	- Include "Reviews" column

	Upload your file and click Analyze to get started.
	""")

	# Input: restricted to the extensions process_file() knows how to read
	with gr.Row():
	file_input = gr.File(
	label="Upload File (CSV or Excel)",
	file_types=[".csv", ".xlsx", ".xls"]
	)

	with gr.Row():
	analyze_btn = gr.Button("Analyze Sentiments")

	# Outputs, in the same order as the tuple returned by process_file()
	with gr.Row():
	distribution_plot = gr.Plot(label="Sentiment Distribution")

	with gr.Row():
	summary_table = gr.Dataframe(label="Summary Metrics")

	with gr.Row():
	output_file = gr.File(label="Download Full Report")

	# Clicking the button runs process_file on the uploaded file and routes its
	# three return values to the plot, the table and the download component
	analyze_btn.click(
	fn=process_file,
	inputs=[file_input],
	outputs=[distribution_plot, summary_table, output_file]
	)

	# Start the app; this runs whenever the module is executed or imported
	interface.launch()