|
import gradio as gr |
|
import pandas as pd |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from collections import defaultdict |
|
|
|
|
|
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
model_name = "tabularisai/multilingual-sentiment-analysis"

# The tokenizer and the classifier are created once, when this module is
# loaded, and are then read as globals by predict_sentiment().
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
def predict_sentiment(texts, batch_size=32):
    """
    Predict a sentiment label for each text.

    Args:
        texts: Iterable of strings to classify. A single string is treated
            as a one-element list.
        batch_size: Maximum number of texts tokenized and sent through the
            model in one forward pass, so that memory use is bounded by the
            batch rather than by the size of the whole input.

    Returns:
        A list with one label per input text, in input order. Each label is
        one of "Very Negative", "Negative", "Neutral", "Positive" or
        "Very Positive". An empty input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Class index -> human-readable label.
    sentiment_map = {
        0: "Very Negative",
        1: "Negative",
        2: "Neutral",
        3: "Positive",
        4: "Very Positive",
    }

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Slicing a bare string would split it into characters, so wrap it.
    texts = [texts] if isinstance(texts, str) else list(texts)

    # Nothing to classify: return before touching the tokenizer or model.
    if not texts:
        return []

    labels = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512,
            )
            logits = model(**inputs).logits
            # Softmax is monotonic, so the argmax of the raw logits selects
            # the same class as the argmax of the probabilities.
            predicted = torch.argmax(logits, dim=-1).tolist()
            labels.extend(sentiment_map[index] for index in predicted)

    return labels
|
|
|
|
|
def process_single_sheet(df, product_name):
    """
    Label every review in one dataframe and tally the labels.

    Args:
        df: DataFrame that must contain a 'Reviews' column. A 'Sentiment'
            column is written onto this same object (modified in place).
        product_name: Name used in the error message when the 'Reviews'
            column is missing.

    Returns:
        Tuple ``(df, sentiment_counts)``: the input frame carrying the new
        'Sentiment' column, and a Series mapping each predicted label to the
        number of reviews that received it.

    Raises:
        ValueError: If ``df`` has no 'Reviews' column.
    """
    if 'Reviews' not in df.columns:
        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")

    # Missing cells become empty strings first (so they do not turn into the
    # text "nan"); every remaining cell is then coerced to str because the
    # tokenizer expects string inputs, and spreadsheet cells can be numbers
    # or dates.
    reviews = df['Reviews'].fillna("").astype(str).tolist()

    # A sheet without rows has nothing to classify; skip the model call.
    sentiments = predict_sentiment(reviews) if reviews else []
    df['Sentiment'] = sentiments

    # dtype is fixed so that an empty tally is still a well-defined Series.
    sentiment_counts = pd.Series(sentiments, dtype=object).value_counts()

    return df, sentiment_counts
|
|
|
|
|
def create_comparison_charts(sentiment_results):
    """
    Create investment-focused comparison charts for different products.

    NOTE: a second function with this same name is defined further down in
    this file and replaces this one once the module has finished loading.
    process_file() unpacks the two-value return of that later definition,
    not the three values returned here.

    Args:
        sentiment_results: Mapping of product name to a Series whose index
            holds sentiment labels and whose values are review counts.

    Returns:
        Tuple ``(score_fig, ratio_fig, summary_df)``:
        a bar chart of the weighted 0-100 sentiment score per product, a
        grouped bar chart of positive vs. negative percentages per product,
        and a summary DataFrame sorted by sentiment score, highest first.
    """
    # Weight of each label in the 0-100 score; also fixes the label order.
    sentiment_weights = {
        'Very Negative': 0,
        'Negative': 25,
        'Neutral': 50,
        'Positive': 75,
        'Very Positive': 100,
    }
    labels = list(sentiment_weights)

    # One row per product: review total plus the percentage of each label.
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())
        row = {'Product': product, 'Total Reviews': total}
        if total:  # a product without reviews keeps 0% everywhere
            for sentiment, count in sentiment_dict.items():
                row[sentiment] = (count / total) * 100
        plot_data.append(row)

    # Passing ``columns`` guarantees every expected column exists, even when
    # there are no products at all or a label never occurred.
    df = pd.DataFrame(plot_data, columns=['Product', 'Total Reviews', *labels])

    # A label that one product has and another lacks shows up as NaN for the
    # latter; it means "0% of reviews", so normalise it to 0.0. Left as NaN
    # it would turn the whole weighted score of that product into NaN.
    for sentiment in labels:
        df[sentiment] = df[sentiment].astype(float).fillna(0.0)

    # Weighted score, computed for all products at once.
    score = pd.Series(0.0, index=df.index)
    for sentiment, weight in sentiment_weights.items():
        score = score + df[sentiment] * weight / 100
    df['Sentiment Score'] = score.round(2)

    score_fig = go.Figure()
    score_fig.add_trace(go.Bar(
        x=df['Product'],
        y=df['Sentiment Score'],
        text=df['Sentiment Score'].round(1),
        textposition='auto',
        marker_color='rgb(65, 105, 225)',
    ))
    score_fig.update_layout(
        title='Overall Sentiment Score by Product (0-100)',
        yaxis_title='Weighted Sentiment Score',
        yaxis_range=[0, 100],
        showlegend=False,
    )

    # Collapse the five labels into positive / negative shares.
    df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)

    ratio_fig = go.Figure()
    ratio_fig.add_trace(go.Bar(
        name='Positive',
        x=df['Product'],
        y=df['Positive Ratio'],
        marker_color='rgb(50, 205, 50)',
    ))
    ratio_fig.add_trace(go.Bar(
        name='Negative',
        x=df['Product'],
        y=df['Negative Ratio'],
        marker_color='rgb(220, 20, 60)',
    ))
    ratio_fig.update_layout(
        barmode='group',
        title='Positive vs Negative Sentiment Ratio by Product',
        yaxis_title='Percentage (%)',
    )

    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Sentiment Score (0-100)': df['Sentiment Score'],
        'Positive Ratio (%)': df['Positive Ratio'].round(2),
        'Negative Ratio (%)': df['Negative Ratio'].round(2),
        'Neutral Ratio (%)': df['Neutral'].round(2),
    })

    # Ratio of opinionated reviews to neutral ones; a zero neutral share is
    # swapped for 0.001 so the division stays finite.
    opinionated = summary_df['Positive Ratio (%)'] + summary_df['Negative Ratio (%)']
    neutral = summary_df['Neutral Ratio (%)'].replace(0, 0.001)
    summary_df['Confidence Score'] = (opinionated / neutral).round(2)

    summary_df = summary_df.sort_values('Sentiment Score (0-100)', ascending=False)

    return score_fig, ratio_fig, summary_df
|
|
|
|
|
def process_file(file_obj):
    """
    Run sentiment analysis on an uploaded file and build the report.

    A CSV file is treated as a single product named "Product". An Excel
    workbook is treated as one product per sheet, named after the sheet.
    Each sheet/file must contain a 'Reviews' column.

    Side effect: writes "sentiment_analysis_results.xlsx" (one sheet per
    product plus a summary sheet) relative to the current working directory.
    NOTE(review): the output path is fixed, so overlapping requests would
    write to the same file - confirm whether that matters for deployment.

    Args:
        file_obj: The upload handed over by the ``gr.File`` input. Either an
            object exposing the path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.

    Returns:
        Tuple ``(distribution_plot, summary_table, output_path)`` matching
        the three outputs wired to the Analyze button.

    Raises:
        gr.Error: For any failure (no upload, unsupported extension, missing
            'Reviews' column, read/write errors), carrying the original
            message so it is shown to the user.
    """
    try:
        if file_obj is None:
            raise ValueError("No file uploaded. Please upload a CSV or Excel file.")

        # Fall back to the object itself when it has no ``.name`` attribute
        # (i.e. it already is the path).
        file_path = str(getattr(file_obj, "name", file_obj))
        # Compare extensions case-insensitively so "DATA.CSV" is accepted.
        lowered_path = file_path.lower()

        sentiment_results = {}
        all_processed_dfs = {}

        if lowered_path.endswith('.csv'):
            df = pd.read_csv(file_path)
            product_name = "Product"
            processed_df, sentiment_counts = process_single_sheet(df, product_name)
            all_processed_dfs[product_name] = processed_df
            sentiment_results[product_name] = sentiment_counts

        elif lowered_path.endswith(('.xlsx', '.xls')):
            # Open the workbook once and close it deterministically instead
            # of re-reading the file for every sheet.
            with pd.ExcelFile(file_path) as excel_file:
                for sheet_name in excel_file.sheet_names:
                    df = excel_file.parse(sheet_name)
                    processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
                    all_processed_dfs[sheet_name] = processed_df
                    sentiment_results[sheet_name] = sentiment_counts
        else:
            raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")

        distribution_plot, summary_table = create_comparison_charts(sentiment_results)

        # Pick a summary sheet name that cannot clash with a product sheet;
        # writing two frames under one sheet name would mix their cells.
        taken_names = {str(name).lower() for name in all_processed_dfs}
        summary_sheet = 'Summary'
        attempt = 1
        while summary_sheet.lower() in taken_names:
            attempt += 1
            summary_sheet = f'Summary ({attempt})'

        output_path = "sentiment_analysis_results.xlsx"
        with pd.ExcelWriter(output_path) as writer:
            for sheet_name, df in all_processed_dfs.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False)
            summary_table.to_excel(writer, sheet_name=summary_sheet, index=False)

        return (
            distribution_plot,
            summary_table,
            output_path,
        )

    except Exception as e:
        # Surface the message in the UI while keeping the original traceback
        # attached for server-side debugging.
        raise gr.Error(str(e)) from e
|
|
|
|
|
|
|
|
|
def create_comparison_charts(sentiment_results):
    """
    Create simplified, investment-focused comparison charts.

    Args:
        sentiment_results: Mapping of product name to a Series whose index
            holds sentiment labels and whose values are review counts.

    Returns:
        Tuple ``(stack_fig, summary_df)``: a stacked bar chart with the
        percentage of each sentiment label per product, and a summary
        DataFrame (total reviews, positive / neutral / negative percentages)
        sorted by 'Positive (%)', highest first.
    """
    # Stacking order of the traces, paired with their colours.
    sentiments = ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']
    colors = [
        'rgb(39, 174, 96)',
        'rgb(46, 204, 113)',
        'rgb(241, 196, 15)',
        'rgb(231, 76, 60)',
        'rgb(192, 57, 43)',
    ]

    # One row per product: review total plus the percentage of each label.
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())
        row = {'Product': product, 'Total Reviews': total}
        if total:  # a product without reviews keeps 0% everywhere
            for sentiment, count in sentiment_dict.items():
                row[sentiment] = (count / total) * 100
        plot_data.append(row)

    # Passing ``columns`` guarantees every expected column exists, even when
    # there are no products at all or a label never occurred.
    df = pd.DataFrame(
        plot_data,
        columns=['Product', 'Total Reviews',
                 'Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive'],
    )

    # A label that one product has and another lacks shows up as NaN for the
    # latter; it means "0% of reviews", so normalise it to 0.0 before it can
    # reach the chart or the summary table.
    for sentiment in sentiments:
        df[sentiment] = df[sentiment].astype(float).fillna(0.0)

    stack_fig = go.Figure()
    for sentiment, color in zip(sentiments, colors):
        stack_fig.add_trace(go.Bar(
            name=sentiment,
            x=df['Product'],
            y=df[sentiment],
            marker_color=color,
        ))
    stack_fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Product',
        yaxis_title='Percentage (%)',
    )

    # Collapse the five labels into positive / negative shares.
    df['Positive_Total'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative_Total'] = df[['Negative', 'Very Negative']].sum(axis=1)

    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Positive (%)': df['Positive_Total'].round(2),
        'Neutral (%)': df['Neutral'].round(2),
        'Negative (%)': df['Negative_Total'].round(2),
    })

    summary_df = summary_df.sort_values('Positive (%)', ascending=False)

    return stack_fig, summary_df
|
|
|
|
|
|
|
# Page layout: title, usage guide, upload widget, trigger button and the
# three outputs produced by process_file().
with gr.Blocks() as interface:
    gr.Markdown("# Product Review Sentiment Analysis")

    gr.Markdown("""
    ### Quick Guide
    1. **Excel File (Multiple Products)**:
    - Create separate sheets for each product
    - Name sheets with product/company names
    - Include "Reviews" column in each sheet

    2. **CSV File (Single Product)**:
    - Include "Reviews" column

    Upload your file and click Analyze to get started.
    """)

    # Input: a single CSV or Excel upload.
    with gr.Row():
        file_input = gr.File(
            label="Upload File (CSV or Excel)",
            file_types=[".csv", ".xlsx", ".xls"],
        )

    with gr.Row():
        analyze_btn = gr.Button("Analyze Sentiments")

    # Outputs, in the same order as the tuple returned by process_file().
    with gr.Row():
        distribution_plot = gr.Plot(label="Sentiment Distribution")

    with gr.Row():
        summary_table = gr.Dataframe(label="Summary Metrics")

    with gr.Row():
        output_file = gr.File(label="Download Full Report")

    # Clicking the button runs the analysis on the uploaded file and fills
    # the plot, the table and the download slot.
    analyze_btn.click(
        process_file,
        inputs=[file_input],
        outputs=[distribution_plot, summary_table, output_file],
    )

# Start the web server as soon as the module is executed.
interface.launch()
|
|