|
import gradio as gr |
|
import pandas as pd |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from collections import defaultdict |
|
|
|
|
|
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "tabularisai/multilingual-sentiment-analysis"

# The tokenizer and classifier are created once at import time and reused as
# module-level globals by `predict_sentiment_with_scores`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
# Weight applied to each class index when folding the class probabilities
# into a single score: index 0 contributes nothing, index 4 contributes fully.
SENTIMENT_WEIGHTS = dict(enumerate((0.0, 0.25, 0.5, 0.75, 1.0)))
|
|
|
|
|
def predict_sentiment_with_scores(texts, batch_size=32):
    """
    Predict sentiment for a list of texts and return both class labels and sentiment scores.

    Args:
        texts: A list of strings to classify. A single string is treated as a
            one-element list.
        batch_size: Maximum number of texts sent through the model in one
            forward pass, so memory use does not grow with the input size.

    Returns:
        A ``(predicted_classes, sentiment_scores)`` tuple of two lists aligned
        with ``texts``. ``predicted_classes`` holds the label of the most
        probable class; ``sentiment_scores`` holds the probability-weighted
        score (using ``SENTIMENT_WEIGHTS``) scaled to 0-100 and rounded to two
        decimals. Both lists are empty when ``texts`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Keep the single-string call form working instead of splitting it into characters.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # Nothing to classify; avoid calling the tokenizer with an empty batch.
        return [], []

    sentiment_map = {
        0: "Very Negative",
        1: "Negative",
        2: "Neutral",
        3: "Positive",
        4: "Very Positive",
    }

    predicted_classes = []
    sentiment_scores = []

    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)

        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

        predicted_classes.extend(
            sentiment_map[class_index]
            for class_index in torch.argmax(probabilities, dim=-1).tolist()
        )

        # One tolist() per batch instead of one .item() call per probability.
        for class_probabilities in probabilities.tolist():
            score = sum(
                probability * SENTIMENT_WEIGHTS[class_index]
                for class_index, probability in enumerate(class_probabilities)
            )
            sentiment_scores.append(round(score * 100, 2))

    return predicted_classes, sentiment_scores
|
|
|
|
|
def process_single_sheet(df, product_name):
    """
    Process a single dataframe and return sentiment analysis results.

    Args:
        df: DataFrame with a 'Reviews' column. It is not modified; the
            results are attached to a copy.
        product_name: Name used to identify this sheet/file in error messages.

    Returns:
        A ``(df, sentiment_counts, avg_sentiment_score)`` tuple: a copy of the
        input with 'Sentiment' and 'Sentiment_Score' columns added, a Series
        counting rows per sentiment label, and the mean sentiment score
        rounded to two decimals.

    Raises:
        ValueError: If the 'Reviews' column is missing or the sheet has no rows.
    """
    if 'Reviews' not in df.columns:
        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")

    if df.empty:
        # Without this guard the average below divides by zero.
        raise ValueError(f"No reviews found in sheet/file for {product_name}")

    # Work on a copy so the caller's DataFrame is left untouched.
    df = df.copy()

    # Missing cells become empty strings; other non-string cells (e.g. numbers)
    # are converted to text so every item handed to the model is a str.
    reviews = ["" if pd.isna(value) else str(value) for value in df['Reviews']]
    sentiments, scores = predict_sentiment_with_scores(reviews)

    df['Sentiment'] = sentiments
    df['Sentiment_Score'] = scores

    sentiment_counts = pd.Series(sentiments).value_counts()
    avg_sentiment_score = round(sum(scores) / len(scores), 2)

    return df, sentiment_counts, avg_sentiment_score
|
|
|
|
|
def create_comparison_charts(sentiment_results, avg_scores):
    """
    Create investment-focused comparison charts and a summary table.

    Args:
        sentiment_results: Mapping of product name to a pandas Series of
            review counts indexed by sentiment label.
        avg_scores: Mapping of product name to its weighted sentiment score;
            must contain every product present in ``sentiment_results``.

    Returns:
        A ``(score_comparison_fig, distribution_fig, ratio_fig, summary_df)``
        tuple: a bar chart of the weighted scores, a stacked bar chart of the
        per-label percentages, a grouped bar chart of positive vs negative
        percentages, and a DataFrame with one summary row per product.

    Raises:
        KeyError: If a product in ``sentiment_results`` is missing from
            ``avg_scores``.
    """
    # Display order of the labels, each paired with its bar colour.
    sentiment_colors = {
        'Very Positive': 'rgb(39, 174, 96)',
        'Positive': 'rgb(46, 204, 113)',
        'Neutral': 'rgb(241, 196, 15)',
        'Negative': 'rgb(231, 76, 60)',
        'Very Negative': 'rgb(192, 57, 43)',
    }
    sentiments = list(sentiment_colors)

    # One row per product holding the percentage of reviews for every label.
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())

        row = {
            'Product': product,
            'Total Reviews': total,
        }
        # Fill every label explicitly so a label that is absent for this
        # product is 0 rather than NaN when other products do have it.
        for sentiment in sentiments:
            count = sentiment_dict.get(sentiment, 0)
            row[sentiment] = (count / total) * 100 if total else 0.0
        plot_data.append(row)

    # Fixed columns keep the frame usable even when there are no products.
    df = pd.DataFrame(plot_data, columns=['Product', 'Total Reviews', *sentiments])
    df = df.astype({sentiment: float for sentiment in sentiments})

    # Stacked distribution of all five labels per product.
    distribution_fig = go.Figure()
    for sentiment, color in sentiment_colors.items():
        distribution_fig.add_trace(go.Bar(
            name=sentiment,
            x=df['Product'],
            y=df[sentiment],
            marker_color=color
        ))

    distribution_fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Product',
        yaxis_title='Percentage (%)',
        showlegend=True
    )

    # Collapse the five labels into overall positive / negative shares.
    df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)

    ratio_fig = go.Figure()
    ratio_fig.add_trace(go.Bar(
        name='Positive',
        x=df['Product'],
        y=df['Positive Ratio'],
        marker_color='rgb(50, 205, 50)'
    ))
    ratio_fig.add_trace(go.Bar(
        name='Negative',
        x=df['Product'],
        y=df['Negative Ratio'],
        marker_color='rgb(220, 20, 60)'
    ))
    ratio_fig.update_layout(
        barmode='group',
        title='Positive vs Negative Sentiment Ratio by Product',
        yaxis_title='Percentage (%)'
    )

    # Per-product summary table, also used as the data for the score chart.
    summary_data = {
        'Product': df['Product'].tolist(),
        'Total Reviews': df['Total Reviews'].tolist(),
        'Positive Ratio (%)': df['Positive Ratio'].round(2).tolist(),
        'Negative Ratio (%)': df['Negative Ratio'].round(2).tolist(),
        'Neutral Ratio (%)': df['Neutral'].round(2).tolist(),
        'Weighted Sentiment Score': [avg_scores[prod] for prod in df['Product']]
    }
    summary_df = pd.DataFrame(summary_data)

    score_comparison_fig = go.Figure()
    score_comparison_fig.add_trace(go.Bar(
        x=summary_df['Product'],
        y=summary_df['Weighted Sentiment Score'],
        text=[f"{score:.1f}" for score in summary_df['Weighted Sentiment Score']],
        textposition='auto',
        marker_color='rgb(65, 105, 225)',
        name='Sentiment Score'
    ))
    score_comparison_fig.update_layout(
        title='Weighted Sentiment Scores by Product (0-100)',
        yaxis_title='Sentiment Score',
        yaxis_range=[0, 100],
        showlegend=False,
        bargap=0.3,
        plot_bgcolor='white'
    )

    return score_comparison_fig, distribution_fig, ratio_fig, summary_df
|
|
|
|
|
def process_file(file_obj):
    """
    Process the input file and add sentiment analysis results.

    A CSV file is analysed as a single product named "Product"; an Excel
    workbook is analysed sheet by sheet, using each sheet name as the product
    name. The per-product results plus a 'Summary' sheet are written to
    "sentiment_analysis_results.xlsx".

    Args:
        file_obj: The uploaded file, either an object exposing the path as
            ``.name`` or the path itself.

    Returns:
        A ``(score_comparison_fig, distribution_fig, summary_df, output_path)``
        tuple matching the four output components of the interface.

    Raises:
        gr.Error: Wrapping any failure (no file, unsupported extension,
            missing 'Reviews' column, read/write errors) so the message is
            shown to the user.
    """
    try:
        if file_obj is None:
            raise ValueError("No file uploaded. Please upload a CSV or Excel file.")

        # Accept an object carrying the path in `.name` as well as a bare path.
        file_path = str(getattr(file_obj, "name", file_obj))
        # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
        lowered_path = file_path.lower()

        if lowered_path.endswith('.csv'):
            sheets = {"Product": pd.read_csv(file_path)}
        elif lowered_path.endswith(('.xlsx', '.xls')):
            # Open the workbook once and make sure it is closed afterwards.
            with pd.ExcelFile(file_path) as excel_file:
                sheets = {
                    sheet_name: excel_file.parse(sheet_name)
                    for sheet_name in excel_file.sheet_names
                }
        else:
            raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")

        sentiment_results = {}
        avg_sentiment_scores = {}
        all_processed_dfs = {}

        for product_name, df in sheets.items():
            processed_df, sentiment_counts, avg_score = process_single_sheet(df, product_name)
            all_processed_dfs[product_name] = processed_df
            sentiment_results[product_name] = sentiment_counts
            avg_sentiment_scores[product_name] = avg_score

        # ratio_fig is produced by the helper but has no output component here.
        score_comparison_fig, distribution_fig, ratio_fig, summary_df = create_comparison_charts(
            sentiment_results, avg_sentiment_scores
        )

        # NOTE(review): fixed file name in the working directory, so
        # simultaneous requests overwrite each other's report.
        output_path = "sentiment_analysis_results.xlsx"
        with pd.ExcelWriter(output_path) as writer:
            for sheet_name, df in all_processed_dfs.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False)
            summary_df.to_excel(writer, sheet_name='Summary', index=False)

        return score_comparison_fig, distribution_fig, summary_df, output_path

    except Exception as e:
        # Surface the message in the UI while keeping the original traceback.
        raise gr.Error(str(e)) from e
|
|
|
|
|
|
|
# Page layout: heading and usage guide, upload control, trigger button, then
# the two charts, the summary table and the downloadable report.
with gr.Blocks() as interface:
    gr.Markdown("# Product Review Sentiment Analysis")

    gr.Markdown("""
    ### Quick Guide
    1. **Excel File (Multiple Products)**:
       - Create separate sheets for each product
       - Name sheets with product/company names
       - Include "Reviews" column in each sheet

    2. **CSV File (Single Product)**:
       - Include "Reviews" column

    Upload your file and click Analyze to get started.
    """)

    with gr.Row():
        file_input = gr.File(label="Upload File (CSV or Excel)", file_types=[".csv", ".xlsx", ".xls"])

    with gr.Row():
        analyze_btn = gr.Button("Analyze Sentiments")

    with gr.Row():
        sentiment_score_plot = gr.Plot(label="Weighted Sentiment Scores")

    with gr.Row():
        distribution_plot = gr.Plot(label="Sentiment Distribution")

    with gr.Row():
        summary_table = gr.Dataframe(label="Summary Metrics")

    with gr.Row():
        output_file = gr.File(label="Download Full Report")

    # Order must match the tuple returned by `process_file`.
    result_components = [sentiment_score_plot, distribution_plot, summary_table, output_file]
    analyze_btn.click(fn=process_file, inputs=[file_input], outputs=result_components)

# Start the web server for the interface.
interface.launch()