Spaces:
Sleeping
Sleeping
| import spacy | |
| import pandas as pd | |
| import streamlit as st | |
| from preprocessing import preprocess_reviews | |
| from aspects_extraction import extract_aspects | |
| from clustering import cluster_aspect_terms | |
| import plotly.express as px | |
| import matplotlib.pyplot as plt | |
# Bundled sample datasets: label shown in the sidebar -> CSV file to load.
defaultCsv = dict(
    [
        ('Serco USB Hub + Sound Card', 'reviews.csv'),
        ('Honey', 'reviews_honey.csv'),
    ]
)
# Configure the browser tab title and use the full page width for the charts.
st.set_page_config(
    page_title="Actionable Insights From Reviews",  # fixed typo ("Actionble")
    layout="wide",
)
def load_reviews(uploaded_file=None, default_file=None):
    """Read a reviews CSV, validate it and run the preprocessing step.

    Args:
        uploaded_file: File (or path) supplied by the user. Takes precedence
            over ``default_file`` when both are given.
        default_file: Path of a bundled sample CSV used as a fallback.

    Returns:
        Whatever ``preprocess_reviews`` returns for the validated dataframe.

    Raises:
        ValueError: If neither source is given, or if
            ``validate_reviews_dataframe`` rejects the data.
    """
    # Only read the source that will actually be used; previously the sample
    # file was parsed even when an upload replaced it straight away.
    source = uploaded_file if uploaded_file is not None else default_file
    if source is None:
        # Raise ValueError (not an accidental UnboundLocalError) so the
        # caller's `except ValueError` can show a readable message.
        raise ValueError("no reviews file was provided")

    reviews = validate_reviews_dataframe(pd.read_csv(source))
    return preprocess_reviews(reviews)
def validate_reviews_dataframe(r):
    """Check that a reviews dataframe has the expected schema and values.

    The frame must contain object-dtype ``title`` and ``review`` columns and a
    ``float64`` ``rating`` column. Rows with any missing value are dropped
    before the rating range is checked.

    Args:
        r: Dataframe as read from the reviews CSV.

    Returns:
        The dataframe with incomplete rows removed.

    Raises:
        ValueError: If a required column is missing, has the wrong dtype, or
            any rating lies outside the inclusive range 0-5.
    """
    for column in ('title', 'review', 'rating'):
        if column not in r.columns:
            raise ValueError(f"column {column} is required")

    for column in ('title', 'review'):
        if r[column].dtype != 'O':
            raise ValueError(f"column {column} must be string")
    if r['rating'].dtype != 'float64':
        raise ValueError("column rating must be float")

    r = r.dropna()

    # A rating is invalid when it is below 0 OR above 5. The previous `&`
    # required both at once, which is impossible, so nothing was ever rejected.
    out_of_range = (r['rating'] < 0) | (r['rating'] > 5)
    if out_of_range.any():
        raise ValueError("values in column rating must be between 0 and 5")
    return r
def load_model():
    """Return the spaCy ``en_core_web_lg`` pipeline, loading it at most once.

    The pipeline is memoized on the function object, so the callers in this
    script share a single instance instead of each reloading the model from
    disk.
    """
    nlp = getattr(load_model, "_nlp", None)
    if nlp is None:
        nlp = spacy.load("en_core_web_lg")
        load_model._nlp = nlp
    return nlp
def get_aspects(reviews):
    """Run ``extract_aspects`` on *reviews* with the pipeline from ``load_model``."""
    return extract_aspects(load_model(), reviews)
def cluster_aspects(aspects):
    """Rewrite the ``aspect`` column using the output of ``cluster_aspect_terms``.

    The input dataframe is modified in place and also returned.
    """
    # NOTE(review): presumably a mapping from raw aspect term to its cluster
    # representative; confirm against `cluster_aspect_terms`.
    term_map = cluster_aspect_terms(load_model(), aspects)
    aspects['aspect'] = aspects['aspect'].map(term_map)
    return aspects
def get_aspects_with_ratings(aspects, reviews):
    """Attach each review's rating and a sentiment label to its aspects.

    Args:
        aspects: Dataframe with a ``review_id`` column whose values match the
            index of ``reviews``.
        reviews: Dataframe with a ``rating`` column.

    Returns:
        ``aspects`` joined with ``rating`` plus a categorical
        ``review_sentiment`` column: ratings in [0, 3] are ``Negative``,
        (3, 4] ``Neutral`` and (4, 5] ``Positive``.
    """
    aspect_with_ratings = pd.merge(
        aspects,
        reviews[['rating']],
        left_on='review_id',
        right_index=True,
    )
    aspect_with_ratings['review_sentiment'] = pd.cut(
        aspect_with_ratings['rating'],
        bins=[0, 3, 4, 5],
        right=True,
        # Without this the first bin is (0, 3], so a rating of exactly 0
        # (which validation allows) got no label at all.
        include_lowest=True,
        labels=['Negative', 'Neutral', 'Positive'],
    )
    return aspect_with_ratings
def get_aspect_treemap(aspects):
    """Build a plotly treemap of aspect/opinion pairs sized by occurrence count."""
    # size() yields an unnamed Series, so after reset_index() the count column
    # is labelled with the integer 0 -- hence `values=0` below.
    pair_counts = aspects.groupby(['aspect', 'opinion']).size().reset_index()
    hierarchy = [px.Constant('Aspects'), 'aspect', 'opinion']
    figure = px.treemap(pair_counts, path=hierarchy, values=0)
    figure.update_layout(margin=dict(t=0, l=0, r=0, b=0))
    return figure
def _plot_top_aspects(aspect_with_ratings, sentiment, color, top):
    """Bar chart of the `top` aspects mentioned most often in `sentiment` reviews."""
    has_sentiment = aspect_with_ratings['review_sentiment'] == sentiment
    counts = (
        aspect_with_ratings[has_sentiment]
        .groupby('aspect')
        .size()
        .sort_values(ascending=False)
        .iloc[:top]
    )
    fig = px.bar(counts)
    fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
    fig.update_traces(marker_color=color, showlegend=False)
    return fig


def plot_pain_points(aspect_with_ratings, top=10):
    """Plot the aspects that occur most often in negative reviews.

    Args:
        aspect_with_ratings: Dataframe with ``aspect`` and
            ``review_sentiment`` columns.
        top: Maximum number of aspects to show (default 10).

    Returns:
        A plotly bar figure with red bars.
    """
    return _plot_top_aspects(aspect_with_ratings, 'Negative', 'red', top)


def plot_gain_points(aspect_with_ratings, top=10):
    """Plot the aspects that occur most often in positive reviews.

    Args:
        aspect_with_ratings: Dataframe with ``aspect`` and
            ``review_sentiment`` columns.
        top: Maximum number of aspects to show (default 10).

    Returns:
        A plotly bar figure with green bars.
    """
    return _plot_top_aspects(aspect_with_ratings, 'Positive', 'green', top)
def plot_sentiment_by_aspect(aspect_with_ratings, top=15):
    """Plot grouped bars of sentiment counts for the most-mentioned aspects.

    Args:
        aspect_with_ratings: Dataframe with ``aspect`` and
            ``review_sentiment`` columns.
        top: Maximum number of aspects to show, ordered by total mentions.

    Returns:
        A plotly grouped bar figure with one bar per sentiment and aspect.
    """
    counts = pd.crosstab(
        index=aspect_with_ratings['aspect'],
        columns=aspect_with_ratings['review_sentiment'],
        margins=True,
    ).sort_values(by='All', ascending=False)

    # Remove the margin row/column by label rather than by position: when a
    # single aspect accounts for every row its total ties with "All", and the
    # old `.iloc[1:, :-1]` could drop the real aspect and keep the margin row.
    counts = counts.drop(index='All', columns='All')

    fig = px.bar(
        counts.iloc[:top],
        barmode='group',
        color_discrete_map={
            'Positive': 'green',
            'Negative': 'red',
            'Neutral': 'blue',
        },
    )
    fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
    return fig
# Static introduction shown above the analysis. The copy is markdown; the
# blank lines around the numbered list keep the paragraph that follows from
# being absorbed into list item 3.
st.write("## Actionable Insights From Reviews")
st.write("""
Key to building a successful product is understanding what users want and what users don't want.
This insight can be useful in several ways.

1. Designing products that users actually want.
2. Fixing defects in the product or addressing users' pain points.
3. Staying ahead of the competition.

There are millions of reviews that people leave on sites like amazon, tripadvisor etc.
To gain insights from this data, you could either read all the reviews one by one or
let a machine analyze these reviews and find the main topics that users care about.
""")
st.write("## Extracting Aspect Opinion Pairs")
st.write("""
Let's say the customer wrote, `The material of the shirt is not soft`.
Here `material` is the `aspect` of the shirt and `not soft` is the user's `opinion`
about this aspect. The analyzer finds aspect opinion pairs from the reviews.
""")
st.write("### Customer Reviews")
st.write("""
Dataframe containing the customer reviews. Title, review, and rating columns are required.
""")
# Sidebar: pick one of the bundled sample files, or upload a CSV instead.
st.sidebar.title("Select Reviews File")
default_file = st.sidebar.selectbox(
    "Choose Sample File",
    list(defaultCsv),
)
if default_file is not None:
    # Translate the display label into the CSV path it stands for.
    default_file = defaultCsv[default_file]
st.sidebar.write("<div style='text-align:center'>or</div>", unsafe_allow_html=True)
uploaded_file = st.sidebar.file_uploader(
    'Choose a CSV File',
    type='csv',
)
# The hint names the columns exactly as validation expects them: the column
# is `rating`, not `ratings`.
st.sidebar.write("CSV with title(string), review(string) and rating(float 0-5) columns")
# Main pipeline: load the reviews, extract and cluster aspects, render charts.
# A ValueError raised anywhere in the pipeline is shown as an error box.
try:
    reviews = load_reviews(uploaded_file, default_file)
    st.write(reviews)

    aspects = cluster_aspects(get_aspects(reviews))
    aspects_with_ratings = get_aspects_with_ratings(aspects, reviews)

    st.write("### Extracted Aspect Opinion Pairs")
    st.write("""
Treemap of aspect opinion pairs extracted from reviews, treemap
is sized according to number of reviews.
""")
    st.plotly_chart(get_aspect_treemap(aspects), use_container_width=True)

    st.write("### Pain Points And Gain Points")
    panels = (
        ('Top Pain Points (by number of -ve reviews)', plot_pain_points),
        ('Top Gain Points (by number of +ve reviews)', plot_gain_points),
    )
    # One column per panel, left to right.
    for column, (caption, plotter) in zip(st.columns(2), panels):
        with column:
            st.write(caption)
            st.plotly_chart(plotter(aspects_with_ratings), use_container_width=True)

    st.write("### Sentiment for each aspect")
    st.write('(0-3 Negative) (4 Neutral) (5 Positive)')
    st.plotly_chart(plot_sentiment_by_aspect(aspects_with_ratings), use_container_width=True)
except ValueError as e:
    st.error(e)