# Chat_Analyzer / app.py
# Header text captured from the web file viewer along with the source:
# commit "Update app.py" by panchadip, revision ea85d87 (verified), file size 4.55 kB.
import streamlit as st
import preprocessor, helper
import matplotlib.pyplot as plt
import seaborn as sns
import chardet
def _decode_chat(raw_bytes):
    """Decode the uploaded chat export to text.

    The encoding is guessed with chardet. When chardet has no guess, UTF-8 is
    tried; if decoding with the chosen encoding fails, Latin-1 is used as the
    last resort.

    Args:
        raw_bytes: Raw content of the uploaded file.

    Returns:
        The decoded text.
    """
    guessed = chardet.detect(raw_bytes)['encoding'] or 'utf-8'
    try:
        return raw_bytes.decode(guessed)
    except (UnicodeDecodeError, LookupError):
        # LookupError covers the case where the detector reports an encoding
        # name that Python has no codec for; without it the app would crash
        # instead of falling back.
        return raw_bytes.decode('latin1')


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    Closing matters because the script is re-executed on every interaction;
    figures that are never closed stay registered with pyplot and accumulate.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_analysis(selected_user, df):
    """Draw every analysis section for the chosen user.

    Args:
        selected_user: A user name from the sidebar, or "Overall".
        df: The frame returned by ``preprocessor.preprocess``.
    """
    # Stats Area
    num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)
    st.title("Top Statistics")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.header("Total Messages")
        st.title(num_messages)
    with col2:
        st.header("Total Words")
        st.title(words)
    with col3:
        st.header("Media Shared")
        st.title(num_media_messages)
    with col4:
        st.header("Links Shared")
        st.title(num_links)

    # Monthly timeline
    st.title("Monthly Timeline")
    timeline = helper.monthly_timeline(selected_user, df)
    fig, ax = plt.subplots()
    ax.plot(timeline['time'], timeline['message'], color='green')
    # Rotate via the axes object rather than pyplot's implicit current axes.
    ax.tick_params(axis='x', labelrotation=90)
    _show_figure(fig)

    # Daily timeline
    st.title("Daily Timeline")
    daily_timeline = helper.daily_timeline(selected_user, df)
    fig, ax = plt.subplots()
    ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
    ax.tick_params(axis='x', labelrotation=90)
    _show_figure(fig)

    # Activity map
    st.title('Activity Map')
    col1, col2 = st.columns(2)
    with col1:
        st.header("Most busy day")
        busy_day = helper.week_activity_map(selected_user, df)
        fig, ax = plt.subplots()
        ax.bar(busy_day.index, busy_day.values, color='purple')
        ax.tick_params(axis='x', labelrotation=90)
        _show_figure(fig)
    with col2:
        st.header("Most busy month")
        busy_month = helper.month_activity_map(selected_user, df)
        fig, ax = plt.subplots()
        ax.bar(busy_month.index, busy_month.values, color='orange')
        ax.tick_params(axis='x', labelrotation=90)
        _show_figure(fig)

    st.title("Weekly Activity Map")
    user_heatmap = helper.activity_heatmap(selected_user, df)
    fig, ax = plt.subplots()
    # Pass the axes explicitly so the heatmap cannot land on another figure.
    sns.heatmap(user_heatmap, ax=ax)
    _show_figure(fig)

    # Finding the busiest users in the group (Group level)
    if selected_user == 'Overall':
        st.title('Most Busy Users')
        x, new_df = helper.most_busy_users(df)
        fig, ax = plt.subplots()
        col1, col2 = st.columns(2)
        with col1:
            ax.bar(x.index, x.values, color='red')
            ax.tick_params(axis='x', labelrotation=90)
            _show_figure(fig)
        with col2:
            st.dataframe(new_df)

    # WordCloud
    st.title("Wordcloud")
    df_wc = helper.create_wordcloud(selected_user, df)
    fig, ax = plt.subplots()
    ax.imshow(df_wc)
    _show_figure(fig)

    # Most common words
    most_common_df = helper.most_common_words(selected_user, df)
    st.title('Most common words')
    # NOTE(review): presumably the helper returns an empty frame when the
    # selection has no countable words, in which case columns 0/1 would not
    # exist - confirm against helper.most_common_words.
    if len(most_common_df) == 0:
        st.info("No words to display.")
    else:
        fig, ax = plt.subplots()
        ax.barh(most_common_df[0], most_common_df[1])
        ax.tick_params(axis='x', labelrotation=90)
        _show_figure(fig)

    # Emoji analysis
    emoji_df = helper.emoji_helper(selected_user, df)
    st.title("Emoji Analysis")
    # NOTE(review): same assumption as above for chats without emojis -
    # confirm against helper.emoji_helper.
    if len(emoji_df) == 0:
        st.info("No emojis found.")
    else:
        col1, col2 = st.columns(2)
        with col1:
            st.dataframe(emoji_df)
        with col2:
            fig, ax = plt.subplots()
            # Only the first rows (DataFrame.head default) go into the pie.
            ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f")
            _show_figure(fig)


# ---- Sidebar: upload a chat export and choose whose messages to analyse ----
st.sidebar.title("Whatsapp Chat Analyzer")
uploaded_file = st.sidebar.file_uploader("Choose a file")
if uploaded_file is not None:
    data = _decode_chat(uploaded_file.read())
    df = preprocessor.preprocess(data)

    # Fetch unique users; the 'group_notification' pseudo-user is not offered
    # as a choice, and "Overall" is always the first option.
    user_list = df['user'].unique().tolist()
    if 'group_notification' in user_list:
        user_list.remove('group_notification')
    user_list.sort()
    user_list.insert(0, "Overall")
    selected_user = st.sidebar.selectbox("Show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        _render_analysis(selected_user, df)