Ansh committed on
Commit d3e7dea · 1 Parent(s): b8253fa

Update app.py

Files changed (1)
  1. app.py +159 -0
app.py CHANGED
@@ -0,0 +1,159 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import plotly.express as px
+ from wordcloud import WordCloud, STOPWORDS
+ import matplotlib.pyplot as plt
+ import folium
+ import seaborn as sns
+ import json
+ import os
+ from streamlit_folium import folium_static
+
+ st.set_option('deprecation.showPyplotGlobalUse', False)
+
+ DATA_ = pd.read_csv("states.csv")
+ st.title("Sentiment Analysis of Tweets")
+ st.sidebar.title("Sentiment Analysis of Tweets")
+ st.markdown("This application is a Streamlit dashboard to analyze the sentiment of tweets")
+ st.sidebar.markdown("This application is a Streamlit dashboard to analyze the sentiment of tweets")
+
+
+ def run():
+
+     @st.cache(persist=True)
+     def load_data():
+         DATA_['tweet_created'] = pd.to_datetime(DATA_['Datetime'])
+         return DATA_
+     data = load_data()
+
+     st.sidebar.subheader("Show random tweet")
+     random_tweet = st.sidebar.radio('Sentiment', ('Positive', 'Neutral', 'Negative'))
+     st.sidebar.markdown(data.query('sentiment_flair == @random_tweet')[["Text"]].sample(n=1).iat[0, 0])
+
+     st.sidebar.markdown("### Number of tweets by sentiment")
+     select = st.sidebar.selectbox('Visualization type', ['Histogram', 'Pie chart'])
+     sentiment_count = data['sentiment_flair'].value_counts()
+     sentiment_count = pd.DataFrame({'Sentiment': sentiment_count.index, 'Tweets': sentiment_count.values})
+
+     if not st.sidebar.checkbox("Hide", True):
+         st.markdown("### Number of tweets by sentiment")
+         if select == "Histogram":
+             fig = px.bar(sentiment_count, x='Sentiment', y='Tweets', color='Tweets', height=500)
+             st.plotly_chart(fig)
+         else:
+             fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
+             st.plotly_chart(fig)
+
+
+     st.sidebar.subheader("When and where are users tweeting from?")
+     hour = st.sidebar.slider("Hour of day", 0, 23)
+     modified_data = data[data['tweet_created'].dt.hour == hour]
+     if not st.sidebar.checkbox("Close", True, key='1'):
+         st.markdown("### Tweet locations based on the time of day")
+         st.markdown("%i tweets between %i:00 and %i:00" % (len(modified_data), hour, (hour + 1) % 24))
+         st.map(modified_data)
+         if st.sidebar.checkbox("Show Raw Data", False):
+             st.write(modified_data)
+     st.sidebar.subheader("Breakdown of tweets by language and sentiment")
+     choice = st.sidebar.multiselect('Pick language', ('en', 'hi'), key='0')
+
+     if len(choice) > 0:
+         choice_data = data[data.language.isin(choice)]
+         fig_choice = px.histogram(choice_data, x='language',
+                                   y='sentiment_flair',
+                                   histfunc='count', color='sentiment_flair',
+                                   facet_col='sentiment_flair',
+                                   labels={'sentiment_flair': 'tweets'}, height=600, width=800)
+         st.plotly_chart(fig_choice)
+
+     st.sidebar.header("Word Cloud")
+     word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?', ('Positive', 'Neutral', 'Negative'))
+
+     if not st.sidebar.checkbox("Close", True, key='3'):
+         st.header('Word cloud for %s sentiment' % (word_sentiment))
+         df = data[data['sentiment_flair'] == word_sentiment]
+         words = ' '.join(df['Text'])
+         processed_words = ' '.join([word for word in words.split() if 'http' not in word and not word.startswith('@') and word != 'RT'])
+         wordcloud = WordCloud(stopwords=STOPWORDS,
+                               background_color='white', height=640, width=800).generate(processed_words)
+         plt.imshow(wordcloud)
+         plt.xticks([])
+         plt.yticks([])
+         st.pyplot()
+
+     #################################### choropleth map #############################################################
+     with open('india_state.json') as file:
+         geojsonData = json.load(file)
+
+     for i in geojsonData['features']:
+         i['id'] = i['properties']['NAME_1']
+
+     map_choropleth_high_public = folium.Map(location=[20.5937, 78.9629], zoom_start=4)
+     df1 = data
+     df1 = df1[df1['location'].notna()]
+
+     def get_state(x):
+
+         states = ["Andaman and Nicobar Islands", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chandigarh", "Chhattisgarh",
+                   "Dadra and Nagar Haveli", "Daman and Diu", "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh", "Jammu and Kashmir",
+                   "Jharkhand", "Karnataka", "Kerala", "Ladakh", "Lakshadweep", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
+                   "Mizoram", "Nagaland", "Odisha", "Puducherry", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"]
+
+         # Map well-known city names (including common misspellings) to their state.
+         # Keyed by city so that several cities can share the same state.
+         city_to_state = {"New Delhi": "Delhi", "Surat": "Gujarat", "Gurgaon": "Haryana",
+                          "Bangalore": "Karnataka", "Bengaluru": "Karnataka", "Bengaluru South": "Karnataka",
+                          "Pune": "Maharashtra", "Mumbai": "Maharashtra", "Navi Mumbai": "Maharashtra",
+                          "Hyderabad": "Telangana", "Kolkata": "West Bengal", "Calcutta": "West Bengal",
+                          "Kota": "Rajasthan", "Jodhpur": "Rajasthan",
+                          "Lucknow": "Uttar Pradesh", "Lukhnow": "Uttar Pradesh", "Noida": "Uttar Pradesh",
+                          "Patna": "Bihar", "Dehradun": "Uttarakhand",
+                          "Indore": "Madhya Pradesh", "Bhopal": "Madhya Pradesh"}
+         # A bare state name before the comma resolves to itself
+         city_to_state.update({s: s for s in states})
+
+         abv = x.split(',')[-1].lstrip()
+         state_name = x.split(',')[0].lstrip()
+
+         if abv in states:
+             state = abv
+         elif state_name in city_to_state:
+             state = city_to_state[state_name]
+         else:
+             state = 'Non_India'
+
+         return state
+
+     # add a 'states' column derived from the free-text location field
+     df2 = df1.copy()
+
+     df2['states'] = df1['location'].apply(get_state)
+
+     # net sentiment per state: (# tweets labelled 1) - (# tweets labelled 0)
+     df_state_sentiment = df2.groupby(['states'])['Label'].value_counts().unstack().fillna(0.0).reset_index()
+     df_state_sentiment['total_sentiment'] = -(df_state_sentiment[0]) + df_state_sentiment[1]
+     dff = df_state_sentiment[df_state_sentiment['states'] != 'Non_India']
+
+     folium.Choropleth(geo_data=geojsonData,
+                       data=dff,
+                       name='CHOROPLETH',
+                       key_on='feature.id',
+                       columns=['states', 'total_sentiment'],
+                       fill_color='YlOrRd',
+                       fill_opacity=0.7,
+                       line_opacity=0.4,
+                       legend_name='Sentiments',
+                       highlight=True).add_to(map_choropleth_high_public)
+
+     folium.LayerControl().add_to(map_choropleth_high_public)
+
+     # display(map_choropleth_high_public)
+
+     st.sidebar.header("Map Visualisation")
+     if not st.sidebar.checkbox("Close", True, key='4'):
+         folium_static(map_choropleth_high_public)
+
+
+ if __name__ == '__main__':
+     run()
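
Note: app.py reads its data from states.csv and, judging from the code above, expects columns named Datetime, Text, sentiment_flair, language, location, and Label, plus an india_state.json GeoJSON file whose features carry a NAME_1 property; neither data file is part of this commit. Below is a minimal, hypothetical sketch of that assumed CSV layout, with invented sample values (Streamlit's st.map additionally expects latitude/longitude columns, which are omitted here). The dashboard itself would typically be launched with streamlit run app.py.

import pandas as pd

# Hypothetical sample rows illustrating the columns app.py reads from states.csv.
# Column names are taken from the code above; the values are invented for illustration.
sample = pd.DataFrame({
    "Datetime": ["2021-05-01 10:15:00", "2021-05-01 11:40:00"],
    "Text": ["This is a sample positive tweet", "This is a sample negative tweet"],
    "sentiment_flair": ["Positive", "Negative"],   # drives the charts and word clouds
    "language": ["en", "hi"],                      # used by the language breakdown
    "location": ["Mumbai, Maharashtra", "Lucknow, Uttar Pradesh"],  # parsed by get_state()
    "Label": [1, 0],                               # 1/0 labels aggregated into the choropleth
})
sample.to_csv("states.csv", index=False)  # written only to illustrate the expected layout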