import streamlit as st
import pandas as pd
import plotly.express as px
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
import folium
import json
from streamlit_folium import folium_static

st.set_option('deprecation.showPyplotGlobalUse', False)
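
# Pre-processed tweet dataset, loaded once at import time. The CSV is expected to
# provide the columns referenced below: 'Datetime', 'Labels1', 'Label',
# 'text_clean_translated', 'Text', 'sentiment_flair', 'language', 'location',
# plus coordinate columns for st.map.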

DATA_ = pd.read_csv("states.csv")
st.title("Sentiment Analysis of Tweets")
st.sidebar.title("Sentiment Analysis of Tweets")
st.markdown("This application is a streamlit dashboard to analyze the sentiment of Tweets")
st.sidebar.markdown("This application is a streamlit dashboard to analyze the sentiment of Tweets")


def run():
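    # Parse the raw 'Datetime' timestamps once; st.cache(persist=True) keeps the
    # parsed frame across reruns so widget interactions do not repeat the work.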
    
    @st.cache(persist=True)
    def load_data():
        DATA_['tweet_created'] = pd.to_datetime(DATA_['Datetime'])
        return DATA_
    data = load_data()
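
    # --- Random tweet: display one sampled tweet text for the chosen sentiment label ---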
    
    st.sidebar.subheader("Show random tweet")
    random_tweet = st.sidebar.radio('Sentiment', ('-1', '1'))
    # Compare as strings so the lookup works whether Labels1 is stored as int or str.
    st.sidebar.markdown(data[data['Labels1'].astype(str) == random_tweet][["text_clean_translated"]].sample(n=1).iat[0, 0])
    
    st.sidebar.markdown("### Number of tweets by sentiment")
    select = st.sidebar.selectbox('Visualization type', ['Histogram', 'Pie chart'])
    sentiment_count = data['Labels1'].value_counts()
    sentiment_count = pd.DataFrame({'Sentiment':sentiment_count.index, 'Tweets':sentiment_count.values})
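
    # The distribution chart renders only while the 'Hide' checkbox is unticked
    # (it defaults to checked so the page loads uncluttered).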
    
    if not st.sidebar.checkbox("Hide", True):
        st.markdown("### Number of tweets by sentiment")
        if select == "Histogram":
            fig = px.bar(sentiment_count, x='Sentiment', y='Tweets', color='Tweets', height=500)
            st.plotly_chart(fig)
        else:
            fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
            st.plotly_chart(fig)
            
    
    st.sidebar.subheader("When and Where are users tweeting from?")
    hour = st.sidebar.slider("Hour of day", 0,23)
    modified_data = data[data['tweet_created'].dt.hour == hour]
    if not st.sidebar.checkbox("Close", True, key='1'):
        st.markdown("### Tweets locations based on the time of date")
        st.markdown("%i tweets between %i:00 and %i:00" % (len(modified_data), hour, (hour+1)%24))
        st.map(modified_data)
        if st.sidebar.checkbox("Show Raw Data", False):
            st.write(modified_data)
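
    # --- Sentiment breakdown by tweet language, faceted by label ---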
    st.sidebar.subheader("Breakdown language tweets by sentiment")
    choice = st.sidebar.multiselect('Pick language', ('en', 'hi'), key='0')

    if len(choice) > 0:
        choice_data = data[data.language.isin(choice)]
        fig_choice = px.histogram(
            choice_data, x='language', y='sentiment_flair',
            histfunc='count', color='Labels1', facet_col='Labels1',
            labels={'Labels1': 'tweets'}, height=600, width=800)
        st.plotly_chart(fig_choice)
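
    # --- Word cloud: built from the raw 'Text' column for the chosen flair sentiment,
    # after dropping URLs, @-mentions and the literal token 'RT' ---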

    st.sidebar.header("Word Cloud")
    word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?',('Positive', 'Neutral','Negative'))

    if not st.sidebar.checkbox("Close", True, key='3'):
        st.header('Word cloud for %s sentiment' % (word_sentiment))
        df = data[data['sentiment_flair']==word_sentiment]
        words = ' '.join(df['Text'])
        processed_words = ' '.join([word for word in words.split() if 'http' not in word and not word.startswith('@') and word !='RT'])
        wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white',
                              height=640, width=800).generate(processed_words)
        plt.imshow(wordcloud)
        plt.xticks([])
        plt.yticks([])
        st.pyplot() 
            
    ####################################  choropleth map  ####################################
    with open('india_state.json') as file:
        geojsonData = json.load(file)

    # Use the state name (NAME_1) from the GeoJSON properties as each feature's id,
    # so the choropleth below can be joined on 'feature.id'.
    for i in geojsonData['features']:
        i['id'] = i['properties']['NAME_1']

    # Base map centred on India.
    map_choropleth_high_public = folium.Map(location=[20.5937, 78.9629], zoom_start=4)
    df1 = data[data['location'].notna()]
    
    def get_state(x):
        """Map a free-text tweet location string to an Indian state (or 'Non_India')."""

        states = ["Andaman and Nicobar Islands", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar",
                  "Chandigarh", "Chhattisgarh", "Dadra and Nagar Haveli", "Daman and Diu", "Delhi", "Goa",
                  "Gujarat", "Haryana", "Himachal Pradesh", "Jammu and Kashmir", "Jharkhand", "Karnataka",
                  "Kerala", "Ladakh", "Lakshadweep", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
                  "Mizoram", "Nagaland", "Odisha", "Puducherry", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu",
                  "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"]

        # Locations often give a city in the first token; map those cities (including a
        # few common spellings such as 'Lukhnow'/'Lucknow') to their state.
        city_to_state = {"New Delhi": "Delhi", "Surat": "Gujarat", "Gurgaon": "Haryana",
                         "Bangalore": "Karnataka", "Bengaluru": "Karnataka", "Bengaluru South": "Karnataka",
                         "Pune": "Maharashtra", "Mumbai": "Maharashtra", "Navi Mumbai": "Maharashtra",
                         "Hyderabad": "Telangana", "Kolkata": "West Bengal", "Calcutta": "West Bengal",
                         "Kota": "Rajasthan", "Jodhpur": "Rajasthan", "Lucknow": "Uttar Pradesh",
                         "Lukhnow": "Uttar Pradesh", "Noida": "Uttar Pradesh", "Patna": "Bihar",
                         "Dehradun": "Uttarakhand", "Indore": "Madhya Pradesh", "Bhopal": "Madhya Pradesh"}

        # The last comma-separated token is usually the state; the first is a city or state.
        abv = x.split(',')[-1].lstrip()
        state_name = x.split(',')[0].lstrip()

        if abv in states:
            return abv
        if state_name in states:
            return state_name
        return city_to_state.get(state_name, 'Non_India')
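
    # Examples: get_state("Mumbai, India") -> "Maharashtra";
    # get_state("Paris, France") -> "Non_India".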

    # Derive a state-name column from the free-text 'location' field.
    df2 = df1.copy()
    df2['states'] = df1['location'].apply(get_state)
   
    # Net sentiment per state: count of Label == 1 minus count of Label == 0.
    df_state_sentiment = df2.groupby(['states'])['Label'].value_counts().unstack().fillna(0.0).reset_index()
    df_state_sentiment['total_sentiment'] = df_state_sentiment[1] - df_state_sentiment[0]
    dff = df_state_sentiment[df_state_sentiment['states'] != 'Non_India']
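
    # Shade each state by its net sentiment score; key_on='feature.id' matches the
    # NAME_1 id assigned to each GeoJSON feature above.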
    
    folium.Choropleth(geo_data=geojsonData,
                 data=dff,
                 name='CHOROPLETH',
                 key_on='feature.id',
                 columns = ['states','total_sentiment'],
                 fill_color='YlOrRd',
                 fill_opacity=0.7,
                 line_opacity=0.4,
                 legend_name='Sentiments',
                 highlight=True).add_to(map_choropleth_high_public)

    folium.LayerControl().add_to(map_choropleth_high_public)
       
    st.sidebar.header("Map Visualisation")
    if not st.sidebar.checkbox("Close", True, key='4'):
        folium_static(map_choropleth_high_public)
        
            
if __name__ == '__main__':
    run()
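
# The dashboard is launched with `streamlit run <script_name>.py`, where <script_name>
# is whatever this file is saved as.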