# Spaces: Sleeping (status text from the hosting page, captured with the source; not part of the app)
import streamlit as st
import pandas as pd
import plotly.express as px
# Country mapping dictionary: country name -> three-letter code used as the
# choropleth location. Names that are missing here cannot be placed on the map.
country_mapping = {
    'United States': 'USA',
    'United Kingdom': 'GBR',
    'France': 'FRA',
    'Canada': 'CAN',
    'Germany': 'DEU',
    'Japan': 'JPN',
    'India': 'IND',
    'Australia': 'AUS',
    'China': 'CHN',
    'Italy': 'ITA',
    'Spain': 'ESP',
    'Mexico': 'MEX',
    'Hong Kong': 'HKG',
    'Sweden': 'SWE',
    'Denmark': 'DNK',
    'New Zealand': 'NZL',
    'Belgium': 'BEL',
    'South Korea': 'KOR',
    'Ireland': 'IRL',
    'Czech Republic': 'CZE',
    'Switzerland': 'CHE',
    'Hungary': 'HUN',
    'Norway': 'NOR',
    'United Arab Emirates': 'ARE',
    'Netherlands': 'NLD',
    'South Africa': 'ZAF',
    'Poland': 'POL',
    'Austria': 'AUT',
    'Turkey': 'TUR',
    'Brazil': 'BRA',
    'Russia': 'RUS',
    'Argentina': 'ARG',
    'Singapore': 'SGP',
    'Thailand': 'THA',
    'Portugal': 'PRT',
    'Greece': 'GRC',
    'Egypt': 'EGY',
    'Vietnam': 'VNM',
    'Malaysia': 'MYS',
    'Philippines': 'PHL',
    'Taiwan': 'TWN',
    'Israel': 'ISR',
    'Saudi Arabia': 'SAU',
    'Indonesia': 'IDN',
    'Pakistan': 'PAK',
    'Iran': 'IRN',
    'Iraq': 'IRQ',
    'Syria': 'SYR',
    'Lebanon': 'LBN',
    'Jordan': 'JOR',
    'Qatar': 'QAT',
    'Oman': 'OMN',
    'Kuwait': 'KWT',
    'Bahrain': 'BHR',
    'Yemen': 'YEM',
    'Morocco': 'MAR',
    'Tunisia': 'TUN',
    'Algeria': 'DZA',
    'Libya': 'LBY',
    'Sudan': 'SDN',
    'Kenya': 'KEN',
    'Nigeria': 'NGA',
    'Ghana': 'GHA',
    'Ethiopia': 'ETH',
    'Botswana': 'BWA',
    'Namibia': 'NAM',
    'Zimbabwe': 'ZWE',
    'Zambia': 'ZMB',
    'Uganda': 'UGA',
    'Rwanda': 'RWA',
    'Burundi': 'BDI',
    'Tanzania': 'TZA',
    'Angola': 'AGO',
    'Mozambique': 'MOZ',
    'Madagascar': 'MDG',
    'Mauritius': 'MUS',
    'Somalia': 'SOM',
    # Shares Somalia's code, so both names land on the same map location.
    'Somaliland': 'SOM',
    'Senegal': 'SEN',
    'Ivory Coast': 'CIV',
    'Cameroon': 'CMR',
    'Benin': 'BEN',
    'Togo': 'TGO',
    'Gambia': 'GMB',
    'Guinea': 'GIN',
    'Guinea-Bissau': 'GNB',
    'Equatorial Guinea': 'GNQ',
    'Gabon': 'GAB',
    'Congo': 'COG',
    'Democratic Republic of the Congo': 'COD',
    'Central African Republic': 'CAF',
    'Chad': 'TCD',
    'Niger': 'NER',
    'Mali': 'MLI',
    'Burkina Faso': 'BFA',
    'Mauritania': 'MRT',
    'Western Sahara': 'ESH',
    'Sierra Leone': 'SLE',
    'Liberia': 'LBR',
    'Cape Verde': 'CPV',
    'Seychelles': 'SYC',
    'Comoros': 'COM',
    'Maldives': 'MDV',
}
# Load your dataframes (paths are resolved relative to the working directory)
df_movies = pd.read_csv('movie_after_cleaning.csv')
df_tv_series = pd.read_csv('series_after_cleaning.csv')

# Splitting genres and countries: turn the comma-separated strings into lists
# so they can be exploded into one row per value later on. The separator is
# the comma together with any whitespace around it, so "A, B" yields "B"
# rather than " B" (which would be counted apart from "B" and would not match
# a country name). A multi-character pattern is treated by pandas as a regex.
for _frame in (df_movies, df_tv_series):
    for _column in ('genre', 'country'):
        _frame[_column] = _frame[_column].str.strip().str.split(r'\s*,\s*')
# Function to create treemap
def create_treemap(df: pd.DataFrame, title: str):
    """Build a treemap over the ``parentalguide`` column.

    Args:
        df: DataFrame that contains a ``parentalguide`` column.
        title: Title shown above the chart.

    Returns:
        The figure returned by ``px.treemap``.
    """
    return px.treemap(df, path=['parentalguide'], title=title)
# Function to create genre bar chart
def create_genre_bar_chart(df: pd.DataFrame, title: str):
    """Build a horizontal bar chart of the ten most frequent genres.

    Args:
        df: DataFrame whose ``genre`` column holds a list of genre names per
            row (missing values are ignored).
        title: Title shown above the chart.

    Returns:
        The figure returned by ``px.bar``.
    """
    # One entry per (row, genre) pair.
    genres = df['genre'].explode()
    # Trim stray whitespace and drop empty tokens (e.g. from "A, B" or "A,")
    # so the same genre is not counted under two different labels.
    genres = genres.map(lambda g: g.strip() if isinstance(g, str) else g)
    genres = genres[genres != '']

    genre_counts = (
        genres.value_counts()
        .rename_axis('genre')
        .reset_index(name='count')
        .head(10)  # value_counts is sorted descending, so this is the top 10
        # Ascending order so that the largest bar ends up on top.
        .sort_values('count', ascending=True)
    )
    return px.bar(genre_counts, x='count', y='genre', orientation='h', title=title)
# Function to create choropleth map
def create_country_map(df: pd.DataFrame, title: str):
    """Build a world choropleth of how many titles each country has.

    Args:
        df: DataFrame whose ``country`` column holds a list of country names
            per row (missing values are ignored).
        title: Title shown above the map.

    Returns:
        The figure returned by ``px.choropleth``.
    """
    # One entry per (row, country) pair, with stray whitespace trimmed so the
    # names can match the keys of ``country_mapping``.
    countries = df['country'].explode()
    countries = countries.map(lambda c: c.strip() if isinstance(c, str) else c)

    country_counts = (
        countries.value_counts()
        .rename_axis('country')
        .reset_index(name='count')
    )

    # Keep the readable name for the hover label and put the code in its own
    # column instead of overwriting ``country``.
    country_counts['iso_alpha'] = country_counts['country'].map(country_mapping)

    # Names without a code cannot be drawn, and several names may share one
    # code, so keep exactly one row per code: counts are summed and the label
    # is the most frequent name (rows are still in descending-count order).
    country_counts = (
        country_counts.dropna(subset=['iso_alpha'])
        .groupby('iso_alpha', as_index=False, sort=False)
        .agg(country=('country', 'first'), count=('count', 'sum'))
    )

    return px.choropleth(
        country_counts,
        locations='iso_alpha',
        color='count',
        hover_name='country',
        title=title,
        projection='natural earth',
    )
# Function to create rating distribution box chart
def create_rating_box_chart(df: pd.DataFrame, title: str):
    """Build a horizontal box plot of the ``rating`` column.

    Args:
        df: DataFrame that contains a ``rating`` column.
        title: Title shown above the chart.

    Returns:
        The figure returned by ``px.box`` with semi-transparent markers.
    """
    fig = px.box(df, x="rating", title=title)
    # Only the marker opacity is changed here; ``px.box`` is called without a
    # ``points`` argument, so which points get drawn is left to Plotly's default.
    fig.update_traces(marker=dict(opacity=0.6))
    return fig
# Streamlit app
st.title('Parental Guide Analysis')


def _render_dashboard(df, label):
    """Draw the subheader and the 2x2 grid of charts for one dataset.

    Args:
        df: Dataset to chart; passed unchanged to the four chart builders.
        label: Dataset name used in the subheader and in every chart title.
    """
    st.subheader(label)

    # Display two charts per row
    top_left, top_right = st.columns(2)
    with top_left:
        st.plotly_chart(create_treemap(df, f'Parental Guide - {label}'), use_container_width=True)
    with top_right:
        st.plotly_chart(create_genre_bar_chart(df, f'Top 10 Genres - {label}'), use_container_width=True)

    bottom_left, bottom_right = st.columns(2)
    with bottom_left:
        st.plotly_chart(create_country_map(df, f'Global Distribution of {label}'), use_container_width=True)
    with bottom_right:
        st.plotly_chart(create_rating_box_chart(df, f'Rating Distribution - {label}'), use_container_width=True)


# One selection button per dataset, side by side
col1, col2 = st.columns(2)
selection_movies = col1.button('Movies')
selection_tv_series = col2.button('TV Series')

# Fall back to the Movies view when neither button reports a click
if not (selection_movies or selection_tv_series):
    selection_movies = True

# Displaying charts in a customized layout based on selection
if selection_movies:
    _render_dashboard(df_movies, 'Movies')
else:
    # Only reachable when the TV Series button was pressed (see fallback above)
    _render_dashboard(df_tv_series, 'TV Series')