# Page-scrape residue, kept as a comment so the module parses: "Spaces: Sleeping"
import pandas as pd
import plotly.express as px
import streamlit as st
# Upper bound (inclusive) of each bucket paired with its legend label.
# The labels double as categorical colour keys for the choropleth, so they
# must stay exactly as written.
_COUNT_BUCKETS = (
    (10, '1-10'),
    (50, '10-50'),
    (100, '50-100'),
    (500, '100-500'),
    (1000, '500-1000'),
)


# Function to assign color based on count
def assign_color(count):
    """Return the legend bucket label that *count* falls into.

    Each bucket is closed on its upper bound (``count <= 10`` gives
    ``'1-10'``, ``count <= 50`` gives ``'10-50'`` and so on). Any value that
    is not ``<=`` one of the bounds, including counts above 1000, gets
    ``'>1000'``.

    Args:
        count: Numeric count to classify.

    Returns:
        One of ``'1-10'``, ``'10-50'``, ``'50-100'``, ``'100-500'``,
        ``'500-1000'`` or ``'>1000'``.
    """
    for upper_bound, label in _COUNT_BUCKETS:
        if count <= upper_bound:
            return label
    return '>1000'
# Country mapping dictionary: country display name -> three-letter ISO code
# used as the choropleth location. Names that are not listed here cannot be
# placed on the map.
country_mapping = {
    'United States': 'USA',
    'United Kingdom': 'GBR',
    'France': 'FRA',
    'Canada': 'CAN',
    'Germany': 'DEU',
    'Japan': 'JPN',
    'India': 'IND',
    'Australia': 'AUS',
    'China': 'CHN',
    'Italy': 'ITA',
    'Spain': 'ESP',
    'Mexico': 'MEX',
    'Hong Kong': 'HKG',
    'Sweden': 'SWE',
    'Denmark': 'DNK',
    'New Zealand': 'NZL',
    'Belgium': 'BEL',
    'South Korea': 'KOR',
    'Ireland': 'IRL',
    'Czech Republic': 'CZE',
    'Switzerland': 'CHE',
    'Hungary': 'HUN',
    'Norway': 'NOR',
    'United Arab Emirates': 'ARE',
    'Netherlands': 'NLD',
    'South Africa': 'ZAF',
    'Poland': 'POL',
    'Austria': 'AUT',
    'Turkey': 'TUR',
    'Brazil': 'BRA',
    'Russia': 'RUS',
    'Argentina': 'ARG',
    'Singapore': 'SGP',
    'Thailand': 'THA',
    'Portugal': 'PRT',
    'Greece': 'GRC',
    'Egypt': 'EGY',
    'Vietnam': 'VNM',
    'Malaysia': 'MYS',
    'Philippines': 'PHL',
    'Taiwan': 'TWN',
    'Israel': 'ISR',
    'Saudi Arabia': 'SAU',
    'Indonesia': 'IDN',
    'Pakistan': 'PAK',
    'Iran': 'IRN',
    'Iraq': 'IRQ',
    'Syria': 'SYR',
    'Lebanon': 'LBN',
    'Jordan': 'JOR',
    'Qatar': 'QAT',
    'Oman': 'OMN',
    'Kuwait': 'KWT',
    'Bahrain': 'BHR',
    'Yemen': 'YEM',
    'Morocco': 'MAR',
    'Tunisia': 'TUN',
    'Algeria': 'DZA',
    'Libya': 'LBY',
    'Sudan': 'SDN',
    'Kenya': 'KEN',
    'Nigeria': 'NGA',
    'Ghana': 'GHA',
    'Ethiopia': 'ETH',
    'Botswana': 'BWA',
    'Namibia': 'NAM',
    'Zimbabwe': 'ZWE',
    'Zambia': 'ZMB',
    'Uganda': 'UGA',
    'Rwanda': 'RWA',
    'Burundi': 'BDI',
    'Tanzania': 'TZA',
    'Angola': 'AGO',
    'Mozambique': 'MOZ',
    'Madagascar': 'MDG',
    'Mauritius': 'MUS',
    'Somalia': 'SOM',
    # Shares Somalia's code, so both names land on the same map region.
    'Somaliland': 'SOM',
    'Senegal': 'SEN',
    'Ivory Coast': 'CIV',
    'Cameroon': 'CMR',
    'Benin': 'BEN',
    'Togo': 'TGO',
    'Gambia': 'GMB',
    'Guinea': 'GIN',
    'Guinea-Bissau': 'GNB',
    'Equatorial Guinea': 'GNQ',
    'Gabon': 'GAB',
    'Congo': 'COG',
    'Democratic Republic of the Congo': 'COD',
    'Central African Republic': 'CAF',
    'Chad': 'TCD',
    'Niger': 'NER',
    'Mali': 'MLI',
    'Burkina Faso': 'BFA',
    'Mauritania': 'MRT',
    'Western Sahara': 'ESH',
    'Sierra Leone': 'SLE',
    'Liberia': 'LBR',
    'Cape Verde': 'CPV',
    'Seychelles': 'SYC',
    'Comoros': 'COM',
    'Maldives': 'MDV',
}
# Load your dataframes (replace with actual CSV filenames)
df_movies = pd.read_csv('movie_after_cleaning.csv')
df_tv_series = pd.read_csv('series_after_cleaning.csv')


def _split_list_column(column):
    """Turn a column of comma-separated strings into lists of trimmed tokens.

    A plain ``split(',')`` keeps the space that follows each comma, so
    ``'Action, Drama'`` would yield ``' Drama'``. That value is counted
    separately from ``'Drama'`` and never matches a key in a name lookup.
    Tokens are therefore stripped and empty ones dropped. Cells that are not
    strings (for example missing values) are returned unchanged.
    """
    def _tokens(cell):
        if not isinstance(cell, str):
            return cell
        return [part.strip() for part in cell.split(',') if part.strip()]

    return column.apply(_tokens)


# Splitting genres and countries
df_movies['genre'] = _split_list_column(df_movies['genre'])
df_tv_series['genre'] = _split_list_column(df_tv_series['genre'])
df_movies['country'] = _split_list_column(df_movies['country'])
df_tv_series['country'] = _split_list_column(df_tv_series['country'])
# Function to create treemap
def create_treemap(df, title):
    """Build a one-level treemap grouped by the ``parentalguide`` column.

    Args:
        df: DataFrame containing a ``parentalguide`` column.
        title: Title shown on the figure.

    Returns:
        The Plotly figure produced by ``px.treemap``.
    """
    return px.treemap(df, path=['parentalguide'], title=title)
# Function to create genre bar chart
def create_genre_bar_chart(df, title):
    """Build a horizontal bar chart of the ten most frequent genres.

    Args:
        df: DataFrame whose ``genre`` column holds a list of genres per row.
        title: Title shown on the figure.

    Returns:
        The Plotly bar figure.
    """
    # One row per (title, genre) pair so each genre is counted individually.
    exploded = df.explode('genre')
    genre_counts = (
        exploded['genre']
        .value_counts()
        # Name the index and the values explicitly rather than relying on
        # the default column names reset_index() would produce.
        .rename_axis('genre')
        .reset_index(name='count')
        .head(10)  # Top 10 genres
    )

    fig = px.bar(
        genre_counts,
        x='count',
        y='genre',
        orientation='h',
        title=title,
        labels={'count': 'Count', 'genre': 'Genre'},
        color_discrete_sequence=['#FFA07A'],
    )
    fig.update_traces(marker_line_color='rgb(8,48,107)', marker_line_width=1.5, opacity=0.6)
    fig.update_layout(title_font_size=20, title_font_family='Arial', title_font_color='#00308F')
    return fig
# Function to create choropleth map
def create_country_map(df, title):
    """Build a choropleth of title counts per country, coloured by bucket.

    Args:
        df: DataFrame whose ``country`` column holds a list of country names
            per row.
        title: Title shown on the figure.

    Returns:
        The Plotly choropleth figure.
    """
    # One row per (title, country) pair; trim stray whitespace so names such
    # as ' France' still match the keys of country_mapping.
    exploded = df.explode('country')
    names = exploded['country'].map(
        lambda value: value.strip() if isinstance(value, str) else value
    )
    country_counts = names.value_counts().rename_axis('country').reset_index(name='count')

    # Map country names to ISO codes in a separate column so the readable
    # name stays available for the hover label.
    country_counts['iso_alpha'] = country_counts['country'].map(country_mapping)
    # Names without a code cannot be placed on the map.
    country_counts = country_counts.dropna(subset=['iso_alpha'])
    # Several names can share one code; merge them into a single location so
    # the map does not receive duplicate rows. Rows arrive sorted by count, so
    # 'first' keeps the most frequent spelling as the display name.
    country_counts = country_counts.groupby('iso_alpha', as_index=False).agg(
        country=('country', 'first'),
        count=('count', 'sum'),
    )

    # Assign color based on count
    country_counts['color'] = country_counts['count'].apply(assign_color)

    fig = px.choropleth(
        country_counts,
        locations="iso_alpha",
        color="color",
        hover_name="country",
        title=title,
        projection="natural earth",
        color_discrete_sequence=['#7FFF00', '#FFD700', '#FFA500', '#FF4500', '#DC143C', '#8B0000'],
        category_orders={"color": ['1-10', '10-50', '50-100', '100-500', '500-1000', '>1000']},
    )
    fig.update_geos(showcoastlines=True, coastlinecolor="LightBlue", showland=True, landcolor="LightGreen",
                    showocean=True, oceancolor="LightBlue", showlakes=True, lakecolor="LightBlue",
                    showrivers=True, rivercolor="LightBlue")
    fig.update_layout(title_font_size=20, title_font_family='Arial', title_font_color='#00308F')
    return fig
# Function to create rating distribution box chart
def create_rating_box_chart(df, title):
    """Build a horizontal box plot of the ``rating`` column with all points shown.

    Args:
        df: DataFrame containing a ``rating`` column.
        title: Title shown on the figure.

    Returns:
        The Plotly box figure, with the mean marker enabled.
    """
    fig = px.box(
        df,
        x="rating",
        points="all",
        title=title,
        labels={'rating': 'Rating'},
        orientation='h',
        color_discrete_sequence=['#FF6347'],
    )
    # `boxmean` is a property of the box trace, not a px.box() keyword;
    # passing it to px.box() raises TypeError, so it is set on the trace.
    fig.update_traces(boxmean=True, marker_line_color='rgb(8,48,107)', marker_line_width=1.5, opacity=0.6)
    fig.update_layout(title_font_size=20, title_font_family='Arial', title_font_color='#00308F')
    return fig
# Streamlit app
st.title('Parental Guide Analysis')

# st.button() is True only during the rerun triggered by its own click, so
# the choice is kept in session state; otherwise any other rerun would fall
# back to the default.
if 'selection' not in st.session_state:
    st.session_state['selection'] = 'Movies'  # Default selection

# Split into two columns for buttons
col1, col2 = st.columns(2)

# Buttons for Movies and TV Series
with col1:
    if st.button('Movies'):
        st.session_state['selection'] = 'Movies'
with col2:
    if st.button('TV Series'):
        st.session_state['selection'] = 'TV Series'

selection = st.session_state['selection']
df_selected = df_movies if selection == 'Movies' else df_tv_series

# Display treemap, genre bar chart, rating distribution, and choropleth map based on selection
st.plotly_chart(create_treemap(df_selected, f'Parental Guide - {selection}'), use_container_width=True)
st.plotly_chart(create_genre_bar_chart(df_selected, f'Top 10 Genres - {selection}'), use_container_width=True)
st.plotly_chart(create_rating_box_chart(df_selected, f'Rating Distribution - {selection}'), use_container_width=True)
st.plotly_chart(create_country_map(df_selected, f'Global Distribution of {selection}'), use_container_width=True)