"""Streamlit dashboard: parental-guide analysis for movies and TV series.

Loads two CSV files, splits their comma-separated ``genre`` and ``country``
columns into lists, and renders summary metrics plus five Plotly charts for
whichever dataset the user selects (movies by default).
"""

import streamlit as st
import pandas as pd
import plotly.express as px

# Country name -> ISO 3166-1 alpha-3 code, as expected by plotly's default
# 'ISO-3' location mode. Names missing from this table are not drawn.
country_mapping = {
    'United States': 'USA', 'United Kingdom': 'GBR', 'France': 'FRA',
    'Canada': 'CAN', 'Germany': 'DEU', 'Japan': 'JPN', 'India': 'IND',
    'Australia': 'AUS', 'China': 'CHN', 'Italy': 'ITA', 'Spain': 'ESP',
    'Mexico': 'MEX', 'Hong Kong': 'HKG', 'Sweden': 'SWE', 'Denmark': 'DNK',
    'New Zealand': 'NZL', 'Belgium': 'BEL', 'South Korea': 'KOR',
    'Ireland': 'IRL', 'Czech Republic': 'CZE', 'Switzerland': 'CHE',
    'Hungary': 'HUN', 'Norway': 'NOR', 'United Arab Emirates': 'ARE',
    'Netherlands': 'NLD', 'South Africa': 'ZAF', 'Poland': 'POL',
    'Austria': 'AUT', 'Turkey': 'TUR', 'Brazil': 'BRA', 'Russia': 'RUS',
    'Argentina': 'ARG', 'Singapore': 'SGP', 'Thailand': 'THA',
    'Portugal': 'PRT', 'Greece': 'GRC', 'Egypt': 'EGY', 'Vietnam': 'VNM',
    'Malaysia': 'MYS', 'Philippines': 'PHL', 'Taiwan': 'TWN',
    'Israel': 'ISR', 'Saudi Arabia': 'SAU', 'Indonesia': 'IDN',
    'Pakistan': 'PAK', 'Iran': 'IRN', 'Iraq': 'IRQ', 'Syria': 'SYR',
    'Lebanon': 'LBN', 'Jordan': 'JOR', 'Qatar': 'QAT', 'Oman': 'OMN',
    'Kuwait': 'KWT', 'Bahrain': 'BHR', 'Yemen': 'YEM', 'Morocco': 'MAR',
    'Tunisia': 'TUN', 'Algeria': 'DZA', 'Libya': 'LBY', 'Sudan': 'SDN',
    'Kenya': 'KEN', 'Nigeria': 'NGA', 'Ghana': 'GHA', 'Ethiopia': 'ETH',
    'Botswana': 'BWA', 'Namibia': 'NAM', 'Zimbabwe': 'ZWE', 'Zambia': 'ZMB',
    'Uganda': 'UGA', 'Rwanda': 'RWA', 'Burundi': 'BDI', 'Tanzania': 'TZA',
    'Angola': 'AGO', 'Mozambique': 'MOZ', 'Madagascar': 'MDG',
    'Mauritius': 'MUS', 'Somalia': 'SOM', 'Somaliland': 'SOM',
    'Senegal': 'SEN', 'Ivory Coast': 'CIV', 'Cameroon': 'CMR',
    'Benin': 'BEN', 'Togo': 'TGO', 'Gambia': 'GMB', 'Guinea': 'GIN',
    'Guinea-Bissau': 'GNB', 'Equatorial Guinea': 'GNQ', 'Gabon': 'GAB',
    'Congo': 'COG', 'Democratic Republic of the Congo': 'COD',
    'Central African Republic': 'CAF', 'Chad': 'TCD', 'Niger': 'NER',
    'Mali': 'MLI', 'Burkina Faso': 'BFA', 'Mauritania': 'MRT',
    'Western Sahara': 'ESH', 'Sierra Leone': 'SLE', 'Liberia': 'LBR',
    'Cape Verde': 'CPV', 'Seychelles': 'SYC', 'Comoros': 'COM',
    'Maldives': 'MDV',
}


def _split_multi_value(series):
    """Split a comma-separated string column into lists of trimmed tokens.

    Surrounding whitespace is stripped from every token and empty tokens are
    dropped, so ``"USA, France"`` yields ``['USA', 'France']`` rather than
    ``['USA', ' France']`` (which would miss the country lookup and split one
    genre into two categories). Missing values are passed through unchanged.
    """
    def _clean(parts):
        if not isinstance(parts, list):
            return parts  # NaN / missing cell: leave as-is
        return [token.strip() for token in parts if token.strip()]

    return series.str.split(',').apply(_clean)


# Load the cleaned datasets (paths are relative to the working directory).
df_movies = pd.read_csv('movie_after_cleaning.csv')
df_tv_series = pd.read_csv('series_after_cleaning.csv')

# Turn the comma-separated genre/country strings into lists so they can be
# exploded into one row per value later on.
df_movies['genre'] = _split_multi_value(df_movies['genre'])
df_tv_series['genre'] = _split_multi_value(df_tv_series['genre'])
df_movies['country'] = _split_multi_value(df_movies['country'])
df_tv_series['country'] = _split_multi_value(df_tv_series['country'])


def create_treemap(df, title):
    """Return a treemap of row counts per ``parentalguide`` value."""
    fig = px.treemap(df, path=['parentalguide'], title=title)
    return fig


def create_best_genres_line_chart(df, title):
    """Return a line chart of the most frequent genre(s) for each year.

    ``df`` must have a ``year`` column and a list-valued ``genre`` column.
    When several genres tie for the top count in a year, all of them are kept.
    """
    # One row per (work, genre) pair.
    df_genres = df.explode('genre')

    # Number of works per year and genre.
    genre_counts = (
        df_genres.groupby(['year', 'genre']).size().reset_index(name='count')
    )

    # Keep the rows whose count equals that year's maximum. The string alias
    # 'max' is used because passing the Python builtin is deprecated.
    yearly_max = genre_counts.groupby('year')['count'].transform('max')
    best_genres = genre_counts[genre_counts['count'] == yearly_max]

    fig_best_genres = px.line(
        best_genres, x='year', y='count', color='genre', title=title
    )
    fig_best_genres.update_layout(
        xaxis_title='Year',
        yaxis_title='Number of Works',
        legend_title='Genre',
    )
    return fig_best_genres


def create_genre_bar_chart(df, title):
    """Return a horizontal bar chart of the ten most frequent genres."""
    df_exploded = df.explode('genre')
    genre_counts = df_exploded['genre'].value_counts().reset_index()
    genre_counts.columns = ['genre', 'count']
    # Take the top 10, then sort ascending so the largest bar is drawn on top.
    genre_counts = genre_counts.head(10).sort_values('count', ascending=True)
    fig = px.bar(
        genre_counts, x='count', y='genre', orientation='h', title=title
    )
    return fig


def create_country_map(df, title):
    """Return a choropleth of the number of works per country.

    Country names are translated to ISO alpha-3 codes through
    ``country_mapping``. Names without a mapping are dropped, and names that
    share a code are merged into one row so each location appears once.
    The hover label shows the country name rather than the ISO code.
    """
    df_exploded = df.explode('country')
    country_counts = df_exploded['country'].value_counts().reset_index()
    country_counts.columns = ['country', 'count']

    # Keep the readable name in 'country'; put the code in its own column.
    country_counts['iso_alpha'] = country_counts['country'].map(country_mapping)
    country_counts = country_counts.dropna(subset=['iso_alpha'])

    # Merge names that map to the same code. value_counts() is sorted by
    # frequency, so 'first' keeps the most common name as the label.
    country_counts = country_counts.groupby('iso_alpha', as_index=False).agg(
        country=('country', 'first'),
        count=('count', 'sum'),
    )

    fig = px.choropleth(
        country_counts,
        locations='iso_alpha',
        color='count',
        hover_name='country',
        title=title,
        projection='natural earth',
    )
    return fig


def display_summary_metrics(df):
    """Render four headline metrics for ``df`` in a single row.

    Shows the row count, the number of distinct ``language`` values, the
    number of distinct countries (after exploding the list-valued ``country``
    column) and the sum of the ``votes`` column.
    """
    num_works = len(df)
    num_languages = df['language'].nunique()
    num_countries = df.explode('country')['country'].nunique()
    num_votes = df['votes'].sum()

    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Number of Works", num_works)
    col2.metric("Number of Languages", num_languages)
    col3.metric("Number of Countries", num_countries)
    col4.metric("Number of Votes", num_votes)


def create_rating_box_chart(df, title):
    """Return a horizontal box plot of the ``rating`` column."""
    fig = px.box(df, x="rating", title=title)
    # Make the point markers semi-transparent so overlapping ones stay legible.
    fig.update_traces(marker=dict(opacity=0.6))
    return fig


def _render_dashboard(df, label):
    """Render the full dashboard for one dataset.

    ``label`` is the dataset's display name ('Movies' or 'TV Series'); it is
    used for the subheader and as the suffix of every chart title.
    """
    st.subheader(label)
    display_summary_metrics(df)

    # First row: parental-guide treemap and top genres.
    col1_1, col1_2 = st.columns(2)
    with col1_1:
        st.plotly_chart(
            create_treemap(df, f'Parental Guide - {label}'),
            use_container_width=True,
        )
    with col1_2:
        st.plotly_chart(
            create_genre_bar_chart(df, f'Top 10 Genres - {label}'),
            use_container_width=True,
        )

    # Second row: country map and rating distribution.
    col2_1, col2_2 = st.columns(2)
    with col2_1:
        st.plotly_chart(
            create_country_map(df, f'Global Distribution of {label}'),
            use_container_width=True,
        )
    with col2_2:
        st.plotly_chart(
            create_rating_box_chart(df, f'Rating Distribution - {label}'),
            use_container_width=True,
        )

    # Full-width chart below the two rows.
    st.plotly_chart(
        create_best_genres_line_chart(
            df, f'Best Genres Over the Years - {label}'
        ),
        use_container_width=True,
    )


# Streamlit app
st.title('Parental Guide Analysis')

# Dataset selector: two buttons side by side.
col1, col2 = st.columns(2)
selection_movies = col1.button('Movies')
selection_tv_series = col2.button('TV Series')

# Neither button was pressed on this run: fall back to the movies view.
if not selection_movies and not selection_tv_series:
    selection_movies = True

# Render the dashboard for the selected dataset.
if selection_movies:
    _render_dashboard(df_movies, 'Movies')
elif selection_tv_series:
    _render_dashboard(df_tv_series, 'TV Series')