Spaces:

Penguni
/

dashboardmovie

Sleeping

App Files Community

Penguni committed on Jun 24, 2024

Commit

99f87a3

verified ·

1 Parent(s): 8057ddd

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -88

app.py CHANGED Viewed

@@ -1,51 +1,84 @@
 import sqlite3
 import pandas as pd
-import streamlit as st
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
-import plotly.express as px
 # Function to load data from SQLite database
 def load_data(db_file):
     conn = sqlite3.connect(db_file)
     return conn
 # Function to fetch data from database based on query
 def fetch_data(conn, query):
     return pd.read_sql_query(query, conn)
-# Function to calculate total movie releases by year
-def total_movie_releases_by_year(conn):
-    query = '''
-        SELECT startYear, COUNT(*) as total_movies
         FROM title_basics
-        WHERE titleType = 'movie' AND startYear IS NOT NULL AND startYear != '\\N'
-        GROUP BY startYear
-        ORDER BY startYear
     '''
-    df = fetch_data(conn, query)
-    return df
-# Function to calculate total count of years
-def total_count_of_years(conn):
-    query = '''
         SELECT COUNT(DISTINCT startYear) as total_years
         FROM title_basics
         WHERE titleType = 'movie' AND startYear IS NOT NULL AND startYear != '\\N'
     '''
-    df = fetch_data(conn, query)
-    return df
-# Function to calculate average rating of movies
-def average_rating_of_movies(conn):
-    query = '''
         SELECT AVG(averageRating) as avg_rating
         FROM title_ratings
     '''
-    df = fetch_data(conn, query)
-    return df
-# Function to extract genres and create a word cloud
 def create_genre_wordcloud(conn):
     query = '''
         SELECT genres
@@ -53,85 +86,77 @@ def create_genre_wordcloud(conn):
         WHERE titleType = 'movie' AND genres IS NOT NULL AND genres != '\\N'
     '''
     df = fetch_data(conn, query)
-    # Combine all genres into a single string
-    all_genres = ','.join(df['genres']).replace(',', ' ')
-    # Generate word cloud
-    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(all_genres)
-    # Plot word cloud
-    plt.figure(figsize=(10, 5))
-    plt.imshow(wordcloud, interpolation='bilinear')
-    plt.axis('off')
-    plt.tight_layout()
-    # Save word cloud to a temporary file
-    temp_file = '/tmp/wordcloud.png'
-    plt.savefig(temp_file)
-    return temp_file
-# Function to plot globe map using Plotly
-def plot_globe_map(df):
-    fig = px.scatter_geo(df, lat='latitude', lon='longitude', hover_name='primaryTitle',
-                         color='country', size='runtimeMinutes', size_max=20,
-                         projection='natural earth')
-    fig.update_geos(showland=True, landcolor='rgb(217, 217, 217)',
-                    countrycolor='rgb(0, 0, 0)')
-    fig.update_layout(title='Movie Locations on Globe',
-                      margin={"r":0,"t":0,"l":0,"b":0})
-    return fig
-# Main function to run the Streamlit app
-def main():
-    st.title('Movie Locations Dashboard')
-    # Show globe map of movie locations
-    st.subheader('Movie Locations on Globe')
-    fig = plot_globe_map(df)  # Assuming df is already loaded with CSV data
-    st.plotly_chart(fig)
-    st.title('IMDb Movie Dashboard')
     # Load data from SQLite database
     db_file = 'imdb_data.db'  # Adjust path as needed
     conn = load_data(db_file)
-    # Calculate total count of years
-    df_total_years = total_count_of_years(conn)
-    total_years = df_total_years.iloc[0]['total_years']
-    # Calculate total movie releases by year
-    df_total_movies = total_movie_releases_by_year(conn)
-    total_movies = df_total_movies['total_movies'].sum()
-    # Calculate average rating of movies
-    df_avg_rating = average_rating_of_movies(conn)
-    avg_rating = df_avg_rating.iloc[0]['avg_rating']
-    # Generate word cloud of genres
-    wordcloud_file = create_genre_wordcloud(conn)
     # Close database connection
     conn.close()
-    # Display total count of years, total movie releases, and average rating in three columns
-    col1, col2, col3 = st.columns(3)
-    # Column 1: Total Count of Years
-    with col1:
-        st.subheader('Total Count of Years')
-        st.markdown(f"<p style='font-size:36px; font-weight:bold; text-align:center;'>{total_years}</p>", unsafe_allow_html=True)
-    # Column 2: Total Movie Releases
-    with col2:
-        st.subheader('Total Movie Releases')
-        st.markdown(f"<p style='font-size:36px; font-weight:bold; text-align:center;'>{total_movies}</p>", unsafe_allow_html=True)
-    # Column 3: Average Rating
-    with col3:
-        st.subheader('Average Rating')
-        st.markdown(f"<p style='font-size:36px; font-weight:bold; text-align:center;'>{avg_rating:.2f}</p>", unsafe_allow_html=True)
-    # Display word cloud of genres

 import sqlite3
 import pandas as pd
+import plotly.express as px
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
+from collections import Counter
+import numpy as np
+import streamlit as st
 # Function to load data from SQLite database
+@st.cache(allow_output_mutation=True)
 def load_data(db_file):
     conn = sqlite3.connect(db_file)
     return conn
 # Function to fetch data from database based on query
+@st.cache(allow_output_mutation=True)
 def fetch_data(conn, query):
     return pd.read_sql_query(query, conn)
+# Function to fetch summary info from database
+@st.cache(allow_output_mutation=True)
+def fetch_summary_info(conn):
+    # Fetch total count of movies
+    query_total_movies = '''
+        SELECT COUNT(*) as total_movies
         FROM title_basics
+        WHERE titleType = 'movie'
     '''
+    total_movies = fetch_data(conn, query_total_movies).iloc[0]['total_movies']
+    # Fetch total count of years
+    query_total_years = '''
         SELECT COUNT(DISTINCT startYear) as total_years
         FROM title_basics
         WHERE titleType = 'movie' AND startYear IS NOT NULL AND startYear != '\\N'
     '''
+    total_years = fetch_data(conn, query_total_years).iloc[0]['total_years']
+    # Fetch average rating of movies
+    query_avg_rating = '''
         SELECT AVG(averageRating) as avg_rating
         FROM title_ratings
     '''
+    avg_rating = fetch_data(conn, query_avg_rating).iloc[0]['avg_rating']
+    return total_movies, total_years, avg_rating
+# Function to plot global map of total films per region
+@st.cache(allow_output_mutation=True)
+def plot_global_map(df):
+    # Country code to name mapping (only a few examples shown for brevity)
+    country_mapping = {
+        'AF': 'Afghanistan', 'AX': 'Åland Islands', 'AL': 'Albania', 'DZ': 'Algeria', 'AS': 'American Samoa',
+        # Add more mappings as per your original list
+    }
+    # Map country codes to country names
+    df['region'] = df['region'].map(country_mapping)
+    # Group by country and count the number of films
+    df_grouped = df.groupby('region').size().reset_index(name='total_films')
+    # Apply log transformation to handle outliers
+    df_grouped['log_total_films'] = np.log1p(df_grouped['total_films'])
+    # Create a choropleth map with the log-transformed data
+    fig = px.choropleth(df_grouped, locations='region', locationmode='country names',
+                        color='log_total_films', hover_name='region',
+                        color_continuous_scale='Plasma',  # Change the color scheme here
+                        labels={'log_total_films': 'Total Films (log scale)'})
+    # Update layout of the map
+    fig.update_layout(title='Total Films by Country (Log Scale)',
+                      geo=dict(showframe=False, showcoastlines=False,
+                               projection_type='equirectangular'))
+    return fig
+# Function to create word cloud of genres
+@st.cache(allow_output_mutation=True)
 def create_genre_wordcloud(conn):
     query = '''
         SELECT genres
         WHERE titleType = 'movie' AND genres IS NOT NULL AND genres != '\\N'
     '''
     df = fetch_data(conn, query)
+    # Process genres
+    genres = df['genres'].str.split(',', expand=True).stack().replace('\\N', pd.NA).dropna().reset_index(drop=True)
+    genre_counts = Counter(genres)
+    # Generate the word cloud
+    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(genre_counts)
+    # Display the word cloud using Streamlit
+    st.image(wordcloud.to_array(), use_column_width=True)
+    # Save word cloud to a temporary file (optional)
+    # temp_file = '/tmp/wordcloud.png'
+    # plt.savefig(temp_file)
+    # return temp_file
+# Function to find best movie of each genre by numVotes * averageRating
+@st.cache(allow_output_mutation=True)
+def find_best_movies_by_genre(conn):
+    query = '''
+        SELECT tb.tconst, tb.primaryTitle, tb.startYear, tb.genres, tr.averageRating, tr.numVotes
+        FROM title_basics tb
+        JOIN title_ratings tr ON tb.tconst = tr.tconst
+        WHERE tb.titleType = 'movie' AND tb.genres IS NOT NULL AND tb.genres != '\\N'
+    '''
+    df = fetch_data(conn, query)
+    # Split genres and select the first genre for each movie
+    df['genre'] = df['genres'].str.split(',', expand=True)[0]
+    # Calculate score based on numVotes * averageRating
+    df['score'] = df['numVotes'] * df['averageRating']
+    # Get the best movie (highest score) for each genre
+    idx = df.groupby('genre')['score'].idxmax()
+    best_movies_by_genre = df.loc[idx, ['genre', 'primaryTitle', 'startYear', 'averageRating', 'numVotes', 'score']] \
+        .sort_values(by='score', ascending=False).reset_index(drop=True)
+    return best_movies_by_genre
+# Main function to orchestrate the dashboard
+def main():
+    st.title('IMDb Dashboard')
     # Load data from SQLite database
     db_file = 'imdb_data.db'  # Adjust path as needed
     conn = load_data(db_file)
+    # Fetch and display summary info
+    total_movies, total_years, avg_rating = fetch_summary_info(conn)
+    st.write(f"Total Movies: {total_movies}")
+    st.write(f"Total Years: {total_years}")
+    st.write(f"Average Rating: {avg_rating:.2f}")
+    # Display global map of total films per region
+    df_movie_region = pd.read_csv('movie_region.csv')  # Replace with your actual CSV loading
+    fig = plot_global_map(df_movie_region)
+    st.plotly_chart(fig)
+    # Display word cloud of genres
+    create_genre_wordcloud(conn)
+    # Find and display the best movie of each genre
+    best_movies_by_genre = find_best_movies_by_genre(conn)
+    st.subheader("Best Movie of Each Genre:")
+    st.write(best_movies_by_genre)
     # Close database connection
     conn.close()
+# Execute the main function
+if __name__ == '__main__':
+    main()