Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

File size: 23,160 Bytes

b560569
575b933
b0464a9
87a87e7
791c130
 
 
f7fc39b
575b933
791c130
4ad44b9
575b933
 
 
 
2a3b22e
575b933
 
 
 
 
 
9d99925
3b4dccb
 
 
 
 
 
 
 
 
 
 
deb2291
 
 
 
 
 
 
3b4dccb
b0464a9
2a3b22e
3b4dccb
2a3b22e
791c130
 
 
 
 
 
575b933
deb2291
 
 
791c130
 
 
3b4dccb
 
a342a6b
575b933
deb2291
 
3b4dccb
 
 
 
 
348bc84
791c130
 
deb2291
 
 
 
 
 
 
 
791c130
 
 
3b4dccb
 
791c130
 
 
348bc84
 
791c130
3b4dccb
 
791c130
 
deb2291
3b4dccb
 
 
 
 
 
 
deb2291
348bc84
3b4dccb
 
 
 
 
 
 
348bc84
 
 
 
3b4dccb
 
 
 
791c130
deb2291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791c130
 
 
 
 
 
3b4dccb
 
 
 
deb2291
 
 
 
 
3b4dccb
348bc84
deb2291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791c130
575b933
791c130
 
3b4dccb
 
a342a6b
b0464a9
2a3b22e
adb3bbe
deb2291
179ea1f
67742c4
a342a6b
3b4dccb
348bc84
a342a6b
575b933
deb2291
348bc84
791c130
 
deb2291
 
67742c4
adb3bbe
a342a6b
575b933
 
f9d8231
179ea1f
a342a6b
575b933
0612e1d
 
4ad44b9
348bc84
0612e1d
adb3bbe
791c130
 
a342a6b
0612e1d
 
575b933
a342a6b
2a3b22e
4ad44b9
2a3b22e
a342a6b
 
2a3b22e
791c130
 
0612e1d
575b933
791c130
0612e1d
575b933
791c130
 
 
 
4ad44b9
791c130
4ad44b9
a342a6b
 
faf26ff
575b933
791c130
 
 
3b4dccb
791c130
 
 
 
 
 
3b4dccb
791c130
 
deb2291
 
791c130
 
 
 
3b902c0
 
 
791c130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b4dccb
 
 
 
 
 
 
 
 
 
 
 
 
 
348bc84
3b4dccb
deb2291
 
3b4dccb
deb2291
 
 
 
 
 
 
 
 
 
 
 
 
3b4dccb
 
 
 
 
 
 
deb2291
 
 
 
 
3b4dccb
791c130
 
 
 
3b4dccb
791c130
 
 
deb2291
348bc84
791c130
 
3b4dccb
a342a6b
adb3bbe
06d22e5
791c130
a342a6b
 
791c130
4ad44b9
 
348bc84
a342a6b
 
575b933
791c130
a342a6b
 
 
791c130
a342a6b
 
575b933
a342a6b
 
 
348bc84
a342a6b
538b42b
791c130
575b933
adb3bbe
575b933
791c130
575b933
 
 
791c130
a342a6b
 
575b933
a342a6b
791c130
a342a6b
791c130

import gradio as gr
import pandas as pd
import os
import logging
import matplotlib
matplotlib.use('Agg') # Set backend for Matplotlib to avoid GUI conflicts with Gradio
import matplotlib.pyplot as plt 

# --- Module Imports ---
from gradio_utils import get_url_user_token 

# Functions from newly created/refactored modules
from config import (
    LINKEDIN_CLIENT_ID_ENV_VAR, BUBBLE_APP_NAME_ENV_VAR,
    BUBBLE_API_KEY_PRIVATE_ENV_VAR, BUBBLE_API_ENDPOINT_ENV_VAR
)
from state_manager import process_and_store_bubble_token
from sync_logic import sync_all_linkedin_data_orchestrator
from ui_generators import (
    display_main_dashboard,
    run_mentions_tab_display,
    run_follower_stats_tab_display
)
# Corrected import for analytics_data_processing
from analytics_data_processing import prepare_filtered_analytics_data 
from analytics_plot_generator import (
    generate_posts_activity_plot, generate_engagement_type_plot,
    generate_mentions_activity_plot, generate_mention_sentiment_plot,
    generate_followers_count_over_time_plot,
    generate_followers_growth_rate_plot,
    generate_followers_by_demographics_plot,
    generate_engagement_rate_over_time_plot,
    generate_reach_over_time_plot,
    generate_impressions_over_time_plot,
    create_placeholder_plot, # For initializing plots
    # --- Import new plot functions ---
    generate_likes_over_time_plot,
    generate_clicks_over_time_plot,
    generate_shares_over_time_plot,
    generate_comments_over_time_plot,
    generate_comments_sentiment_breakdown_plot
)

# Root logger setup, executed once at import time: INFO threshold, and every
# record carries a timestamp, its level and the name of the emitting module.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(module)s - %(message)s',
    level=logging.INFO,
)

# --- Analytics Tab: Plot Update Function ---
def _run_plot_job(plot_label, plot_fn, plot_args, plot_kwargs):
    """
    Runs one plot generator in isolation so that its failure cannot affect other plots.

    Args:
        plot_label: Short name used only in log messages.
        plot_fn: The plot-generating callable.
        plot_args: Tuple of positional arguments for ``plot_fn``.
        plot_kwargs: Dict of keyword arguments for ``plot_fn``.

    Returns:
        A ``(figure, succeeded)`` tuple. When ``plot_fn`` raises, or returns ``None``
        or a string, ``figure`` is a placeholder from ``create_placeholder_plot`` and
        ``succeeded`` is ``False``.
    """
    try:
        figure = plot_fn(*plot_args, **plot_kwargs)
    except Exception as e:
        logging.error(f"Plot '{plot_label}' raised an exception: {e}", exc_info=True)
        return create_placeholder_plot(title="Plot Generation Error", message=str(e)), False

    # Generators are expected to return figures; None or an error string means failure.
    if figure is None or isinstance(figure, str):
        logging.warning(f"A plot generation failed or returned unexpected type, using placeholder. Plot: {figure}")
        return create_placeholder_plot(title="Plot Error", message="Failed to generate this plot."), False

    return figure, True


def update_analytics_plots(token_state_value, date_filter_option, custom_start_date, custom_end_date):
    """
    Prepares analytics data using external processing function and then generates plots.

    Args:
        token_state_value: The session state dict. Must contain a truthy "token";
            "config_date_col_posts" / "config_date_col_mentions" are read with defaults.
        date_filter_option: The selected date-range label (e.g. "Custom Range").
        custom_start_date: Custom range start, forwarded to the data preparation step.
        custom_end_date: Custom range end, forwarded to the data preparation step.

    Returns:
        A list of exactly ``1 + num_expected_plots`` items: a status message followed by
        the plot figures, in the order the Gradio output components are wired.
        Any plot that could not be generated is replaced by a placeholder figure.
    """
    logging.info(f"Updating analytics plots. Filter: {date_filter_option}, Custom Start: {custom_start_date}, Custom End: {custom_end_date}")

    # Must equal the number of gr.Plot components wired as outputs for this callback.
    num_expected_plots = 18

    if not token_state_value or not token_state_value.get("token"):
        message = "❌ Access denied. No token. Cannot generate analytics."
        logging.warning(message)
        placeholder_figs = [create_placeholder_plot(title="Access Denied", message="No token.") for _ in range(num_expected_plots)]
        return [message] + placeholder_figs

    try:
        (filtered_merged_posts_df,
         filtered_mentions_df,
         date_filtered_follower_stats_df,
         raw_follower_stats_df,
         start_dt_for_msg, end_dt_for_msg) = \
            prepare_filtered_analytics_data(
                token_state_value, date_filter_option, custom_start_date, custom_end_date
            )
    except Exception as e:
        error_msg = f"❌ Error preparing analytics data: {e}"
        logging.error(error_msg, exc_info=True)
        placeholder_figs = [create_placeholder_plot(title="Data Preparation Error", message=str(e)) for _ in range(num_expected_plots)]
        return [error_msg] + placeholder_figs

    date_column_posts = token_state_value.get("config_date_col_posts", "published_at")
    date_column_mentions = token_state_value.get("config_date_col_mentions", "date")

    try:
        # Logged inside the guarded section: len() raises if a frame is unexpectedly None.
        logging.info(f"Data for plotting - Filtered Merged Posts: {len(filtered_merged_posts_df)} rows, Filtered Mentions: {len(filtered_mentions_df)} rows.")
        logging.info(f"Date-Filtered Follower Stats: {len(date_filtered_follower_stats_df)} rows, Raw Follower Stats: {len(raw_follower_stats_df)} rows.")

        posts_args = (filtered_merged_posts_df,)
        mentions_args = (filtered_mentions_df,)
        monthly_gains_kwargs = {
            "type_filter_column": 'follower_count_type',
            "type_value": 'follower_gains_monthly',
        }

        # Each job is (label, generator, positional args, keyword args).
        # The ORDER of this list defines the order of the returned figures.
        plot_jobs = [
            ("posts_activity", generate_posts_activity_plot, posts_args, {"date_column": date_column_posts}),
            ("engagement_type", generate_engagement_type_plot, posts_args, {}),
            ("mentions_activity", generate_mentions_activity_plot, mentions_args, {"date_column": date_column_mentions}),
            ("mention_sentiment", generate_mention_sentiment_plot, mentions_args, {}),
            ("followers_count", generate_followers_count_over_time_plot,
             (date_filtered_follower_stats_df,), dict(monthly_gains_kwargs)),
            ("followers_growth_rate", generate_followers_growth_rate_plot,
             (date_filtered_follower_stats_df,), dict(monthly_gains_kwargs)),
        ]

        # Demographic plots use the raw (not date-filtered) follower stats.
        demographic_plots = (
            ('follower_geo', "Followers by Location"),
            ('follower_function', "Followers by Role"),
            ('follower_industry', "Followers by Industry"),
            ('follower_seniority', "Followers by Seniority"),
        )
        for type_value, plot_title in demographic_plots:
            plot_jobs.append((
                type_value, generate_followers_by_demographics_plot, (raw_follower_stats_df,),
                {
                    "category_col": 'category_name',
                    "type_filter_column": 'follower_count_type',
                    "type_value": type_value,
                    "plot_title": plot_title,
                },
            ))

        plot_jobs += [
            ("engagement_rate", generate_engagement_rate_over_time_plot, posts_args,
             {"date_column": date_column_posts, "engagement_rate_col": 'engagement'}),
            # NOTE(review): reach is plotted from 'clickCount', same column as the clicks plot — confirm intended.
            ("reach_over_time", generate_reach_over_time_plot, posts_args,
             {"date_column": date_column_posts, "reach_col": 'clickCount'}),
            ("impressions_over_time", generate_impressions_over_time_plot, posts_args,
             {"date_column": date_column_posts, "impressions_col": 'impressionCount'}),
            ("likes_over_time", generate_likes_over_time_plot, posts_args,
             {"date_column": date_column_posts, "likes_col": 'likeCount'}),
            ("clicks_over_time", generate_clicks_over_time_plot, posts_args,
             {"date_column": date_column_posts, "clicks_col": 'clickCount'}),
            ("shares_over_time", generate_shares_over_time_plot, posts_args,
             {"date_column": date_column_posts, "shares_col": 'shareCount'}),
            ("comments_over_time", generate_comments_over_time_plot, posts_args,
             {"date_column": date_column_posts, "comments_col": 'commentCount'}),
            # No dedicated comment-sentiment DataFrame is available here, so the merged posts
            # frame is passed with its 'sentiment' column as a proxy. The plot function is
            # presumably responsible for showing a placeholder when that column is unsuitable.
            ("comments_sentiment_breakdown", generate_comments_sentiment_breakdown_plot, posts_args,
             {"sentiment_column": 'sentiment'}),
        ]

        final_plots_list = []
        num_plots_generated = 0
        for plot_label, plot_fn, plot_args, plot_kwargs in plot_jobs:
            figure, succeeded = _run_plot_job(plot_label, plot_fn, plot_args, plot_kwargs)
            final_plots_list.append(figure)
            if succeeded:
                num_plots_generated += 1
        logging.info(f"Successfully generated {num_plots_generated} plots out of {num_expected_plots} expected.")

        # Gradio requires exactly one value per output component; guard against the job
        # list and num_expected_plots drifting apart in a future edit.
        num_missing = num_expected_plots - len(final_plots_list)
        if num_missing > 0:
            logging.warning(f"Padding {num_missing} missing plot(s) with placeholders. Expected {num_expected_plots}, got {len(final_plots_list)}.")
            final_plots_list.extend(
                create_placeholder_plot(title="Missing Plot", message="Plot could not be generated.")
                for _ in range(num_missing)
            )
        final_plots_list = final_plots_list[:num_expected_plots]

        message = f"📊 Analytics updated for period: {date_filter_option}"
        if date_filter_option == "Custom Range":
            s_display = start_dt_for_msg.strftime('%Y-%m-%d') if start_dt_for_msg else "Any"
            e_display = end_dt_for_msg.strftime('%Y-%m-%d') if end_dt_for_msg else "Any"
            message += f" (From: {s_display} To: {e_display})"

        num_failed = len(plot_jobs) - num_plots_generated
        if num_failed:
            message += f" ⚠️ {num_failed} of {len(plot_jobs)} plot(s) could not be generated."

        return [message] + final_plots_list

    except Exception as e:
        error_msg = f"❌ Error generating analytics plots: {e}"
        logging.error(error_msg, exc_info=True)
        placeholder_figs = [create_placeholder_plot(title="Plot Generation Error", message=str(e)) for _ in range(num_expected_plots)]
        return [error_msg] + placeholder_figs


# --- Gradio UI Blocks ---
# Declares the whole dashboard UI and its event wiring. Components are created inside
# nested context managers, so the statement order below is significant for the layout.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
                title="LinkedIn Organization Dashboard") as app:

    # State dict passed into (and returned from) the callbacks below.
    token_state = gr.State(value={
        "token": None, "client_id": None, "org_urn": None,
        "bubble_posts_df": pd.DataFrame(), 
        "bubble_post_stats_df": pd.DataFrame(), 
        "bubble_mentions_df": pd.DataFrame(),
        "bubble_follower_stats_df": pd.DataFrame(),
        # TODO: add "bubble_comments_sentiment_df": pd.DataFrame() if comment-level sentiment data is fetched
        "fetch_count_for_api": 0, 
        "url_user_token_temp_storage": None,
        # Names of the date columns; update_analytics_plots reads the posts/mentions ones.
        "config_date_col_posts": "published_at", 
        "config_date_col_mentions": "date",       
        "config_date_col_followers": "date" 
    })

    gr.Markdown("# 🚀 LinkedIn Organization Dashboard")
    # Hidden textboxes used as carriers for the values produced by get_url_user_token.
    url_user_token_display = gr.Textbox(label="User Token (from URL - Hidden)", interactive=False, visible=False)
    status_box = gr.Textbox(label="Overall LinkedIn Token Status", interactive=False, value="Initializing...")
    org_urn_display = gr.Textbox(label="Organization URN (from URL - Hidden)", interactive=False, visible=False)

    # On page load, fill the two hidden textboxes (presumably from URL parameters — see gradio_utils).
    app.load(fn=get_url_user_token, inputs=None, outputs=[url_user_token_display, org_urn_display], api_name="get_url_params", show_progress=False)

    def initial_load_sequence(url_token, org_urn_val, current_state):
        """
        Processes the token/org URN into a new state and renders the main dashboard from it.

        Returns the status message, the new state, the sync-button update and the
        dashboard content, in the order expected by the `org_urn_display.change` outputs.
        """
        logging.info(f"Initial load sequence triggered. Org URN: {org_urn_val}, URL Token: {'Present' if url_token else 'Absent'}")
        status_msg, new_state, btn_update = process_and_store_bubble_token(url_token, org_urn_val, current_state)
        dashboard_content = display_main_dashboard(new_state) 
        return status_msg, new_state, btn_update, dashboard_content

    with gr.Tabs() as tabs:
        with gr.TabItem("1️⃣ Dashboard & Sync", id="tab_dashboard_sync"):
            gr.Markdown("System checks for existing data from Bubble. The 'Sync' button activates if new data needs to be fetched from LinkedIn based on the last sync times and data availability.")
            # Starts hidden and disabled; its update comes from process_and_store_bubble_token.
            sync_data_btn = gr.Button("🔄 Sync LinkedIn Data", variant="primary", visible=False, interactive=False)
            sync_status_html_output = gr.HTML("<p style='text-align:center;'>Sync status will appear here.</p>")
            dashboard_display_html = gr.HTML("<p style='text-align:center;'>Dashboard loading...</p>")

            # The initial load is driven by the hidden org URN textbox changing
            # (it is written by the app.load callback above).
            org_urn_display.change(
                fn=initial_load_sequence,
                inputs=[url_user_token_display, org_urn_display, token_state],
                outputs=[status_box, token_state, sync_data_btn, dashboard_display_html],
                show_progress="full"
            )
            
            # Sync chain: 1) run the sync, 2) re-process the token/state and refresh the
            # sync button, 3) re-render the dashboard. `sync_click_event` is bound to the
            # result of the LAST `.then`, so later `.then` calls on it run after step 3.
            sync_click_event = sync_data_btn.click(
                fn=sync_all_linkedin_data_orchestrator,
                inputs=[token_state],
                outputs=[sync_status_html_output, token_state], 
                show_progress="full"
            ).then(
                fn=process_and_store_bubble_token, 
                inputs=[url_user_token_display, org_urn_display, token_state], 
                outputs=[status_box, token_state, sync_data_btn], 
                show_progress=False 
            ).then(
                fn=display_main_dashboard, 
                inputs=[token_state],
                outputs=[dashboard_display_html],
                show_progress=False
            )


        with gr.TabItem("2️⃣ Analytics", id="tab_analytics"):
            gr.Markdown("## 📈 LinkedIn Performance Analytics")
            gr.Markdown("Select a date range to filter Posts and Mentions analytics. Follower demographic plots show overall latest data. Follower time-series plots respect the selected date range if applicable to their data source (e.g. monthly gains).")
            
            analytics_status_md = gr.Markdown("Analytics status will appear here...")

            with gr.Row():
                date_filter_selector = gr.Radio(
                    ["All Time", "Last 7 Days", "Last 30 Days", "Custom Range"],
                    label="Select Date Range (for Posts, Mentions, and some Follower time-series)",
                    value="Last 30 Days"
                )
                # Date-only pickers, hidden until "Custom Range" is selected.
                # type="datetime" requests datetime objects from Gradio (see the DateTime component docs).
                custom_start_date_picker = gr.DateTime(label="Start Date (Custom)", visible=False, include_time=False, type="datetime")
                custom_end_date_picker = gr.DateTime(label="End Date (Custom)", visible=False, include_time=False, type="datetime")
            
            apply_filter_btn = gr.Button("🔍 Apply Filter & Refresh Analytics", variant="primary")

            def toggle_custom_date_pickers(selection):
                """Returns visibility updates showing both custom date pickers only for "Custom Range"."""
                is_custom = selection == "Custom Range"
                return gr.update(visible=is_custom), gr.update(visible=is_custom)

            date_filter_selector.change(
                fn=toggle_custom_date_pickers,
                inputs=[date_filter_selector],
                outputs=[custom_start_date_picker, custom_end_date_picker]
            )

            # The plot components below are laid out two per row.
            gr.Markdown("### Posts & Engagement Overview (Filtered by Date)")
            with gr.Row():
                posts_activity_plot = gr.Plot(label="Posts Activity Over Time")
                engagement_type_plot = gr.Plot(label="Post Engagement Types")
            
            gr.Markdown("### Mentions Overview (Filtered by Date)")
            with gr.Row():
                mentions_activity_plot = gr.Plot(label="Mentions Activity Over Time")
                mention_sentiment_plot = gr.Plot(label="Mention Sentiment Distribution")

            gr.Markdown("### Follower Dynamics")
            with gr.Row():
                followers_count_plot = gr.Plot(label="Followers Count Over Time (e.g., Monthly Gains)")
                followers_growth_rate_plot = gr.Plot(label="Followers Growth Rate (e.g., Monthly Gains)")
            
            gr.Markdown("### Follower Demographics (Overall Latest Data)")
            with gr.Row():
                followers_by_location_plot = gr.Plot(label="Followers by Location")
                followers_by_role_plot = gr.Plot(label="Followers by Role (Function)")
            with gr.Row():
                followers_by_industry_plot = gr.Plot(label="Followers by Industry")
                followers_by_seniority_plot = gr.Plot(label="Followers by Seniority")

            gr.Markdown("### Post Performance Insights (Filtered by Date)")
            with gr.Row(): 
                engagement_rate_plot = gr.Plot(label="Engagement Rate Over Time")
                reach_over_time_plot = gr.Plot(label="Reach Over Time (Clicks)")
            with gr.Row(): # Impressions shares this row with the likes plot
                impressions_over_time_plot = gr.Plot(label="Impressions Over Time")
                likes_over_time_plot = gr.Plot(label="Reactions (Likes) Over Time")

            gr.Markdown("### Detailed Post Engagement Over Time (Filtered by Date)")
            with gr.Row():
                clicks_over_time_plot = gr.Plot(label="Clicks Over Time")
                shares_over_time_plot = gr.Plot(label="Shares Over Time")
            with gr.Row():
                comments_over_time_plot = gr.Plot(label="Comments Over Time")
                # Shares this row with the comments-over-time plot.
                comments_sentiment_plot = gr.Plot(label="Breakdown of Comments by Sentiment")


            # Output components for update_analytics_plots: the status markdown first, then
            # 18 plots. The order must match the order of the list that function returns.
            analytics_plot_outputs = [
                analytics_status_md, posts_activity_plot, engagement_type_plot, 
                mentions_activity_plot, mention_sentiment_plot,
                followers_count_plot, followers_growth_rate_plot,
                followers_by_location_plot, followers_by_role_plot, 
                followers_by_industry_plot, followers_by_seniority_plot,
                engagement_rate_plot, reach_over_time_plot, impressions_over_time_plot,
                likes_over_time_plot, clicks_over_time_plot,
                shares_over_time_plot, comments_over_time_plot,
                comments_sentiment_plot
            ]

            apply_filter_btn.click(
                fn=update_analytics_plots,
                inputs=[token_state, date_filter_selector, custom_start_date_picker, custom_end_date_picker],
                outputs=analytics_plot_outputs,
                show_progress="full"
            )
            
            # Also refresh analytics at the end of the sync chain defined in the Dashboard tab.
            sync_click_event.then( 
                fn=update_analytics_plots,
                inputs=[token_state, date_filter_selector, custom_start_date_picker, custom_end_date_picker],
                outputs=analytics_plot_outputs,
                show_progress="full"
            )

        with gr.TabItem("3️⃣ Mentions", id="tab_mentions"):
            # Manual refresh only: nothing in this block populates the tab automatically.
            refresh_mentions_display_btn = gr.Button("🔄 Refresh Mentions Display (from local data)", variant="secondary")
            mentions_html = gr.HTML("Mentions data loads from Bubble after sync. Click refresh to view current local data.")
            mentions_sentiment_dist_plot = gr.Plot(label="Mention Sentiment Distribution") 
            refresh_mentions_display_btn.click(
                fn=run_mentions_tab_display, inputs=[token_state],
                outputs=[mentions_html, mentions_sentiment_dist_plot], 
                show_progress="full"
            )

        with gr.TabItem("4️⃣ Follower Stats", id="tab_follower_stats"):
            # Manual refresh only: nothing in this block populates the tab automatically.
            refresh_follower_stats_btn = gr.Button("🔄 Refresh Follower Stats Display (from local data)", variant="secondary")
            follower_stats_html = gr.HTML("Follower statistics load from Bubble after sync. Click refresh to view current local data.")
            with gr.Row():
                fs_plot_monthly_gains = gr.Plot(label="Monthly Follower Gains") 
            with gr.Row():
                fs_plot_seniority = gr.Plot(label="Followers by Seniority (Top 10 Organic)")
                fs_plot_industry = gr.Plot(label="Followers by Industry (Top 10 Organic)")

            refresh_follower_stats_btn.click(
                fn=run_follower_stats_tab_display, inputs=[token_state],
                outputs=[follower_stats_html, fs_plot_monthly_gains, fs_plot_seniority, fs_plot_industry], 
                show_progress="full"
            )
    

if __name__ == "__main__":
    # Missing configuration only produces warnings; the app is launched regardless.
    linkedin_client_id = os.environ.get(LINKEDIN_CLIENT_ID_ENV_VAR)
    if not linkedin_client_id:
        logging.warning(f"WARNING: '{LINKEDIN_CLIENT_ID_ENV_VAR}' environment variable not set.")

    bubble_env_var_names = (
        BUBBLE_APP_NAME_ENV_VAR,
        BUBBLE_API_KEY_PRIVATE_ENV_VAR,
        BUBBLE_API_ENDPOINT_ENV_VAR,
    )
    # A single warning covers all three Bubble settings (unset or empty counts as missing).
    if not all(os.environ.get(env_name) for env_name in bubble_env_var_names):
        logging.warning("WARNING: Bubble environment variables not fully set.")

    # Report the Matplotlib version and active backend for diagnostics.
    # NOTE(review): matplotlib is imported unconditionally at the top of the file, so
    # this ImportError branch looks unreachable — confirm before relying on it.
    try:
        mpl_version = matplotlib.__version__
        mpl_backend = matplotlib.get_backend()
        logging.info(f"Matplotlib version: {mpl_version} found. Backend: {mpl_backend}")
    except ImportError:
        logging.error("Matplotlib is not installed. Plots will not be generated.")

    # Listen on all interfaces, port 7860, with Gradio debug mode on.
    app.launch(server_name="0.0.0.0", server_port=7860, debug=True)