Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Files Community

LinkedinMonitor / app.py

GuglielmoTor

Update app.py

deb2291 verified 18 days ago

raw

history blame

23.2 kB

	import gradio as gr
	import pandas as pd
	import os
	import logging
	import matplotlib
	matplotlib.use('Agg') # Set backend for Matplotlib to avoid GUI conflicts with Gradio
	import matplotlib.pyplot as plt

	# --- Module Imports ---
	from gradio_utils import get_url_user_token

	# Functions from newly created/refactored modules
	from config import (
	LINKEDIN_CLIENT_ID_ENV_VAR, BUBBLE_APP_NAME_ENV_VAR,
	BUBBLE_API_KEY_PRIVATE_ENV_VAR, BUBBLE_API_ENDPOINT_ENV_VAR
	)
	from state_manager import process_and_store_bubble_token
	from sync_logic import sync_all_linkedin_data_orchestrator
	from ui_generators import (
	display_main_dashboard,
	run_mentions_tab_display,
	run_follower_stats_tab_display
	)
	# Corrected import for analytics_data_processing
	from analytics_data_processing import prepare_filtered_analytics_data
	from analytics_plot_generator import (
	generate_posts_activity_plot, generate_engagement_type_plot,
	generate_mentions_activity_plot, generate_mention_sentiment_plot,
	generate_followers_count_over_time_plot,
	generate_followers_growth_rate_plot,
	generate_followers_by_demographics_plot,
	generate_engagement_rate_over_time_plot,
	generate_reach_over_time_plot,
	generate_impressions_over_time_plot,
	create_placeholder_plot, # For initializing plots
	# --- Import new plot functions ---
	generate_likes_over_time_plot,
	generate_clicks_over_time_plot,
	generate_shares_over_time_plot,
	generate_comments_over_time_plot,
	generate_comments_sentiment_breakdown_plot
	)

	# Configure root logging once at import time: INFO level and above, with each
	# record rendered as "<timestamp> - <level> - <module> - <message>".
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')

	# --- Analytics Tab: Plot Update Function ---
	def _placeholder_outputs(status_message, title, detail, count):
	    """Return ``[status_message]`` followed by ``count`` placeholder figures.

	    Every placeholder is built by ``create_placeholder_plot`` with the same
	    ``title`` and ``detail`` text.
	    """
	    return [status_message] + [
	        create_placeholder_plot(title=title, message=detail) for _ in range(count)
	    ]


	def update_analytics_plots(token_state_value, date_filter_option, custom_start_date, custom_end_date):
	    """
	    Prepare the filtered analytics data and build every plot of the Analytics tab.

	    Args:
	        token_state_value: Mapping holding the session state. Its "token" entry must be
	            truthy; "config_date_col_posts" and "config_date_col_mentions" (defaulting to
	            "published_at" and "date") name the date columns handed to the plot functions.
	        date_filter_option: Selected date filter. When it equals "Custom Range" the
	            resolved start/end dates are appended to the status message.
	        custom_start_date: Custom range start, forwarded to prepare_filtered_analytics_data.
	        custom_end_date: Custom range end, forwarded to prepare_filtered_analytics_data.

	    Returns:
	        A list whose first item is a status message and whose remaining
	        ``num_expected_plots`` (18) items are figures. If the token is missing, data
	        preparation raises, or plot generation raises, every figure is a placeholder
	        describing the problem and the message carries the error text.
	    """
	    logging.info(f"Updating analytics plots. Filter: {date_filter_option}, Custom Start: {custom_start_date}, Custom End: {custom_end_date}")

	    # Must equal the number of plot components wired as outputs of this callback
	    # (the status message is returned in addition to these).
	    num_expected_plots = 18

	    if not token_state_value or not token_state_value.get("token"):
	        message = "❌ Access denied. No token. Cannot generate analytics."
	        logging.warning(message)
	        return _placeholder_outputs(message, "Access Denied", "No token.", num_expected_plots)

	    try:
	        # prepare_filtered_analytics_data is expected to return exactly these six items.
	        # Comment-level sentiment data is not part of that tuple; the sentiment
	        # breakdown plot below is therefore fed the merged posts DataFrame.
	        (filtered_merged_posts_df,
	         filtered_mentions_df,
	         date_filtered_follower_stats_df,
	         raw_follower_stats_df,
	         start_dt_for_msg,
	         end_dt_for_msg) = prepare_filtered_analytics_data(
	            token_state_value, date_filter_option, custom_start_date, custom_end_date
	        )
	    except Exception as e:
	        error_msg = f"❌ Error preparing analytics data: {e}"
	        logging.error(error_msg, exc_info=True)
	        return _placeholder_outputs(error_msg, "Data Preparation Error", str(e), num_expected_plots)

	    date_column_posts = token_state_value.get("config_date_col_posts", "published_at")
	    date_column_mentions = token_state_value.get("config_date_col_mentions", "date")

	    logging.info(f"Data for plotting - Filtered Merged Posts: {len(filtered_merged_posts_df)} rows, Filtered Mentions: {len(filtered_mentions_df)} rows.")
	    logging.info(f"Date-Filtered Follower Stats: {len(date_filtered_follower_stats_df)} rows, Raw Follower Stats: {len(raw_follower_stats_df)} rows.")

	    try:
	        # --- Posts and mentions overview ---
	        plot_posts_activity = generate_posts_activity_plot(filtered_merged_posts_df, date_column=date_column_posts)
	        plot_engagement_type = generate_engagement_type_plot(filtered_merged_posts_df)
	        plot_mentions_activity = generate_mentions_activity_plot(filtered_mentions_df, date_column=date_column_mentions)
	        plot_mention_sentiment = generate_mention_sentiment_plot(filtered_mentions_df)

	        # --- Follower dynamics: both plots select the same follower-stat type ---
	        monthly_gains_filter = {
	            'type_filter_column': 'follower_count_type',
	            'type_value': 'follower_gains_monthly',
	        }
	        plot_followers_count = generate_followers_count_over_time_plot(
	            date_filtered_follower_stats_df, **monthly_gains_filter
	        )
	        plot_followers_growth_rate = generate_followers_growth_rate_plot(
	            date_filtered_follower_stats_df, **monthly_gains_filter
	        )

	        # --- Follower demographics: built from the raw (not date-filtered) stats ---
	        (plot_followers_by_location,
	         plot_followers_by_role,
	         plot_followers_by_industry,
	         plot_followers_by_seniority) = [
	            generate_followers_by_demographics_plot(
	                raw_follower_stats_df,
	                category_col='category_name',
	                type_filter_column='follower_count_type',
	                type_value=demographic_type,
	                plot_title=demographic_title,
	            )
	            for demographic_type, demographic_title in (
	                ('follower_geo', "Followers by Location"),
	                ('follower_function', "Followers by Role"),
	                ('follower_industry', "Followers by Industry"),
	                ('follower_seniority', "Followers by Seniority"),
	            )
	        ]

	        # --- Post performance over time ---
	        plot_engagement_rate = generate_engagement_rate_over_time_plot(filtered_merged_posts_df, date_column=date_column_posts, engagement_rate_col='engagement')
	        # The reach plot is fed the click count column (the UI labels it "Reach Over Time (Clicks)").
	        plot_reach_over_time = generate_reach_over_time_plot(filtered_merged_posts_df, date_column=date_column_posts, reach_col='clickCount')
	        plot_impressions_over_time = generate_impressions_over_time_plot(filtered_merged_posts_df, date_column=date_column_posts, impressions_col='impressionCount')

	        # --- Detailed engagement over time ---
	        plot_likes_over_time = generate_likes_over_time_plot(filtered_merged_posts_df, date_column=date_column_posts, likes_col='likeCount')
	        plot_clicks_over_time = generate_clicks_over_time_plot(filtered_merged_posts_df, date_column=date_column_posts, clicks_col='clickCount')
	        plot_shares_over_time = generate_shares_over_time_plot(filtered_merged_posts_df, date_column=date_column_posts, shares_col='shareCount')
	        plot_comments_over_time = generate_comments_over_time_plot(filtered_merged_posts_df, date_column=date_column_posts, comments_col='commentCount')

	        # No dedicated comment-sentiment DataFrame is available here, so the merged posts
	        # DataFrame and its 'sentiment' column are passed instead.
	        # NOTE(review): the plot function is assumed to fall back to a placeholder when
	        # that column is missing or unsuitable - confirm in analytics_plot_generator.
	        plot_comments_sentiment_breakdown = generate_comments_sentiment_breakdown_plot(
	            filtered_merged_posts_df,
	            sentiment_column='sentiment'
	        )

	        message = f"📊 Analytics updated for period: {date_filter_option}"
	        if date_filter_option == "Custom Range":
	            s_display = start_dt_for_msg.strftime('%Y-%m-%d') if start_dt_for_msg else "Any"
	            e_display = end_dt_for_msg.strftime('%Y-%m-%d') if end_dt_for_msg else "Any"
	            message += f" (From: {s_display} To: {e_display})"

	        # Order matters: it must match the order of the output components this
	        # callback is bound to.
	        all_generated_plots = [
	            plot_posts_activity, plot_engagement_type, plot_mentions_activity, plot_mention_sentiment,
	            plot_followers_count, plot_followers_growth_rate,
	            plot_followers_by_location, plot_followers_by_role, plot_followers_by_industry, plot_followers_by_seniority,
	            plot_engagement_rate, plot_reach_over_time, plot_impressions_over_time,
	            plot_likes_over_time, plot_clicks_over_time,
	            plot_shares_over_time, plot_comments_over_time,
	            plot_comments_sentiment_breakdown
	        ]

	        # Single pass: keep real figures, swap None / error strings for a placeholder
	        # so that every output slot still receives a figure.
	        final_plots_list = []
	        num_plots_generated = 0
	        for p in all_generated_plots:
	            if p is None or isinstance(p, str):
	                logging.warning(f"A plot generation failed or returned unexpected type, using placeholder. Plot: {p}")
	                final_plots_list.append(create_placeholder_plot(title="Plot Error", message="Failed to generate this plot."))
	            else:
	                num_plots_generated += 1
	                final_plots_list.append(p)
	        logging.info(f"Successfully generated {num_plots_generated} plots out of {num_expected_plots} expected.")

	        # Defensive padding in case the list above and num_expected_plots ever drift apart.
	        # The loop condition alone bounds it, so no extra safety break is needed.
	        while len(final_plots_list) < num_expected_plots:
	            logging.warning(f"Padding missing plot with placeholder. Expected {num_expected_plots}, got {len(final_plots_list)} so far.")
	            final_plots_list.append(create_placeholder_plot(title="Missing Plot", message="Plot could not be generated."))

	        # Truncate as well, so the number of outputs is always 1 + num_expected_plots.
	        return [message] + final_plots_list[:num_expected_plots]

	    except Exception as e:
	        error_msg = f"❌ Error generating analytics plots: {e}"
	        logging.error(error_msg, exc_info=True)
	        return _placeholder_outputs(error_msg, "Plot Generation Error", str(e), num_expected_plots)


	# --- Gradio UI Blocks ---
	# Root Blocks container for the whole dashboard. It is bound to `app`, which is
	# launched at the bottom of the file.
	with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
	title="LinkedIn Organization Dashboard") as app:

	# State dict passed into and returned from the callbacks below. It starts with no
	# token, empty DataFrames and the default date-column names; the
	# "config_date_col_posts" / "config_date_col_mentions" entries are read by
	# update_analytics_plots.
	# NOTE(review): the "bubble_*" DataFrames are presumably filled by
	# process_and_store_bubble_token / the sync orchestrator - confirm in those modules.
	token_state = gr.State(value={
	"token": None, "client_id": None, "org_urn": None,
	"bubble_posts_df": pd.DataFrame(),
	"bubble_post_stats_df": pd.DataFrame(),
	"bubble_mentions_df": pd.DataFrame(),
	"bubble_follower_stats_df": pd.DataFrame(),
	# Consider adding "bubble_comments_sentiment_df": pd.DataFrame() if you plan to fetch this data
	"fetch_count_for_api": 0,
	"url_user_token_temp_storage": None,
	"config_date_col_posts": "published_at",
	"config_date_col_mentions": "date",
	"config_date_col_followers": "date"
	})

	gr.Markdown("# 🚀 LinkedIn Organization Dashboard")
	# Two hidden (visible=False) textboxes receive the values returned by
	# get_url_user_token on page load; the visible status box reports token status.
	url_user_token_display = gr.Textbox(label="User Token (from URL - Hidden)", interactive=False, visible=False)
	status_box = gr.Textbox(label="Overall LinkedIn Token Status", interactive=False, value="Initializing...")
	org_urn_display = gr.Textbox(label="Organization URN (from URL - Hidden)", interactive=False, visible=False)

	# On page load, call get_url_user_token (no inputs) and write its two return
	# values into the hidden token and org URN textboxes.
	app.load(fn=get_url_user_token, inputs=None, outputs=[url_user_token_display, org_urn_display], api_name="get_url_params", show_progress=False)

	def initial_load_sequence(url_token, org_urn_val, current_state):
	    """Process the URL token, then render the dashboard from the resulting state.

	    Returns a 4-tuple: the three values produced by
	    process_and_store_bubble_token (status message, new state, sync-button
	    update) followed by the output of display_main_dashboard for that new state.
	    """
	    token_presence = 'Present' if url_token else 'Absent'
	    logging.info(f"Initial load sequence triggered. Org URN: {org_urn_val}, URL Token: {token_presence}")

	    token_result = process_and_store_bubble_token(url_token, org_urn_val, current_state)
	    status_text, refreshed_state, sync_button_update = token_result

	    # The dashboard is rendered from the refreshed state, not the incoming one.
	    dashboard_markup = display_main_dashboard(refreshed_state)
	    return status_text, refreshed_state, sync_button_update, dashboard_markup

	# Tab container; the Dashboard & Sync tab is declared first.
	with gr.Tabs() as tabs:
	with gr.TabItem("1️⃣ Dashboard & Sync", id="tab_dashboard_sync"):
	gr.Markdown("System checks for existing data from Bubble. The 'Sync' button activates if new data needs to be fetched from LinkedIn based on the last sync times and data availability.")
	# The sync button is created hidden and non-interactive; it is listed as an
	# output of the callbacks below, so they can update it.
	sync_data_btn = gr.Button("🔄 Sync LinkedIn Data", variant="primary", visible=False, interactive=False)
	sync_status_html_output = gr.HTML("<p style='text-align:center;'>Sync status will appear here.</p>")
	dashboard_display_html = gr.HTML("<p style='text-align:center;'>Dashboard loading...</p>")

	# When the hidden org URN textbox changes, run initial_load_sequence and write
	# its four results to the status box, the state, the sync button and the
	# dashboard HTML.
	org_urn_display.change(
	fn=initial_load_sequence,
	inputs=[url_user_token_display, org_urn_display, token_state],
	outputs=[status_box, token_state, sync_data_btn, dashboard_display_html],
	show_progress="full"
	)

	# Sync button pipeline, chained with .then():
	#   1. sync_all_linkedin_data_orchestrator -> sync status HTML + state
	#   2. process_and_store_bubble_token      -> status box + state + sync button
	#   3. display_main_dashboard              -> dashboard HTML
	# The event handle is kept in sync_click_event so that an analytics refresh can
	# be chained after it further down.
	# NOTE(review): show_progress is a bool in the chained steps but a string
	# ("full") elsewhere - confirm the installed Gradio version accepts both forms.
	sync_click_event = sync_data_btn.click(
	fn=sync_all_linkedin_data_orchestrator,
	inputs=[token_state],
	outputs=[sync_status_html_output, token_state],
	show_progress="full"
	).then(
	fn=process_and_store_bubble_token,
	inputs=[url_user_token_display, org_urn_display, token_state],
	outputs=[status_box, token_state, sync_data_btn],
	show_progress=False
	).then(
	fn=display_main_dashboard,
	inputs=[token_state],
	outputs=[dashboard_display_html],
	show_progress=False
	)


	# Analytics tab: introductory text, status line and the date-filter controls.
	with gr.TabItem("2️⃣ Analytics", id="tab_analytics"):
	gr.Markdown("## 📈 LinkedIn Performance Analytics")
	gr.Markdown("Select a date range to filter Posts and Mentions analytics. Follower demographic plots show overall latest data. Follower time-series plots respect the selected date range if applicable to their data source (e.g. monthly gains).")

	# Receives the status message that update_analytics_plots returns as its first output.
	analytics_status_md = gr.Markdown("Analytics status will appear here...")

	with gr.Row():
	# Three preset ranges plus "Custom Range"; the initial selection is "Last 30 Days".
	date_filter_selector = gr.Radio(
	["All Time", "Last 7 Days", "Last 30 Days", "Custom Range"],
	label="Select Date Range (for Posts, Mentions, and some Follower time-series)",
	value="Last 30 Days"
	)
	# Custom-range pickers start hidden (visible=False) and are date-only
	# (include_time=False); both are passed to update_analytics_plots as inputs.
	custom_start_date_picker = gr.DateTime(label="Start Date (Custom)", visible=False, include_time=False, type="datetime") # Changed to datetime
	custom_end_date_picker = gr.DateTime(label="End Date (Custom)", visible=False, include_time=False, type="datetime") # Changed to datetime

	# Clicking this button runs update_analytics_plots with the current filter values.
	apply_filter_btn = gr.Button("🔍 Apply Filter & Refresh Analytics", variant="primary")

	def toggle_custom_date_pickers(selection):
	    """Return visibility updates for the two custom date pickers.

	    Both updates make their component visible only when ``selection`` equals
	    "Custom Range"; the first is for the start picker, the second for the end picker.
	    """
	    show_pickers = (selection == "Custom Range")
	    start_picker_update = gr.update(visible=show_pickers)
	    end_picker_update = gr.update(visible=show_pickers)
	    return start_picker_update, end_picker_update

	# Show or hide the two custom date pickers whenever the selected range changes.
	date_filter_selector.change(
	fn=toggle_custom_date_pickers,
	inputs=[date_filter_selector],
	outputs=[custom_start_date_picker, custom_end_date_picker]
	)

	# Plot components, grouped under section headings. Each one is a slot that
	# update_analytics_plots fills with a figure.
	gr.Markdown("### Posts & Engagement Overview (Filtered by Date)")
	with gr.Row():
	posts_activity_plot = gr.Plot(label="Posts Activity Over Time")
	engagement_type_plot = gr.Plot(label="Post Engagement Types")

	gr.Markdown("### Mentions Overview (Filtered by Date)")
	with gr.Row():
	mentions_activity_plot = gr.Plot(label="Mentions Activity Over Time")
	mention_sentiment_plot = gr.Plot(label="Mention Sentiment Distribution")

	gr.Markdown("### Follower Dynamics")
	with gr.Row():
	followers_count_plot = gr.Plot(label="Followers Count Over Time (e.g., Monthly Gains)")
	followers_growth_rate_plot = gr.Plot(label="Followers Growth Rate (e.g., Monthly Gains)")

	gr.Markdown("### Follower Demographics (Overall Latest Data)")
	with gr.Row():
	followers_by_location_plot = gr.Plot(label="Followers by Location")
	followers_by_role_plot = gr.Plot(label="Followers by Role (Function)")
	with gr.Row():
	followers_by_industry_plot = gr.Plot(label="Followers by Industry")
	followers_by_seniority_plot = gr.Plot(label="Followers by Seniority")

	gr.Markdown("### Post Performance Insights (Filtered by Date)")
	with gr.Row():
	engagement_rate_plot = gr.Plot(label="Engagement Rate Over Time")
	# "Reach" is fed the click count column by update_analytics_plots, hence the label.
	reach_over_time_plot = gr.Plot(label="Reach Over Time (Clicks)")
	with gr.Row():
	impressions_over_time_plot = gr.Plot(label="Impressions Over Time")
	# First of the five newer plots.
	likes_over_time_plot = gr.Plot(label="Reactions (Likes) Over Time")

	gr.Markdown("### Detailed Post Engagement Over Time (Filtered by Date)")
	with gr.Row():
	clicks_over_time_plot = gr.Plot(label="Clicks Over Time")
	shares_over_time_plot = gr.Plot(label="Shares Over Time")
	with gr.Row():
	comments_over_time_plot = gr.Plot(label="Comments Over Time")
	# Fifth of the newer plots: breakdown of comments by sentiment.
	comments_sentiment_plot = gr.Plot(label="Breakdown of Comments by Sentiment")


	# Output components for update_analytics_plots: the status Markdown followed by
	# 18 plots. The order must match the order of the list that function returns
	# (status message first, then the plots in this same sequence).
	analytics_plot_outputs = [
	analytics_status_md, posts_activity_plot, engagement_type_plot,
	mentions_activity_plot, mention_sentiment_plot,
	followers_count_plot, followers_growth_rate_plot,
	followers_by_location_plot, followers_by_role_plot,
	followers_by_industry_plot, followers_by_seniority_plot,
	engagement_rate_plot, reach_over_time_plot, impressions_over_time_plot,
	# --- Newer plot components, in the same order as the callback returns them ---
	likes_over_time_plot, clicks_over_time_plot,
	shares_over_time_plot, comments_over_time_plot,
	comments_sentiment_plot
	]

	# Manual refresh: regenerate all analytics outputs with the current filter values.
	apply_filter_btn.click(
	fn=update_analytics_plots,
	inputs=[token_state, date_filter_selector, custom_start_date_picker, custom_end_date_picker],
	outputs=analytics_plot_outputs,
	show_progress="full"
	)

	# Also update analytics after sync: chained onto the sync button's event chain.
	sync_click_event.then(
	fn=update_analytics_plots,
	inputs=[token_state, date_filter_selector, custom_start_date_picker, custom_end_date_picker],
	outputs=analytics_plot_outputs,
	show_progress="full"
	)

	# Mentions tab: a refresh button that fills an HTML view and a sentiment plot.
	with gr.TabItem("3️⃣ Mentions", id="tab_mentions"):
	refresh_mentions_display_btn = gr.Button("🔄 Refresh Mentions Display (from local data)", variant="secondary")
	mentions_html = gr.HTML("Mentions data loads from Bubble after sync. Click refresh to view current local data.")
	mentions_sentiment_dist_plot = gr.Plot(label="Mention Sentiment Distribution")
	# run_mentions_tab_display receives the state and supplies two outputs:
	# the HTML content and the sentiment distribution plot.
	refresh_mentions_display_btn.click(
	fn=run_mentions_tab_display, inputs=[token_state],
	outputs=[mentions_html, mentions_sentiment_dist_plot],
	show_progress="full"
	)

	# Follower Stats tab: a refresh button that fills an HTML view and three plots.
	with gr.TabItem("4️⃣ Follower Stats", id="tab_follower_stats"):
	refresh_follower_stats_btn = gr.Button("🔄 Refresh Follower Stats Display (from local data)", variant="secondary")
	follower_stats_html = gr.HTML("Follower statistics load from Bubble after sync. Click refresh to view current local data.")
	with gr.Row():
	fs_plot_monthly_gains = gr.Plot(label="Monthly Follower Gains")
	with gr.Row():
	fs_plot_seniority = gr.Plot(label="Followers by Seniority (Top 10 Organic)")
	fs_plot_industry = gr.Plot(label="Followers by Industry (Top 10 Organic)")

	# run_follower_stats_tab_display receives the state and supplies four outputs,
	# in this order: HTML, monthly gains plot, seniority plot, industry plot.
	refresh_follower_stats_btn.click(
	fn=run_follower_stats_tab_display, inputs=[token_state],
	outputs=[follower_stats_html, fs_plot_monthly_gains, fs_plot_seniority, fs_plot_industry],
	show_progress="full"
	)


	if __name__ == "__main__":
	    # Missing configuration is reported as a warning only; the app still starts.
	    if not os.environ.get(LINKEDIN_CLIENT_ID_ENV_VAR):
	        logging.warning(f"WARNING: '{LINKEDIN_CLIENT_ID_ENV_VAR}' environment variable not set.")

	    bubble_env_vars = (
	        BUBBLE_APP_NAME_ENV_VAR,
	        BUBBLE_API_KEY_PRIVATE_ENV_VAR,
	        BUBBLE_API_ENDPOINT_ENV_VAR,
	    )
	    # A variable that is unset or set to an empty string counts as missing.
	    if not all(os.environ.get(var_name) for var_name in bubble_env_vars):
	        logging.warning("WARNING: Bubble environment variables not fully set.")

	    # matplotlib is imported unconditionally at the top of the file, so a missing
	    # installation already fails there; no ImportError can originate from this line.
	    logging.info(f"Matplotlib version: {matplotlib.__version__} found. Backend: {matplotlib.get_backend()}")

	    # Listen on all interfaces, port 7860, with Gradio debug mode enabled.
	    app.launch(server_name="0.0.0.0", server_port=7860, debug=True)