Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,909 +1,57 @@
|
|
|
|
|
| 1 |
# -- coding: utf-8 --
|
| 2 |
import gradio as gr
|
| 3 |
-
import
|
| 4 |
import os
|
| 5 |
import logging
|
| 6 |
-
import html
|
| 7 |
-
import pandas as pd
|
| 8 |
-
from datetime import datetime, timedelta, timezone # Added timezone
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
from
|
| 12 |
-
from
|
|
|
|
| 13 |
|
| 14 |
-
from
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
)
|
| 19 |
-
|
| 20 |
-
from
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
prepare_data_for_bubble, # For posts, stats, comments
|
| 26 |
-
fetch_linkedin_mentions_core,
|
| 27 |
-
analyze_mentions_sentiment, # For individual mentions
|
| 28 |
-
compile_detailed_mentions, # Compiles to user-specified format
|
| 29 |
-
prepare_mentions_for_bubble # Prepares user-specified format for Bubble
|
| 30 |
)
|
| 31 |
|
| 32 |
-
# Import follower stats function
|
| 33 |
-
from linkedin_follower_stats import get_linkedin_follower_stats
|
| 34 |
-
|
| 35 |
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Constants ---
DEFAULT_INITIAL_FETCH_COUNT = 10
LINKEDIN_POST_URN_KEY = 'id'
BUBBLE_POST_URN_COLUMN_NAME = 'id'  # Assuming this is the unique post ID in Bubble
BUBBLE_POST_DATE_COLUMN_NAME = 'published_at'  # Assuming this is the post publication date in Bubble

# Constants for Mentions
BUBBLE_MENTIONS_TABLE_NAME = "LI_mentions"
BUBBLE_MENTIONS_ID_COLUMN_NAME = "id"  # Assuming this is the unique mention ID in Bubble
BUBBLE_MENTIONS_DATE_COLUMN_NAME = "date"  # Assuming this is the mention date in Bubble

DEFAULT_MENTIONS_INITIAL_FETCH_COUNT = 20
DEFAULT_MENTIONS_UPDATE_FETCH_COUNT = 10

# Constants for Follower Stats
BUBBLE_FOLLOWER_STATS_TABLE_NAME = "LI_follower_stats"
# For demographics: the name (e.g., "Engineering"); for monthly gains: a date string 'YYYY-MM-DD'
FOLLOWER_STATS_CATEGORY_COLUMN = "category_name"
# e.g., "follower_seniority", "follower_gains_monthly"
FOLLOWER_STATS_TYPE_COLUMN = "follower_count_type"
FOLLOWER_STATS_ORG_URN_COLUMN = "organization_urn"  # URN of the organization
FOLLOWER_STATS_ORGANIC_COLUMN = "follower_count_organic"
FOLLOWER_STATS_PAID_COLUMN = "follower_count_paid"
FOLLOWER_STATS_CATEGORY_COLUMN_DT = 'category_name_dt'
| 62 |
-
def check_token_status(token_state):
    """Return a short human-readable status string for the LinkedIn token.

    A token counts as "available" only when the state dict itself is truthy
    AND its "token" entry is truthy (e.g. a parsed token dict).
    """
    # NOTE(review): original status strings were mojibake ("β…"/"β");
    # restored to the intended ✅/❌ emoji used elsewhere in the app's UI text.
    return "✅ Token available" if token_state and token_state.get("token") else "❌ Token not available"
|
| 66 |
-
def process_and_store_bubble_token(url_user_token, org_urn, token_state):
    """
    Processes the user token, fetches the LinkedIn token from Bubble, loads existing
    Bubble data (posts, mentions, follower stats), and decides whether an initial
    fetch or an update is needed for each data type.

    Args:
        url_user_token: User token extracted from the page URL (may carry error text
            such as "not found" / "Could not access", which disables the token fetch).
        org_urn: LinkedIn organization URN to load data for.
        token_state: Previous state dict (or None/empty on first call).

    Returns:
        (token_status_message, new_state, button_update) — a status string for the UI,
        the updated state dict, and a gr.update() for the sync button.
    """
    logging.info(f"Processing token with URL user token: '{url_user_token}', Org URN: '{org_urn}'")

    # Initialize or update state safely; never mutate the caller's dict in place.
    new_state = token_state.copy() if token_state else {
        "token": None, "client_id": None, "org_urn": None,
        "bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,
        "bubble_mentions_df": pd.DataFrame(),
        "bubble_follower_stats_df": pd.DataFrame(),
        "url_user_token_temp_storage": None
    }
    new_state.update({
        "org_urn": org_urn,
        "bubble_posts_df": new_state.get("bubble_posts_df", pd.DataFrame()),  # Ensure DF exists
        "fetch_count_for_api": new_state.get("fetch_count_for_api", 0),
        "bubble_mentions_df": new_state.get("bubble_mentions_df", pd.DataFrame()),  # Ensure DF exists
        "bubble_follower_stats_df": new_state.get("bubble_follower_stats_df", pd.DataFrame()),  # Ensure DF exists
        "url_user_token_temp_storage": url_user_token
    })

    # Default: sync button hidden until we know a sync is possible and needed.
    button_update = gr.update(visible=False, interactive=False, value="🔄 Sync LinkedIn Data")

    client_id = os.environ.get("Linkedin_client_id")
    new_state["client_id"] = client_id if client_id else "ENV VAR MISSING"
    if not client_id:
        logging.error("CRITICAL ERROR: 'Linkedin_client_id' environment variable not set.")

    # --- Fetch LinkedIn token from Bubble (only if the URL token looks valid) ---
    if url_user_token and "not found" not in url_user_token and "Could not access" not in url_user_token:
        logging.info(f"Attempting to fetch LinkedIn token from Bubble with user token: {url_user_token}")
        try:
            parsed_linkedin_token = fetch_linkedin_token_from_bubble(url_user_token)
            if isinstance(parsed_linkedin_token, dict) and "access_token" in parsed_linkedin_token:
                new_state["token"] = parsed_linkedin_token
                logging.info("✅ LinkedIn Token successfully fetched from Bubble.")
            else:
                new_state["token"] = None
                logging.warning(f"❌ Failed to fetch a valid LinkedIn token from Bubble. Response: {parsed_linkedin_token}")
        except Exception as e:
            new_state["token"] = None
            logging.error(f"❌ Exception while fetching LinkedIn token from Bubble: {e}", exc_info=True)
    else:
        new_state["token"] = None
        logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")

    # --- Fetch existing data from Bubble if the Org URN is available ---
    current_org_urn = new_state.get("org_urn")
    if current_org_urn:
        def _fetch_bubble_df(table_name, state_key):
            # Shared fetch-and-store for one Bubble table; any error leaves an empty DF.
            logging.info(f"Attempting to fetch {table_name} from Bubble for org_urn: {current_org_urn}")
            try:
                fetched_df, error_message = fetch_linkedin_posts_data_from_bubble(current_org_urn, table_name)
                new_state[state_key] = pd.DataFrame() if error_message or fetched_df is None else fetched_df
                if error_message:
                    logging.warning(f"Error fetching {table_name} from Bubble: {error_message}.")
            except Exception as e:
                logging.error(f"❌ Error fetching {table_name} from Bubble: {e}.", exc_info=True)
                new_state[state_key] = pd.DataFrame()

        _fetch_bubble_df("LI_posts", "bubble_posts_df")  # Assuming "LI_posts" is the table name
        _fetch_bubble_df(BUBBLE_MENTIONS_TABLE_NAME, "bubble_mentions_df")
        _fetch_bubble_df(BUBBLE_FOLLOWER_STATS_TABLE_NAME, "bubble_follower_stats_df")
    else:
        logging.warning("Org URN not available in state. Cannot fetch data from Bubble.")
        new_state["bubble_posts_df"] = pd.DataFrame()
        new_state["bubble_mentions_df"] = pd.DataFrame()
        new_state["bubble_follower_stats_df"] = pd.DataFrame()

    # --- Determine fetch count for the Posts API ---
    if new_state["bubble_posts_df"].empty:
        logging.info(f"ℹ️ No posts in Bubble. Setting to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts.")
        new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
    else:
        try:
            df_posts_check = new_state["bubble_posts_df"]
            if BUBBLE_POST_DATE_COLUMN_NAME not in df_posts_check.columns or df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME].isnull().all():
                logging.warning(f"Date column '{BUBBLE_POST_DATE_COLUMN_NAME}' for posts missing or all null values. Triggering initial fetch.")
                new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
            else:
                # Convert to a standalone Series so the stored DF is never mutated.
                post_dates_utc = pd.to_datetime(df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME], errors='coerce', utc=True)
                last_post_date_utc = post_dates_utc.dropna().max()
                if pd.isna(last_post_date_utc):  # No valid dates found after conversion
                    logging.warning("No valid post dates found after conversion. Triggering initial fetch.")
                    new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
                else:
                    days_diff = (pd.Timestamp('now', tz='UTC').normalize() - last_post_date_utc.normalize()).days
                    if days_diff >= 7:
                        # Fetch more if data is older: 10 posts per week of difference.
                        new_state['fetch_count_for_api'] = max(1, days_diff // 7) * 10
                        logging.info(f"Posts data is {days_diff} days old. Setting fetch count to {new_state['fetch_count_for_api']}.")
                    else:
                        new_state['fetch_count_for_api'] = 0  # Data is recent
                        logging.info("Posts data is recent. No new posts fetch needed based on date.")
        except Exception as e:
            logging.error(f"Error processing post dates: {e}. Defaulting to initial fetch for posts.", exc_info=True)
            new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT

    # --- Determine if Mentions need sync ---
    mentions_need_sync = False
    if new_state["bubble_mentions_df"].empty:
        mentions_need_sync = True
        logging.info("Mentions need sync: Bubble mentions DF is empty.")
    else:
        # The crucial date column must exist and carry at least one non-null value.
        if BUBBLE_MENTIONS_DATE_COLUMN_NAME not in new_state["bubble_mentions_df"].columns or \
           new_state["bubble_mentions_df"][BUBBLE_MENTIONS_DATE_COLUMN_NAME].isnull().all():
            mentions_need_sync = True
            logging.info(f"Mentions need sync: Date column '{BUBBLE_MENTIONS_DATE_COLUMN_NAME}' missing or all null values.")
        else:
            mention_dates_utc = pd.to_datetime(new_state["bubble_mentions_df"][BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True)
            last_mention_date_utc = mention_dates_utc.dropna().max()
            # Sync if there is no valid last mention date or it is 7 days or older.
            if pd.isna(last_mention_date_utc) or \
               (pd.Timestamp('now', tz='UTC').normalize() - last_mention_date_utc.normalize()).days >= 7:
                mentions_need_sync = True
                logging.info(f"Mentions need sync: Last mention date {last_mention_date_utc} is old or invalid.")
            else:
                logging.info(f"Mentions up-to-date. Last mention: {last_mention_date_utc}")

    # --- Determine if Follower Stats need sync ---
    follower_stats_need_sync = False
    fs_df = new_state.get("bubble_follower_stats_df", pd.DataFrame())
    if fs_df.empty:
        follower_stats_need_sync = True
        logging.info("Follower stats need sync: Bubble follower stats DF is empty.")
    else:
        # Check monthly gains data.
        monthly_gains_df = fs_df[fs_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly']
        if monthly_gains_df.empty:
            follower_stats_need_sync = True
            logging.info("Follower stats need sync: No monthly gains data in Bubble.")
        elif FOLLOWER_STATS_CATEGORY_COLUMN not in monthly_gains_df.columns:
            follower_stats_need_sync = True
            logging.info(f"Follower stats need sync: Date column '{FOLLOWER_STATS_CATEGORY_COLUMN}' missing in monthly gains.")
        else:
            gain_dates = pd.to_datetime(monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce').dt.normalize()
            last_gain_date = gain_dates.dropna().max()
            if pd.isna(last_gain_date):  # No valid dates after conversion
                follower_stats_need_sync = True
                logging.info("Follower stats need sync: No valid dates in monthly gains after conversion.")
            else:
                # CRITICAL: last_gain_date must be tz-aware (UTC) before comparing with
                # the tz-aware month start, or pandas raises a TypeError.
                if last_gain_date.tzinfo is None or last_gain_date.tzinfo.utcoffset(last_gain_date) is None:
                    # Naive timestamp: interpret it as UTC.
                    last_gain_date = last_gain_date.tz_localize('UTC')
                    logging.info(f"Localized naive last_gain_date to UTC: {last_gain_date}")
                else:
                    # Already aware: normalize to UTC (no-op if it is UTC already).
                    last_gain_date = last_gain_date.tz_convert('UTC')
                    logging.info(f"Converted aware last_gain_date to UTC: {last_gain_date}")

                # Sync if the last recorded gain predates the start of the current month,
                # so the previous month's data gets fetched when missing.
                start_of_current_month = pd.Timestamp('now', tz='UTC').normalize().replace(day=1)
                if last_gain_date < start_of_current_month:
                    follower_stats_need_sync = True
                    logging.info(f"Follower stats need sync: Last gain date {last_gain_date} is before current month start {start_of_current_month}.")
                else:
                    logging.info(f"Follower monthly gains up-to-date. Last gain recorded on: {last_gain_date}")

        # Also trigger sync if demographic data (non-monthly types) is missing entirely.
        if fs_df[fs_df[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].empty:
            follower_stats_need_sync = True
            logging.info("Follower stats need sync: Demographic data (non-monthly types) missing.")

    # --- Update sync button based on token presence and needed actions ---
    sync_actions = []
    if new_state['fetch_count_for_api'] > 0:
        sync_actions.append(f"{new_state['fetch_count_for_api']} Posts")
    if mentions_need_sync:
        sync_actions.append("Mentions")
    if follower_stats_need_sync:
        sync_actions.append("Follower Stats")

    if new_state["token"] and sync_actions:  # Token present and actions needed
        button_label = f"🔄 Sync LinkedIn Data ({', '.join(sync_actions)})"
        button_update = gr.update(value=button_label, visible=True, interactive=True)
    elif new_state["token"]:  # Token present but nothing to sync
        button_label = "✅ Data Up-to-Date"
        button_update = gr.update(value=button_label, visible=True, interactive=False)
    else:  # No token: keep the button hidden
        button_update = gr.update(visible=False, interactive=False)

    token_status_message = check_token_status(new_state)
    logging.info(f"Token processing complete. Status: {token_status_message}. Button: {button_update}. Sync actions: {sync_actions}")
    return token_status_message, new_state, button_update
| 284 |
-
def sync_linkedin_mentions(token_state):
    """Fetch new LinkedIn mentions via the API and upload them to Bubble.

    Args:
        token_state: State dict holding "token", "client_id", "org_urn", and the
            cached "bubble_mentions_df".

    Returns:
        (status_message, token_state) — a short status string for the UI and the
        (possibly updated) state dict.
    """
    logging.info("Starting LinkedIn mentions sync process.")
    if not token_state or not token_state.get("token"):
        logging.error("Mentions sync: Access denied. No LinkedIn token.")
        return "Mentions: No token. ", token_state

    client_id = token_state.get("client_id")
    token_dict = token_state.get("token")
    org_urn = token_state.get('org_urn')
    bubble_mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame()).copy()  # Work with a copy

    if not org_urn or not client_id or client_id == "ENV VAR MISSING":
        logging.error("Mentions sync: Configuration error (Org URN or Client ID missing).")
        return "Mentions: Config error. ", token_state

    # --- Decide whether a sync is needed and how many mentions to fetch ---
    fetch_count_for_mentions_api = 0
    mentions_sync_is_needed_now = False
    if bubble_mentions_df.empty:
        mentions_sync_is_needed_now = True
        fetch_count_for_mentions_api = DEFAULT_MENTIONS_INITIAL_FETCH_COUNT
        logging.info("Mentions sync needed: Bubble DF empty. Fetching initial count.")
    else:
        if BUBBLE_MENTIONS_DATE_COLUMN_NAME not in bubble_mentions_df.columns or \
           bubble_mentions_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME].isnull().all():
            mentions_sync_is_needed_now = True
            fetch_count_for_mentions_api = DEFAULT_MENTIONS_INITIAL_FETCH_COUNT
            logging.info(f"Mentions sync needed: Date column '{BUBBLE_MENTIONS_DATE_COLUMN_NAME}' missing or all null. Fetching initial count.")
        else:
            # Convert to a standalone Series (avoids the former redundant full-DF copy
            # and leaves bubble_mentions_df untouched for the concat below).
            mention_dates_utc = pd.to_datetime(bubble_mentions_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True)
            last_mention_date_utc = mention_dates_utc.dropna().max()
            if pd.isna(last_mention_date_utc) or \
               (pd.Timestamp('now', tz='UTC').normalize() - last_mention_date_utc.normalize()).days >= 7:
                mentions_sync_is_needed_now = True
                fetch_count_for_mentions_api = DEFAULT_MENTIONS_UPDATE_FETCH_COUNT  # Smaller fetch for updates
                logging.info(f"Mentions sync needed: Last mention date {last_mention_date_utc} is old or invalid. Fetching update count.")

    if not mentions_sync_is_needed_now:
        logging.info("Mentions data is fresh based on current check. No API fetch needed for mentions.")
        return "Mentions: Up-to-date. ", token_state

    logging.info(f"Mentions sync proceeding. Fetch count: {fetch_count_for_mentions_api}")

    try:
        processed_raw_mentions = fetch_linkedin_mentions_core(client_id, token_dict, org_urn, count=fetch_count_for_mentions_api)
        if not processed_raw_mentions:
            logging.info("Mentions sync: No new mentions found via API.")
            return "Mentions: None found via API. ", token_state

        existing_mention_ids = set()
        if not bubble_mentions_df.empty and BUBBLE_MENTIONS_ID_COLUMN_NAME in bubble_mentions_df.columns:
            # IDs as strings for reliable comparison; dropna handles potential NaNs.
            existing_mention_ids = set(bubble_mentions_df[BUBBLE_MENTIONS_ID_COLUMN_NAME].dropna().astype(str))

        # NOTE(review): assumes analyze_mentions_sentiment returns {mention_id: sentiment}
        # and compile_detailed_mentions merges it into each mention dict — confirm upstream.
        sentiments_map = analyze_mentions_sentiment(processed_raw_mentions)
        all_compiled_mentions = compile_detailed_mentions(processed_raw_mentions, sentiments_map)

        # Filter out mentions already stored in Bubble.
        new_compiled_mentions_to_upload = [
            m for m in all_compiled_mentions if str(m.get("id")) not in existing_mention_ids
        ]

        if not new_compiled_mentions_to_upload:
            logging.info("Mentions sync: All fetched mentions are already in Bubble.")
            return "Mentions: All fetched already in Bubble. ", token_state

        bubble_ready_mentions = prepare_mentions_for_bubble(new_compiled_mentions_to_upload)  # Prepare for Bubble format
        if bubble_ready_mentions:
            bulk_upload_to_bubble(bubble_ready_mentions, BUBBLE_MENTIONS_TABLE_NAME)
            logging.info(f"Successfully uploaded {len(bubble_ready_mentions)} new mentions to Bubble.")
            # Update the in-memory DataFrame; drop duplicate IDs, keeping the newest row.
            updated_mentions_df = pd.concat([bubble_mentions_df, pd.DataFrame(bubble_ready_mentions)], ignore_index=True)
            token_state["bubble_mentions_df"] = updated_mentions_df.drop_duplicates(subset=[BUBBLE_MENTIONS_ID_COLUMN_NAME], keep='last')
            return f"Mentions: Synced {len(bubble_ready_mentions)} new. ", token_state
        else:
            logging.info("Mentions sync: No new mentions were prepared for Bubble upload (possibly all filtered or empty after prep).")
            return "Mentions: No new ones to upload. ", token_state
    except ValueError as ve:  # Specific errors raised by the API helpers
        logging.error(f"ValueError during mentions sync: {ve}", exc_info=True)
        return f"Mentions Error: {html.escape(str(ve))}. ", token_state
    except Exception as e:
        logging.exception("Unexpected error in sync_linkedin_mentions.")  # Logs full traceback
        return f"Mentions: Unexpected error ({type(e).__name__}). ", token_state
| 372 |
-
def sync_linkedin_follower_stats(token_state):
    """Fetch new LinkedIn follower statistics via the API and upload them to Bubble.

    Args:
        token_state: State dict holding "token", "client_id", "org_urn", and the
            cached "bubble_follower_stats_df".

    Returns:
        (status_message, token_state) — a short status string for the UI and the
        (possibly updated) state dict.
    """
    logging.info("Starting LinkedIn follower stats sync process.")
    if not token_state or not token_state.get("token"):
        logging.error("Follower Stats sync: Access denied. No LinkedIn token.")
        return "Follower Stats: No token. ", token_state

    client_id = token_state.get("client_id")
    token_dict = token_state.get("token")
    org_urn = token_state.get('org_urn')

    if not org_urn or not client_id or client_id == "ENV VAR MISSING":
        logging.error("Follower Stats sync: Configuration error (Org URN or Client ID missing).")
        return "Follower Stats: Config error. ", token_state

    # --- Decide whether a sync is needed (mirrors process_and_store_bubble_token) ---
    follower_stats_sync_is_needed_now = False
    fs_df_current = token_state.get("bubble_follower_stats_df", pd.DataFrame()).copy()  # Work with a copy
    if fs_df_current.empty:
        follower_stats_sync_is_needed_now = True
        logging.info("Follower stats sync needed: Bubble DF is empty.")
    else:
        monthly_gains_df = fs_df_current[fs_df_current[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly']
        if monthly_gains_df.empty or FOLLOWER_STATS_CATEGORY_COLUMN not in monthly_gains_df.columns:
            follower_stats_sync_is_needed_now = True
            logging.info("Follower stats sync needed: Monthly gains data missing or date column absent.")
        else:
            gain_dates = pd.to_datetime(monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce').dt.normalize()
            last_gain_date = gain_dates.dropna().max()
            start_of_current_month = pd.Timestamp('now', tz='UTC').normalize().replace(day=1)
            # BUGFIX: last_gain_date comes out tz-naive here while start_of_current_month
            # is tz-aware — comparing them raises TypeError. Normalize to UTC first,
            # exactly as process_and_store_bubble_token already does.
            if pd.notna(last_gain_date):
                if last_gain_date.tzinfo is None or last_gain_date.tzinfo.utcoffset(last_gain_date) is None:
                    last_gain_date = last_gain_date.tz_localize('UTC')  # Interpret naive dates as UTC
                else:
                    last_gain_date = last_gain_date.tz_convert('UTC')
            if pd.isna(last_gain_date) or last_gain_date < start_of_current_month:
                follower_stats_sync_is_needed_now = True
                logging.info(f"Follower stats sync needed: Last gain date {last_gain_date} is old or invalid.")

        # Demographic (non-monthly) data missing entirely also forces a sync.
        if fs_df_current[fs_df_current[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].empty:
            follower_stats_sync_is_needed_now = True
            logging.info("Follower stats sync needed: Demographic data (non-monthly) is missing.")

    if not follower_stats_sync_is_needed_now:
        logging.info("Follower stats data is fresh based on current check. No API fetch needed.")
        return "Follower Stats: Data up-to-date. ", token_state

    logging.info(f"Follower stats sync proceeding for org_urn: {org_urn}")
    try:
        # Expected to return a list of dicts, one per stat entry.
        api_follower_stats = get_linkedin_follower_stats(client_id, token_dict, org_urn)
        if not api_follower_stats:  # None or empty list
            logging.info(f"Follower Stats sync: No stats found via API for org {org_urn}.")
            return "Follower Stats: None found via API. ", token_state

        bubble_follower_stats_df_orig = token_state.get("bubble_follower_stats_df", pd.DataFrame()).copy()
        new_stats_to_upload = []

        # --- Process Monthly Gains: upload only dates not already in Bubble ---
        api_monthly_gains = [s for s in api_follower_stats if s.get(FOLLOWER_STATS_TYPE_COLUMN) == 'follower_gains_monthly']
        existing_monthly_gain_dates = set()
        if not bubble_follower_stats_df_orig.empty:
            bubble_monthly_df = bubble_follower_stats_df_orig[bubble_follower_stats_df_orig[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly']
            if FOLLOWER_STATS_CATEGORY_COLUMN in bubble_monthly_df.columns:
                # Dates as strings for set comparison.
                existing_monthly_gain_dates = set(bubble_monthly_df[FOLLOWER_STATS_CATEGORY_COLUMN].astype(str).unique())

        for gain_stat in api_monthly_gains:
            # category_name for monthly gains is a 'YYYY-MM-DD' string from linkedin_follower_stats.
            if str(gain_stat.get(FOLLOWER_STATS_CATEGORY_COLUMN)) not in existing_monthly_gain_dates:
                new_stats_to_upload.append(gain_stat)

        # --- Process Demographics: upload if new or if the counts changed ---
        api_demographics = [s for s in api_follower_stats if s.get(FOLLOWER_STATS_TYPE_COLUMN) != 'follower_gains_monthly']

        # Map of existing demographics for quick lookup:
        # (org_urn, type, category_name) -> (organic_count, paid_count)
        existing_demographics_map = {}
        if not bubble_follower_stats_df_orig.empty:
            bubble_demographics_df = bubble_follower_stats_df_orig[bubble_follower_stats_df_orig[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly']
            if not bubble_demographics_df.empty and \
               all(col in bubble_demographics_df.columns for col in [
                   FOLLOWER_STATS_ORG_URN_COLUMN, FOLLOWER_STATS_TYPE_COLUMN,
                   FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN,
                   FOLLOWER_STATS_PAID_COLUMN
               ]):
                for _, row in bubble_demographics_df.iterrows():
                    key = (
                        str(row[FOLLOWER_STATS_ORG_URN_COLUMN]),
                        str(row[FOLLOWER_STATS_TYPE_COLUMN]),
                        str(row[FOLLOWER_STATS_CATEGORY_COLUMN])
                    )
                    existing_demographics_map[key] = (
                        row[FOLLOWER_STATS_ORGANIC_COLUMN],
                        row[FOLLOWER_STATS_PAID_COLUMN]
                    )

        for demo_stat in api_demographics:
            key = (
                str(demo_stat.get(FOLLOWER_STATS_ORG_URN_COLUMN)),
                str(demo_stat.get(FOLLOWER_STATS_TYPE_COLUMN)),
                str(demo_stat.get(FOLLOWER_STATS_CATEGORY_COLUMN))
            )
            api_counts = (
                demo_stat.get(FOLLOWER_STATS_ORGANIC_COLUMN, 0),
                demo_stat.get(FOLLOWER_STATS_PAID_COLUMN, 0)
            )
            if key not in existing_demographics_map or existing_demographics_map[key] != api_counts:
                new_stats_to_upload.append(demo_stat)

        if not new_stats_to_upload:
            logging.info(f"Follower Stats sync: Data for org {org_urn} is up-to-date or no changes found.")
            return "Follower Stats: Data up-to-date or no changes. ", token_state

        bulk_upload_to_bubble(new_stats_to_upload, BUBBLE_FOLLOWER_STATS_TABLE_NAME)
        logging.info(f"Successfully uploaded {len(new_stats_to_upload)} follower stat entries to Bubble for org {org_urn}.")

        # Update the in-memory DataFrame: concat, then de-duplicate per data type.
        temp_df = pd.concat([bubble_follower_stats_df_orig, pd.DataFrame(new_stats_to_upload)], ignore_index=True)

        # Monthly gains: unique by (org, date-in-category_name).
        monthly_part = temp_df[temp_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].drop_duplicates(
            subset=[FOLLOWER_STATS_ORG_URN_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN],
            keep='last'  # Keep the newest entry if dates somehow collide
        )
        # Demographics: unique by (org, type, category_name); keep='last' retains updates.
        demographics_part = temp_df[temp_df[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].drop_duplicates(
            subset=[FOLLOWER_STATS_ORG_URN_COLUMN, FOLLOWER_STATS_TYPE_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN],
            keep='last'
        )
        token_state["bubble_follower_stats_df"] = pd.concat([monthly_part, demographics_part], ignore_index=True)

        return f"Follower Stats: Synced {len(new_stats_to_upload)} entries. ", token_state
    except ValueError as ve:  # Specific errors raised by the API helpers
        logging.error(f"ValueError during follower stats sync for {org_urn}: {ve}", exc_info=True)
        return f"Follower Stats Error: {html.escape(str(ve))}. ", token_state
    except Exception as e:
        logging.exception(f"Unexpected error in sync_linkedin_follower_stats for {org_urn}.")  # Logs full traceback
        return f"Follower Stats: Unexpected error ({type(e).__name__}). ", token_state
| 509 |
-
def sync_all_linkedin_data(token_state):
    """Orchestrate syncing of all LinkedIn data types: Posts, Mentions, Follower Stats.

    Fetches new posts (plus their comments/stats) from the LinkedIn API, uploads
    anything not already present to Bubble, then delegates to
    sync_linkedin_mentions and sync_linkedin_follower_stats (each of which updates
    its own DataFrame inside token_state).

    Args:
        token_state: dict held in gr.State; must contain "token", "client_id",
            "org_urn", "fetch_count_for_api" and the cached Bubble DataFrames.

    Returns:
        (status_html, token_state): an HTML status string for display and the
        (possibly mutated) token_state. token_state["bubble_posts_df"] is updated
        in place with any newly uploaded posts.
    """
    logging.info("Starting sync_all_linkedin_data process.")
    # Guard: without a LinkedIn token nothing can be fetched.
    if not token_state or not token_state.get("token"):
        logging.error("Sync All: Access denied. LinkedIn token not available.")
        # NOTE(review): 'β' looks like a mojibake'd status emoji (likely ❌) — confirm intended character.
        return "<p style='color:red; text-align:center;'>β Access denied. LinkedIn token not available.</p>", token_state

    client_id = token_state.get("client_id")
    token_dict = token_state.get("token")
    org_urn = token_state.get('org_urn')
    # 0 means "nothing new to fetch" (decided earlier by process_and_store_bubble_token).
    fetch_count_for_posts_api = token_state.get('fetch_count_for_api', 0)
    # Operate on copies to avoid modifying original DFs in state directly until the end
    bubble_posts_df_orig = token_state.get("bubble_posts_df", pd.DataFrame()).copy()

    posts_sync_message = ""
    mentions_sync_message = ""
    follower_stats_sync_message = ""

    # Configuration guards: both the org URN and the client id must be present.
    if not org_urn:
        logging.error("Sync All: Org URN missing in token_state.")
        return "<p style='color:red;'>β Config error: Org URN missing.</p>", token_state
    if not client_id or client_id == "ENV VAR MISSING":
        logging.error("Sync All: Client ID missing or not set.")
        return "<p style='color:red;'>β Config error: Client ID missing.</p>", token_state

    # --- Sync Posts ---
    if fetch_count_for_posts_api == 0:
        posts_sync_message = "Posts: Already up-to-date. "
        logging.info("Posts sync: Skipped as fetch_count_for_posts_api is 0.")
    else:
        logging.info(f"Posts sync: Starting fetch for {fetch_count_for_posts_api} posts.")
        try:
            # fetch_linkedin_posts_core is expected to return: (processed_raw_posts, stats_map, errors_list)
            processed_raw_posts, stats_map, _ = fetch_linkedin_posts_core(client_id, token_dict, org_urn, count=fetch_count_for_posts_api)

            if not processed_raw_posts:
                posts_sync_message = "Posts: None found via API. "
                logging.info("Posts sync: No raw posts returned from API.")
            else:
                # Build the set of post URNs already stored in Bubble so we only
                # process genuinely new posts (compared as strings).
                existing_post_urns = set()
                if not bubble_posts_df_orig.empty and BUBBLE_POST_URN_COLUMN_NAME in bubble_posts_df_orig.columns:
                    existing_post_urns = set(bubble_posts_df_orig[BUBBLE_POST_URN_COLUMN_NAME].dropna().astype(str))

                # Filter out posts already in Bubble
                new_raw_posts = [p for p in processed_raw_posts if str(p.get(LINKEDIN_POST_URN_KEY)) not in existing_post_urns]

                if not new_raw_posts:
                    posts_sync_message = "Posts: All fetched already in Bubble. "
                    logging.info("Posts sync: All fetched posts were already found in Bubble.")
                else:
                    logging.info(f"Posts sync: Processing {len(new_raw_posts)} new raw posts.")
                    post_urns_to_process = [p[LINKEDIN_POST_URN_KEY] for p in new_raw_posts if p.get(LINKEDIN_POST_URN_KEY)]

                    # Enrich: comments per post -> sentiment per post -> detailed records.
                    all_comments_data = fetch_comments(client_id, token_dict, post_urns_to_process, stats_map)
                    sentiments_per_post = analyze_sentiment(all_comments_data) # Assumes analysis of comments
                    detailed_new_posts = compile_detailed_posts(new_raw_posts, stats_map, sentiments_per_post) # Compiles with stats and sentiment

                    # prepare_data_for_bubble should return tuple: (posts_for_bubble, post_stats_for_bubble, post_comments_for_bubble)
                    li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed_new_posts, all_comments_data)

                    if li_posts: # If there are posts to upload
                        bulk_upload_to_bubble(li_posts, "LI_posts")
                        # Update in-memory DataFrame for posts; keep='last' prefers the
                        # freshly uploaded row if a URN somehow collides.
                        updated_posts_df = pd.concat([bubble_posts_df_orig, pd.DataFrame(li_posts)], ignore_index=True)
                        token_state["bubble_posts_df"] = updated_posts_df.drop_duplicates(subset=[BUBBLE_POST_URN_COLUMN_NAME], keep='last')
                        logging.info(f"Posts sync: Uploaded {len(li_posts)} new posts to Bubble.")

                        if li_post_stats:
                            bulk_upload_to_bubble(li_post_stats, "LI_post_stats")
                            logging.info(f"Posts sync: Uploaded {len(li_post_stats)} post_stats entries.")
                            # Note: Consider how/if to update a local stats_df in token_state if you maintain one.
                        if li_post_comments:
                            bulk_upload_to_bubble(li_post_comments, "LI_post_comments")
                            logging.info(f"Posts sync: Uploaded {len(li_post_comments)} post_comments entries.")
                            # Note: Consider how/if to update a local comments_df in token_state.

                        posts_sync_message = f"Posts: Synced {len(li_posts)} new. "
                    else:
                        posts_sync_message = "Posts: No new ones to upload after processing. "
                        logging.info("Posts sync: No new posts were prepared for Bubble upload.")
        except ValueError as ve: # Catch specific errors from your API calls
            posts_sync_message = f"Posts Error: {html.escape(str(ve))}. "
            logging.error(f"Posts sync: ValueError: {ve}", exc_info=True)
        except Exception as e:
            # Keep syncing the other data types even if posts failed.
            logging.exception("Posts sync: Unexpected error during processing.") # Logs full traceback
            posts_sync_message = f"Posts: Unexpected error ({type(e).__name__}). "

    # --- Sync Mentions ---
    # The sync_linkedin_mentions function updates token_state["bubble_mentions_df"] internally
    mentions_sync_message, token_state = sync_linkedin_mentions(token_state)

    # --- Sync Follower Stats ---
    # The sync_linkedin_follower_stats function updates token_state["bubble_follower_stats_df"] internally
    follower_stats_sync_message, token_state = sync_linkedin_follower_stats(token_state)

    logging.info(f"Sync process complete. Messages: Posts: [{posts_sync_message.strip()}], Mentions: [{mentions_sync_message.strip()}], Follower Stats: [{follower_stats_sync_message.strip()}]")
    final_message = f"<p style='color:green; text-align:center;'>β Sync Attempted. {posts_sync_message} {mentions_sync_message} {follower_stats_sync_message}</p>"
    return final_message, token_state
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
def display_main_dashboard(token_state):
    """Generate the HTML for the main dashboard using cached Bubble data in token_state.

    Renders three sections: recent posts, recent mentions, and a follower-stats
    summary (latest monthly gain + demographic entry count). Reads only from
    token_state; performs no API calls and does not mutate state.

    Args:
        token_state: dict held in gr.State with "token" and the cached
            "bubble_posts_df" / "bubble_mentions_df" / "bubble_follower_stats_df".

    Returns:
        A single HTML string for a gr.HTML component (or a plain error string
        when no token is available).
    """
    if not token_state or not token_state.get("token"):
        logging.warning("Dashboard display: Access denied. No token available.")
        return "β Access denied. No token available for dashboard."

    html_parts = ["<div style='padding:10px;'><h3>Dashboard Overview</h3>"]

    # Display Recent Posts
    posts_df = token_state.get("bubble_posts_df", pd.DataFrame())
    html_parts.append(f"<h4>Recent Posts ({len(posts_df)} in Bubble):</h4>")
    if not posts_df.empty:
        # Define columns to show, ensuring they exist in the DataFrame
        cols_to_show_posts = [col for col in [BUBBLE_POST_DATE_COLUMN_NAME, 'text', 'sentiment', 'summary_text', 'li_eb_label'] if col in posts_df.columns]
        if not cols_to_show_posts:
            html_parts.append("<p>No relevant post columns found to display.</p>")
        else:
            display_df_posts = posts_df.copy()
            if BUBBLE_POST_DATE_COLUMN_NAME in display_df_posts.columns:
                try:
                    # Format date and sort (string format '%Y-%m-%d %H:%M' sorts
                    # chronologically when sorted lexicographically).
                    display_df_posts[BUBBLE_POST_DATE_COLUMN_NAME] = pd.to_datetime(display_df_posts[BUBBLE_POST_DATE_COLUMN_NAME], errors='coerce').dt.strftime('%Y-%m-%d %H:%M')
                    display_df_posts = display_df_posts.sort_values(by=BUBBLE_POST_DATE_COLUMN_NAME, ascending=False)
                except Exception as e:
                    logging.error(f"Error formatting post dates for display: {e}")
                    html_parts.append("<p>Error formatting post dates.</p>")
            # Use escape=False if 'text' or 'summary_text' can contain HTML; otherwise, True is safer.
            # Assuming 'text' might have HTML from LinkedIn, using escape=False. Be cautious with this.
            html_parts.append(display_df_posts[cols_to_show_posts].head().to_html(escape=False, index=False, classes="table table-striped table-sm"))
    else:
        html_parts.append("<p>No posts loaded from Bubble.</p>")
    html_parts.append("<hr/>")

    # Display Recent Mentions
    mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame())
    html_parts.append(f"<h4>Recent Mentions ({len(mentions_df)} in Bubble):</h4>")
    if not mentions_df.empty:
        cols_to_show_mentions = [col for col in [BUBBLE_MENTIONS_DATE_COLUMN_NAME, "mention_text", "sentiment_label"] if col in mentions_df.columns]
        if not cols_to_show_mentions:
            html_parts.append("<p>No relevant mention columns found to display.</p>")
        else:
            display_df_mentions = mentions_df.copy()
            if BUBBLE_MENTIONS_DATE_COLUMN_NAME in display_df_mentions.columns:
                try:
                    display_df_mentions[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = pd.to_datetime(display_df_mentions[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce').dt.strftime('%Y-%m-%d %H:%M')
                    display_df_mentions = display_df_mentions.sort_values(by=BUBBLE_MENTIONS_DATE_COLUMN_NAME, ascending=False)
                except Exception as e:
                    logging.error(f"Error formatting mention dates for display: {e}")
                    html_parts.append("<p>Error formatting mention dates.</p>")
            # Assuming "mention_text" can have HTML.
            html_parts.append(display_df_mentions[cols_to_show_mentions].head().to_html(escape=False, index=False, classes="table table-striped table-sm"))
    else:
        html_parts.append("<p>No mentions loaded from Bubble.</p>")
    html_parts.append("<hr/>")

    # Display Follower Statistics Summary
    follower_stats_df = token_state.get("bubble_follower_stats_df", pd.DataFrame())
    html_parts.append(f"<h4>Follower Statistics ({len(follower_stats_df)} entries in Bubble):</h4>")
    if not follower_stats_df.empty:
        # Latest Monthly Follower Gain
        # NOTE(review): assumes FOLLOWER_STATS_TYPE_COLUMN exists whenever the df
        # is non-empty — a schema change in Bubble would raise KeyError here.
        monthly_gains = follower_stats_df[follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].copy()
        if not monthly_gains.empty and FOLLOWER_STATS_CATEGORY_COLUMN in monthly_gains.columns and \
           FOLLOWER_STATS_ORGANIC_COLUMN in monthly_gains.columns and FOLLOWER_STATS_PAID_COLUMN in monthly_gains.columns:
            try:
                # FOLLOWER_STATS_CATEGORY_COLUMN for monthly gains is 'YYYY-MM-DD'
                monthly_gains.loc[:, FOLLOWER_STATS_CATEGORY_COLUMN] = pd.to_datetime(monthly_gains[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce').dt.strftime('%Y-%m-%d')
                latest_gain = monthly_gains.sort_values(by=FOLLOWER_STATS_CATEGORY_COLUMN, ascending=False).head(1)
                if not latest_gain.empty:
                    html_parts.append("<h5>Latest Monthly Follower Gain:</h5>")
                    html_parts.append(latest_gain[[FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN, FOLLOWER_STATS_PAID_COLUMN]].to_html(escape=True, index=False, classes="table table-sm"))
                else:
                    html_parts.append("<p>No valid monthly follower gain data to display after processing.</p>")
            except Exception as e:
                logging.error(f"Error formatting follower gain dates for display: {e}")
                html_parts.append("<p>Error displaying monthly follower gain data.</p>")
        else:
            html_parts.append("<p>No monthly follower gain data or required columns are missing.</p>")

        # Count of Demographic Entries (everything that is not a monthly-gain row)
        demographics_count = len(follower_stats_df[follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'])
        html_parts.append(f"<p>Total demographic entries (seniority, industry, etc.): {demographics_count}</p>")
    else:
        html_parts.append("<p>No follower statistics loaded from Bubble.</p>")

    html_parts.append("</div>")
    return "".join(html_parts)
|
| 695 |
-
|
| 696 |
-
|
| 697 |
def guarded_fetch_analytics(token_state):
|
| 698 |
"""Guarded call to fetch_and_render_analytics, ensuring token and basic data structures."""
|
| 699 |
if not token_state or not token_state.get("token"):
|
| 700 |
logging.warning("Analytics fetch: Access denied. No token.")
|
| 701 |
# Ensure the number of returned Nones matches the expected number of outputs for the plots
|
| 702 |
-
return ("β Access denied. No token.", None, None, None, None, None, None, None)
|
| 703 |
-
|
| 704 |
# Ensure DataFrames are passed, even if empty, to avoid errors in the analytics function
|
| 705 |
posts_df_analytics = token_state.get("bubble_posts_df", pd.DataFrame())
|
| 706 |
mentions_df_analytics = token_state.get("bubble_mentions_df", pd.DataFrame())
|
| 707 |
follower_stats_df_analytics = token_state.get("bubble_follower_stats_df", pd.DataFrame())
|
| 708 |
|
| 709 |
logging.info("Calling fetch_and_render_analytics with current token_state data.")
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
|
|
|
| 717 |
)
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
def run_mentions_tab_display(token_state):
    # NOTE(review): the original `def` line was lost in the diff scrape; name and
    # single-parameter signature reconstructed from the UI wiring
    # (`fn=run_mentions_tab_display, inputs=[token_state]`) — confirm against history.
    """Generate the HTML table and sentiment plot for the Mentions tab.

    Reads only the locally cached "bubble_mentions_df" from token_state (no API
    calls) and renders the 20 most recent mentions plus a bar chart of
    sentiment_label counts.

    Args:
        token_state: dict held in gr.State with "token" and "bubble_mentions_df".

    Returns:
        (mentions_html, fig): HTML string for gr.HTML and a matplotlib Figure
        for gr.Plot (fig is None when there is no data or plotting fails).
    """
    logging.info("Updating Mentions Tab display.")
    if not token_state or not token_state.get("token"):
        logging.warning("Mentions tab: Access denied. No token.")
        return ("β Access denied. No token available for mentions.", None)

    mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame())
    if mentions_df.empty:
        logging.info("Mentions tab: No mentions data in Bubble.")
        return ("<p style='text-align:center;'>No mentions data in Bubble. Try syncing.</p>", None)

    html_parts = ["<h3 style='text-align:center;'>Recent Mentions</h3>"]
    # Define columns to display, ensuring they exist
    display_columns = [col for col in [BUBBLE_MENTIONS_DATE_COLUMN_NAME, "mention_text", "sentiment_label", BUBBLE_MENTIONS_ID_COLUMN_NAME] if col in mentions_df.columns]

    mentions_df_display = mentions_df.copy()
    if BUBBLE_MENTIONS_DATE_COLUMN_NAME in mentions_df_display.columns:
        try:
            # Render dates as strings; '%Y-%m-%d %H:%M' still sorts chronologically.
            mentions_df_display[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = pd.to_datetime(mentions_df_display[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce').dt.strftime('%Y-%m-%d %H:%M')
            mentions_df_display = mentions_df_display.sort_values(by=BUBBLE_MENTIONS_DATE_COLUMN_NAME, ascending=False)
        except Exception as e:
            logging.error(f"Error formatting mention dates for tab display: {e}")
            html_parts.append("<p>Error formatting mention dates.</p>")

    if not display_columns or mentions_df_display[display_columns].empty: # Check if display_df is empty after potential sort/filter
        html_parts.append("<p>Required columns for mentions display are missing or no data after processing.</p>")
    else:
        # Assuming "mention_text" might contain HTML.
        html_parts.append(mentions_df_display[display_columns].head(20).to_html(escape=False, index=False, classes="table table-sm"))

    mentions_html_output = "\n".join(html_parts)
    fig = None # Initialize fig to None
    if not mentions_df.empty and "sentiment_label" in mentions_df.columns:
        try:
            import matplotlib.pyplot as plt
            plt.switch_backend('Agg') # Essential for Gradio (headless rendering)
            fig_plot, ax = plt.subplots(figsize=(6,4)) # Create figure and axes
            sentiment_counts = mentions_df["sentiment_label"].value_counts()
            sentiment_counts.plot(kind='bar', ax=ax, color=['#4CAF50', '#FFC107', '#F44336', '#9E9E9E', '#2196F3']) # Example colors
            ax.set_title("Mention Sentiment Distribution")
            ax.set_ylabel("Count")
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout() # Adjust layout to prevent labels from overlapping
            fig = fig_plot # Assign the figure to fig
            logging.info("Mentions tab: Sentiment distribution plot generated.")
        except Exception as e:
            logging.error(f"Error generating mentions plot: {e}", exc_info=True)
            fig = None # Ensure fig is None on error
    else:
        logging.info("Mentions tab: Not enough data or 'sentiment_label' column missing for plot.")

    return mentions_html_output, fig
|
| 773 |
-
|
| 774 |
-
def run_follower_stats_tab_display(token_state):
    """Generate the HTML tables and three plots for the Follower Stats tab.

    Reads only the locally cached "bubble_follower_stats_df" from token_state
    (no API calls) and renders three sections: monthly follower gains (table +
    time-series plot), seniority distribution, and industry distribution
    (each a table + bar chart of the top 10 by organic followers).

    Args:
        token_state: dict held in gr.State with "token" and
            "bubble_follower_stats_df".

    Returns:
        (follower_html, plot_monthly_gains, plot_seniority_dist, plot_industry_dist):
        an HTML string plus three matplotlib Figures (any of which may be None
        when its section has no data or plotting fails).
    """
    logging.info("Updating Follower Stats Tab display.")
    if not token_state or not token_state.get("token"):
        logging.warning("Follower stats tab: Access denied. No token.")
        return ("β Access denied. No token available for follower stats.", None, None, None)

    follower_stats_df_orig = token_state.get("bubble_follower_stats_df", pd.DataFrame())
    if follower_stats_df_orig.empty:
        logging.info("Follower stats tab: No follower stats data in Bubble.")
        return ("<p style='text-align:center;'>No follower stats data in Bubble. Try syncing.</p>", None, None, None)

    follower_stats_df = follower_stats_df_orig.copy() # Work with a copy
    html_parts = ["<div style='padding:10px;'><h3 style='text-align:center;'>Follower Statistics Overview</h3>"]

    plot_monthly_gains = None
    plot_seniority_dist = None
    plot_industry_dist = None # Initialize for industry plot

    # --- Monthly Gains Table & Plot ---
    # Filter for monthly gains and ensure necessary columns exist
    monthly_gains_df = follower_stats_df[
        (follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly') &
        (follower_stats_df[FOLLOWER_STATS_CATEGORY_COLUMN].notna()) & # Date column
        (follower_stats_df[FOLLOWER_STATS_ORGANIC_COLUMN].notna()) &
        (follower_stats_df[FOLLOWER_STATS_PAID_COLUMN].notna())
    ].copy()

    if not monthly_gains_df.empty:
        try:
            # FOLLOWER_STATS_CATEGORY_COLUMN for monthly gains is 'YYYY-MM-DD'
            # Parse once into a datetime helper column used for both sorts.
            # NOTE(review): FOLLOWER_STATS_CATEGORY_COLUMN_DT is a module-level
            # constant defined outside this excerpt — confirm it exists at file top.
            monthly_gains_df.loc[:, FOLLOWER_STATS_CATEGORY_COLUMN_DT] = pd.to_datetime(monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce')
            # For table display, sort descending (newest month first).
            monthly_gains_df_sorted_table = monthly_gains_df.sort_values(by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=False)

            html_parts.append("<h4>Monthly Follower Gains (Last 13 Months):</h4>")
            # Format date for display in table
            table_display_df = monthly_gains_df_sorted_table.copy()
            table_display_df[FOLLOWER_STATS_CATEGORY_COLUMN] = table_display_df[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime('%Y-%m')

            html_parts.append(table_display_df[[FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN, FOLLOWER_STATS_PAID_COLUMN]].head(13).to_html(escape=True, index=False, classes="table table-sm"))

            # For plotting, sort ascending by datetime object for correct time series
            monthly_gains_df_sorted_plot = monthly_gains_df.sort_values(by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=True)
            # Use the formatted YYYY-MM string for x-axis ticks on the plot
            plot_dates = monthly_gains_df_sorted_plot[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime('%Y-%m').unique()

            import matplotlib.pyplot as plt
            plt.switch_backend('Agg')
            fig_gains, ax_gains = plt.subplots(figsize=(10,5)) # Wider plot
            # Group by month so multiple rows in the same month are summed; groupby
            # sorts keys ascending, matching the order of plot_dates above.
            ax_gains.plot(plot_dates, monthly_gains_df_sorted_plot.groupby(monthly_gains_df_sorted_plot[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime('%Y-%m'))[FOLLOWER_STATS_ORGANIC_COLUMN].sum(), marker='o', linestyle='-', label='Organic Gain')
            ax_gains.plot(plot_dates, monthly_gains_df_sorted_plot.groupby(monthly_gains_df_sorted_plot[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime('%Y-%m'))[FOLLOWER_STATS_PAID_COLUMN].sum(), marker='x', linestyle='--', label='Paid Gain')
            ax_gains.set_title("Monthly Follower Gains Over Time")
            ax_gains.set_ylabel("Follower Count")
            ax_gains.set_xlabel("Month (YYYY-MM)")
            plt.xticks(rotation=45, ha='right')
            ax_gains.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plot_monthly_gains = fig_gains
            logging.info("Follower stats tab: Monthly gains plot generated.")
        except Exception as e:
            logging.error(f"Error processing or plotting monthly gains: {e}", exc_info=True)
            html_parts.append("<p>Error displaying monthly follower gain data.</p>")
    else:
        html_parts.append("<p>No monthly follower gain data available or required columns missing.</p>")
    html_parts.append("<hr/>")

    # --- Seniority Table & Plot ---
    seniority_df = follower_stats_df[
        (follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_seniority') &
        (follower_stats_df[FOLLOWER_STATS_CATEGORY_COLUMN].notna()) & # Seniority name
        (follower_stats_df[FOLLOWER_STATS_ORGANIC_COLUMN].notna())
    ].copy()
    if not seniority_df.empty:
        try:
            seniority_df_sorted = seniority_df.sort_values(by=FOLLOWER_STATS_ORGANIC_COLUMN, ascending=False)
            html_parts.append("<h4>Followers by Seniority (Top 10 Organic):</h4>")
            html_parts.append(seniority_df_sorted[[FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN, FOLLOWER_STATS_PAID_COLUMN]].head(10).to_html(escape=True, index=False, classes="table table-sm"))

            import matplotlib.pyplot as plt
            plt.switch_backend('Agg')
            fig_seniority, ax_seniority = plt.subplots(figsize=(8,5)) # Adjusted size
            top_n_seniority = seniority_df_sorted.nlargest(10, FOLLOWER_STATS_ORGANIC_COLUMN)
            ax_seniority.bar(top_n_seniority[FOLLOWER_STATS_CATEGORY_COLUMN], top_n_seniority[FOLLOWER_STATS_ORGANIC_COLUMN], color='skyblue')
            ax_seniority.set_title("Follower Distribution by Seniority (Top 10 Organic)")
            ax_seniority.set_ylabel("Organic Follower Count")
            plt.xticks(rotation=45, ha='right')
            plt.grid(axis='y', linestyle='--', alpha=0.7)
            plt.tight_layout()
            plot_seniority_dist = fig_seniority
            logging.info("Follower stats tab: Seniority distribution plot generated.")
        except Exception as e:
            logging.error(f"Error processing or plotting seniority data: {e}", exc_info=True)
            html_parts.append("<p>Error displaying follower seniority data.</p>")
    else:
        html_parts.append("<p>No follower seniority data available or required columns missing.</p>")
    html_parts.append("<hr/>")

    # --- Industry Table & Plot --- (same pattern as seniority, different type filter)
    industry_df = follower_stats_df[
        (follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_industry') &
        (follower_stats_df[FOLLOWER_STATS_CATEGORY_COLUMN].notna()) & # Industry name
        (follower_stats_df[FOLLOWER_STATS_ORGANIC_COLUMN].notna())
    ].copy()
    if not industry_df.empty:
        try:
            industry_df_sorted = industry_df.sort_values(by=FOLLOWER_STATS_ORGANIC_COLUMN, ascending=False)
            html_parts.append("<h4>Followers by Industry (Top 10 Organic):</h4>")
            html_parts.append(industry_df_sorted[[FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN, FOLLOWER_STATS_PAID_COLUMN]].head(10).to_html(escape=True, index=False, classes="table table-sm"))

            import matplotlib.pyplot as plt
            plt.switch_backend('Agg')
            fig_industry, ax_industry = plt.subplots(figsize=(8,5))
            top_n_industry = industry_df_sorted.nlargest(10, FOLLOWER_STATS_ORGANIC_COLUMN)
            ax_industry.bar(top_n_industry[FOLLOWER_STATS_CATEGORY_COLUMN], top_n_industry[FOLLOWER_STATS_ORGANIC_COLUMN], color='lightcoral')
            ax_industry.set_title("Follower Distribution by Industry (Top 10 Organic)")
            ax_industry.set_ylabel("Organic Follower Count")
            plt.xticks(rotation=45, ha='right')
            plt.grid(axis='y', linestyle='--', alpha=0.7)
            plt.tight_layout()
            plot_industry_dist = fig_industry
            logging.info("Follower stats tab: Industry distribution plot generated.")
        except Exception as e:
            logging.error(f"Error processing or plotting industry data: {e}", exc_info=True)
            html_parts.append("<p>Error displaying follower industry data.</p>")
    else:
        html_parts.append("<p>No follower industry data available or required columns missing.</p>")

    html_parts.append("</div>")
    follower_html_output = "\n".join(html_parts)
    return follower_html_output, plot_monthly_gains, plot_seniority_dist, plot_industry_dist
|
| 907 |
|
| 908 |
|
| 909 |
# --- Gradio UI Blocks ---
|
|
@@ -913,80 +61,78 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
|
|
| 913 |
# Central state for holding token, client_id, org_urn, and fetched dataframes
|
| 914 |
token_state = gr.State(value={
|
| 915 |
"token": None, "client_id": None, "org_urn": None,
|
| 916 |
-
"bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,
|
| 917 |
"bubble_mentions_df": pd.DataFrame(),
|
| 918 |
-
"bubble_follower_stats_df": pd.DataFrame(),
|
| 919 |
-
"url_user_token_temp_storage": None
|
| 920 |
})
|
| 921 |
|
| 922 |
gr.Markdown("# π LinkedIn Organization Dashboard")
|
| 923 |
# Hidden textboxes to capture URL parameters
|
| 924 |
-
url_user_token_display = gr.Textbox(label="User Token (from URL - Hidden)", interactive=False, visible=False)
|
| 925 |
-
status_box = gr.Textbox(label="Overall LinkedIn Token Status", interactive=False, value="Initializing...")
|
| 926 |
org_urn_display = gr.Textbox(label="Organization URN (from URL - Hidden)", interactive=False, visible=False)
|
| 927 |
|
| 928 |
# Load URL parameters when the Gradio app loads
|
| 929 |
-
# This will populate url_user_token_display and org_urn_display
|
| 930 |
app.load(fn=get_url_user_token, inputs=None, outputs=[url_user_token_display, org_urn_display], api_name="get_url_params", show_progress=False)
|
| 931 |
-
|
| 932 |
-
# This function will run after URL params are loaded and org_urn_display changes
|
| 933 |
def initial_load_sequence(url_token, org_urn_val, current_state):
|
| 934 |
-
logging.info(f"Initial load sequence triggered
|
| 935 |
# Process token, fetch Bubble data, determine sync needs
|
| 936 |
status_msg, new_state, btn_update = process_and_store_bubble_token(url_token, org_urn_val, current_state)
|
| 937 |
# Display initial dashboard content based on (potentially empty) Bubble data
|
| 938 |
-
dashboard_content = display_main_dashboard(new_state)
|
| 939 |
return status_msg, new_state, btn_update, dashboard_content
|
| 940 |
|
| 941 |
with gr.Tabs():
|
| 942 |
with gr.TabItem("1οΈβ£ Dashboard & Sync"):
|
| 943 |
gr.Markdown("System checks for existing data from Bubble. The 'Sync' button activates if new data needs to be fetched from LinkedIn based on the last sync times and data availability.")
|
| 944 |
-
sync_data_btn = gr.Button("π Sync LinkedIn Data", variant="primary", visible=False, interactive=False)
|
| 945 |
-
sync_status_html_output = gr.HTML("<p style='text-align:center;'>Sync status will appear here.</p>")
|
| 946 |
-
dashboard_display_html = gr.HTML("<p style='text-align:center;'>Dashboard loading...</p>")
|
| 947 |
|
| 948 |
# Chain of events for initial load:
|
| 949 |
-
# 1. app.load gets URL params.
|
| 950 |
-
# 2. org_urn_display.change triggers initial_load_sequence.
|
| 951 |
-
# This populates token_state, updates sync button, and loads initial dashboard.
|
| 952 |
org_urn_display.change(
|
| 953 |
fn=initial_load_sequence,
|
| 954 |
inputs=[url_user_token_display, org_urn_display, token_state],
|
| 955 |
outputs=[status_box, token_state, sync_data_btn, dashboard_display_html],
|
| 956 |
show_progress="full"
|
| 957 |
)
|
| 958 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 959 |
# When Sync button is clicked:
|
| 960 |
-
# 1. sync_all_linkedin_data: Fetches from LinkedIn, uploads to Bubble, updates token_state DFs.
|
| 961 |
-
# 2. process_and_store_bubble_token: Re-evaluates sync needs (button should now say "Up-to-date").
|
| 962 |
-
# 3. display_main_dashboard: Refreshes dashboard with newly synced data.
|
| 963 |
sync_data_btn.click(
|
| 964 |
-
fn=
|
| 965 |
-
inputs=[token_state],
|
| 966 |
outputs=[sync_status_html_output, token_state], # token_state is updated here
|
| 967 |
show_progress="full"
|
| 968 |
-
).then(
|
| 969 |
fn=process_and_store_bubble_token, # Re-check sync status and update button
|
| 970 |
inputs=[url_user_token_display, org_urn_display, token_state], # Pass current token_state
|
| 971 |
outputs=[status_box, token_state, sync_data_btn], # token_state updated again
|
| 972 |
-
show_progress=False
|
| 973 |
).then(
|
| 974 |
fn=display_main_dashboard, # Refresh dashboard display
|
| 975 |
inputs=[token_state],
|
| 976 |
outputs=[dashboard_display_html],
|
| 977 |
show_progress=False
|
| 978 |
)
|
| 979 |
-
|
| 980 |
with gr.TabItem("2οΈβ£ Analytics"):
|
| 981 |
fetch_analytics_btn = gr.Button("π Fetch/Refresh Full Analytics", variant="primary")
|
| 982 |
-
|
| 983 |
-
follower_count_md = gr.Markdown("Analytics data will load here...")
|
| 984 |
with gr.Row(): follower_plot, growth_plot = gr.Plot(label="Follower Demographics"), gr.Plot(label="Follower Growth")
|
| 985 |
with gr.Row(): eng_rate_plot = gr.Plot(label="Engagement Rate")
|
| 986 |
with gr.Row(): interaction_plot = gr.Plot(label="Post Interactions")
|
| 987 |
-
with gr.Row(): eb_plot = gr.Plot(label="Engagement Benchmark")
|
| 988 |
with gr.Row(): mentions_vol_plot, mentions_sentiment_plot = gr.Plot(label="Mentions Volume"), gr.Plot(label="Mentions Sentiment")
|
| 989 |
-
|
| 990 |
fetch_analytics_btn.click(
|
| 991 |
fn=guarded_fetch_analytics, inputs=[token_state],
|
| 992 |
outputs=[follower_count_md, follower_plot, growth_plot, eng_rate_plot,
|
|
@@ -997,43 +143,43 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
|
|
| 997 |
with gr.TabItem("3οΈβ£ Mentions"):
|
| 998 |
refresh_mentions_display_btn = gr.Button("π Refresh Mentions Display (from local data)", variant="secondary")
|
| 999 |
mentions_html = gr.HTML("Mentions data loads from Bubble after sync. Click refresh to view current local data.")
|
| 1000 |
-
mentions_sentiment_dist_plot = gr.Plot(label="Mention Sentiment Distribution")
|
| 1001 |
refresh_mentions_display_btn.click(
|
| 1002 |
fn=run_mentions_tab_display, inputs=[token_state],
|
| 1003 |
outputs=[mentions_html, mentions_sentiment_dist_plot],
|
| 1004 |
show_progress="full"
|
| 1005 |
)
|
| 1006 |
-
|
| 1007 |
-
with gr.TabItem("4οΈβ£ Follower Stats"):
|
| 1008 |
refresh_follower_stats_btn = gr.Button("π Refresh Follower Stats Display (from local data)", variant="secondary")
|
| 1009 |
follower_stats_html = gr.HTML("Follower statistics load from Bubble after sync. Click refresh to view current local data.")
|
| 1010 |
with gr.Row():
|
| 1011 |
fs_plot_monthly_gains = gr.Plot(label="Monthly Follower Gains")
|
| 1012 |
with gr.Row():
|
| 1013 |
fs_plot_seniority = gr.Plot(label="Followers by Seniority (Top 10 Organic)")
|
| 1014 |
-
fs_plot_industry = gr.Plot(label="Followers by Industry (Top 10 Organic)")
|
| 1015 |
|
| 1016 |
refresh_follower_stats_btn.click(
|
| 1017 |
fn=run_follower_stats_tab_display, inputs=[token_state],
|
| 1018 |
outputs=[follower_stats_html, fs_plot_monthly_gains, fs_plot_seniority, fs_plot_industry],
|
| 1019 |
show_progress="full"
|
| 1020 |
)
|
| 1021 |
-
|
| 1022 |
if __name__ == "__main__":
|
| 1023 |
# Check for essential environment variables
|
| 1024 |
-
if not os.environ.get(
|
| 1025 |
-
logging.warning("WARNING: '
|
| 1026 |
-
if not os.environ.get(
|
| 1027 |
-
not os.environ.get(
|
| 1028 |
-
not os.environ.get(
|
| 1029 |
logging.warning("WARNING: One or more Bubble environment variables (BUBBLE_APP_NAME, BUBBLE_API_KEY_PRIVATE, BUBBLE_API_ENDPOINT) are not set. Bubble integration will fail.")
|
| 1030 |
|
| 1031 |
try:
|
| 1032 |
import matplotlib
|
| 1033 |
-
logging.info(f"Matplotlib version: {matplotlib.__version__} found.")
|
|
|
|
| 1034 |
except ImportError:
|
| 1035 |
logging.error("Matplotlib is not installed. Plots will not be generated. Please install it: pip install matplotlib")
|
| 1036 |
-
|
| 1037 |
-
# Launch the Gradio app
|
| 1038 |
-
app.launch(server_name="0.0.0.0", server_port=7860, debug=True) # Added debug=True for more verbose logging from Gradio
|
| 1039 |
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
# -- coding: utf-8 --
|
| 3 |
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
import os
|
| 6 |
import logging
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# --- Module Imports ---
|
| 9 |
+
# Functions from your existing/provided custom modules
|
| 10 |
+
from analytics_fetch_and_rendering import fetch_and_render_analytics # Assuming this exists
|
| 11 |
+
from gradio_utils import get_url_user_token # For fetching URL parameters
|
| 12 |
|
| 13 |
+
# Functions from newly created/refactored modules
|
| 14 |
+
from config import (
|
| 15 |
+
LINKEDIN_CLIENT_ID_ENV_VAR, BUBBLE_APP_NAME_ENV_VAR,
|
| 16 |
+
BUBBLE_API_KEY_PRIVATE_ENV_VAR, BUBBLE_API_ENDPOINT_ENV_VAR
|
| 17 |
)
|
| 18 |
+
from state_manager import process_and_store_bubble_token
|
| 19 |
+
from sync_logic import sync_all_linkedin_data_orchestrator
|
| 20 |
+
from ui_generators import (
|
| 21 |
+
display_main_dashboard,
|
| 22 |
+
run_mentions_tab_display,
|
| 23 |
+
run_follower_stats_tab_display
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
)
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
# Configure logging
|
| 27 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 28 |
|
| 29 |
+
# --- Guarded Analytics Fetch ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
def guarded_fetch_analytics(token_state):
|
| 31 |
"""Guarded call to fetch_and_render_analytics, ensuring token and basic data structures."""
|
| 32 |
if not token_state or not token_state.get("token"):
|
| 33 |
logging.warning("Analytics fetch: Access denied. No token.")
|
| 34 |
# Ensure the number of returned Nones matches the expected number of outputs for the plots
|
| 35 |
+
return ("β Access denied. No token.", None, None, None, None, None, None, None)
|
| 36 |
+
|
| 37 |
# Ensure DataFrames are passed, even if empty, to avoid errors in the analytics function
|
| 38 |
posts_df_analytics = token_state.get("bubble_posts_df", pd.DataFrame())
|
| 39 |
mentions_df_analytics = token_state.get("bubble_mentions_df", pd.DataFrame())
|
| 40 |
follower_stats_df_analytics = token_state.get("bubble_follower_stats_df", pd.DataFrame())
|
| 41 |
|
| 42 |
logging.info("Calling fetch_and_render_analytics with current token_state data.")
|
| 43 |
+
try:
|
| 44 |
+
return fetch_and_render_analytics(
|
| 45 |
+
token_state.get("client_id"),
|
| 46 |
+
token_state.get("token"),
|
| 47 |
+
token_state.get("org_urn"),
|
| 48 |
+
posts_df_analytics,
|
| 49 |
+
mentions_df_analytics,
|
| 50 |
+
follower_stats_df_analytics
|
| 51 |
)
|
| 52 |
+
except Exception as e:
|
| 53 |
+
logging.error(f"Error in guarded_fetch_analytics calling fetch_and_render_analytics: {e}", exc_info=True)
|
| 54 |
+
return (f"β Error fetching analytics: {e}", None, None, None, None, None, None, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
# --- Gradio UI Blocks ---
|
|
|
|
| 61 |
# Central state for holding token, client_id, org_urn, and fetched dataframes
|
| 62 |
token_state = gr.State(value={
|
| 63 |
"token": None, "client_id": None, "org_urn": None,
|
| 64 |
+
"bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,
|
| 65 |
"bubble_mentions_df": pd.DataFrame(),
|
| 66 |
+
"bubble_follower_stats_df": pd.DataFrame(),
|
| 67 |
+
"url_user_token_temp_storage": None
|
| 68 |
})
|
| 69 |
|
| 70 |
gr.Markdown("# π LinkedIn Organization Dashboard")
|
| 71 |
# Hidden textboxes to capture URL parameters
|
| 72 |
+
url_user_token_display = gr.Textbox(label="User Token (from URL - Hidden)", interactive=False, visible=False)
|
| 73 |
+
status_box = gr.Textbox(label="Overall LinkedIn Token Status", interactive=False, value="Initializing...")
|
| 74 |
org_urn_display = gr.Textbox(label="Organization URN (from URL - Hidden)", interactive=False, visible=False)
|
| 75 |
|
| 76 |
# Load URL parameters when the Gradio app loads
|
|
|
|
| 77 |
app.load(fn=get_url_user_token, inputs=None, outputs=[url_user_token_display, org_urn_display], api_name="get_url_params", show_progress=False)
|
| 78 |
+
|
| 79 |
+
# This function will run after URL params are loaded and org_urn_display changes
|
| 80 |
def initial_load_sequence(url_token, org_urn_val, current_state):
    """Run once URL params have loaded: refresh token/Bubble state, then render the dashboard."""
    logging.info(f"Initial load sequence triggered. Org URN: {org_urn_val}, URL Token: {'Present' if url_token else 'Absent'}")
    # Process the token, pull Bubble data, and decide whether a LinkedIn sync is needed.
    status_msg, refreshed_state, btn_update = process_and_store_bubble_token(
        url_token, org_urn_val, current_state
    )
    # Render the dashboard from whatever (possibly empty) Bubble data we now hold.
    return status_msg, refreshed_state, btn_update, display_main_dashboard(refreshed_state)
|
| 87 |
|
| 88 |
with gr.Tabs():
|
| 89 |
with gr.TabItem("1οΈβ£ Dashboard & Sync"):
|
| 90 |
gr.Markdown("System checks for existing data from Bubble. The 'Sync' button activates if new data needs to be fetched from LinkedIn based on the last sync times and data availability.")
|
| 91 |
+
sync_data_btn = gr.Button("π Sync LinkedIn Data", variant="primary", visible=False, interactive=False)
|
| 92 |
+
sync_status_html_output = gr.HTML("<p style='text-align:center;'>Sync status will appear here.</p>")
|
| 93 |
+
dashboard_display_html = gr.HTML("<p style='text-align:center;'>Dashboard loading...</p>")
|
| 94 |
|
| 95 |
# Chain of events for initial load:
|
|
|
|
|
|
|
|
|
|
| 96 |
org_urn_display.change(
|
| 97 |
fn=initial_load_sequence,
|
| 98 |
inputs=[url_user_token_display, org_urn_display, token_state],
|
| 99 |
outputs=[status_box, token_state, sync_data_btn, dashboard_display_html],
|
| 100 |
show_progress="full"
|
| 101 |
)
|
| 102 |
+
# Also trigger initial_load_sequence if url_user_token_display changes (e.g. if it loads after org_urn)
|
| 103 |
+
# This helps ensure it runs once both are potentially available.
|
| 104 |
+
# Note: `org_urn_display.change` might be sufficient if `get_url_user_token` updates both nearly simultaneously.
|
| 105 |
+
# Adding this for robustness, but ensure it doesn't cause unwanted multiple runs if state isn't managed carefully.
|
| 106 |
+
# Consider using a flag in token_state if multiple triggers become an issue.
|
| 107 |
+
# For now, relying on org_urn_display.change as the primary trigger post-load.
|
| 108 |
+
|
| 109 |
# When Sync button is clicked:
|
|
|
|
|
|
|
|
|
|
| 110 |
sync_data_btn.click(
|
| 111 |
+
fn=sync_all_linkedin_data_orchestrator,
|
| 112 |
+
inputs=[token_state],
|
| 113 |
outputs=[sync_status_html_output, token_state], # token_state is updated here
|
| 114 |
show_progress="full"
|
| 115 |
+
).then(
|
| 116 |
fn=process_and_store_bubble_token, # Re-check sync status and update button
|
| 117 |
inputs=[url_user_token_display, org_urn_display, token_state], # Pass current token_state
|
| 118 |
outputs=[status_box, token_state, sync_data_btn], # token_state updated again
|
| 119 |
+
show_progress=False # Typically "full" for user-initiated actions, "minimal" or False for quick updates
|
| 120 |
).then(
|
| 121 |
fn=display_main_dashboard, # Refresh dashboard display
|
| 122 |
inputs=[token_state],
|
| 123 |
outputs=[dashboard_display_html],
|
| 124 |
show_progress=False
|
| 125 |
)
|
| 126 |
+
|
| 127 |
with gr.TabItem("2οΈβ£ Analytics"):
|
| 128 |
fetch_analytics_btn = gr.Button("π Fetch/Refresh Full Analytics", variant="primary")
|
| 129 |
+
follower_count_md = gr.Markdown("Analytics data will load here...")
|
|
|
|
| 130 |
with gr.Row(): follower_plot, growth_plot = gr.Plot(label="Follower Demographics"), gr.Plot(label="Follower Growth")
|
| 131 |
with gr.Row(): eng_rate_plot = gr.Plot(label="Engagement Rate")
|
| 132 |
with gr.Row(): interaction_plot = gr.Plot(label="Post Interactions")
|
| 133 |
+
with gr.Row(): eb_plot = gr.Plot(label="Engagement Benchmark")
|
| 134 |
with gr.Row(): mentions_vol_plot, mentions_sentiment_plot = gr.Plot(label="Mentions Volume"), gr.Plot(label="Mentions Sentiment")
|
| 135 |
+
|
| 136 |
fetch_analytics_btn.click(
|
| 137 |
fn=guarded_fetch_analytics, inputs=[token_state],
|
| 138 |
outputs=[follower_count_md, follower_plot, growth_plot, eng_rate_plot,
|
|
|
|
| 143 |
with gr.TabItem("3οΈβ£ Mentions"):
|
| 144 |
refresh_mentions_display_btn = gr.Button("π Refresh Mentions Display (from local data)", variant="secondary")
|
| 145 |
mentions_html = gr.HTML("Mentions data loads from Bubble after sync. Click refresh to view current local data.")
|
| 146 |
+
mentions_sentiment_dist_plot = gr.Plot(label="Mention Sentiment Distribution")
|
| 147 |
refresh_mentions_display_btn.click(
|
| 148 |
fn=run_mentions_tab_display, inputs=[token_state],
|
| 149 |
outputs=[mentions_html, mentions_sentiment_dist_plot],
|
| 150 |
show_progress="full"
|
| 151 |
)
|
| 152 |
+
|
| 153 |
+
with gr.TabItem("4οΈβ£ Follower Stats"):
|
| 154 |
refresh_follower_stats_btn = gr.Button("π Refresh Follower Stats Display (from local data)", variant="secondary")
|
| 155 |
follower_stats_html = gr.HTML("Follower statistics load from Bubble after sync. Click refresh to view current local data.")
|
| 156 |
with gr.Row():
|
| 157 |
fs_plot_monthly_gains = gr.Plot(label="Monthly Follower Gains")
|
| 158 |
with gr.Row():
|
| 159 |
fs_plot_seniority = gr.Plot(label="Followers by Seniority (Top 10 Organic)")
|
| 160 |
+
fs_plot_industry = gr.Plot(label="Followers by Industry (Top 10 Organic)")
|
| 161 |
|
| 162 |
refresh_follower_stats_btn.click(
|
| 163 |
fn=run_follower_stats_tab_display, inputs=[token_state],
|
| 164 |
outputs=[follower_stats_html, fs_plot_monthly_gains, fs_plot_seniority, fs_plot_industry],
|
| 165 |
show_progress="full"
|
| 166 |
)
|
| 167 |
+
|
| 168 |
if __name__ == "__main__":
|
| 169 |
# Check for essential environment variables
|
| 170 |
+
if not os.environ.get(LINKEDIN_CLIENT_ID_ENV_VAR):
|
| 171 |
+
logging.warning(f"WARNING: '{LINKEDIN_CLIENT_ID_ENV_VAR}' environment variable not set. The app may not function correctly for LinkedIn API calls.")
|
| 172 |
+
if not os.environ.get(BUBBLE_APP_NAME_ENV_VAR) or \
|
| 173 |
+
not os.environ.get(BUBBLE_API_KEY_PRIVATE_ENV_VAR) or \
|
| 174 |
+
not os.environ.get(BUBBLE_API_ENDPOINT_ENV_VAR):
|
| 175 |
logging.warning("WARNING: One or more Bubble environment variables (BUBBLE_APP_NAME, BUBBLE_API_KEY_PRIVATE, BUBBLE_API_ENDPOINT) are not set. Bubble integration will fail.")
|
| 176 |
|
| 177 |
try:
|
| 178 |
import matplotlib
|
| 179 |
+
logging.info(f"Matplotlib version: {matplotlib.__version__} found. Backend: {matplotlib.get_backend()}")
|
| 180 |
+
# The backend is now set in ui_generators.py, which is good practice.
|
| 181 |
except ImportError:
|
| 182 |
logging.error("Matplotlib is not installed. Plots will not be generated. Please install it: pip install matplotlib")
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
+
# Launch the Gradio app
|
| 185 |
+
app.launch(server_name="0.0.0.0", server_port=7860, debug=True)
|