Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ import os
|
|
5 |
import logging
|
6 |
import html
|
7 |
import pandas as pd
|
8 |
-
from datetime import datetime, timedelta #
|
9 |
|
10 |
# Import functions from your custom modules
|
11 |
from analytics_fetch_and_rendering import fetch_and_render_analytics
|
@@ -14,7 +14,7 @@ from gradio_utils import get_url_user_token
|
|
14 |
from Bubble_API_Calls import (
|
15 |
fetch_linkedin_token_from_bubble,
|
16 |
bulk_upload_to_bubble,
|
17 |
-
fetch_linkedin_posts_data_from_bubble
|
18 |
)
|
19 |
|
20 |
from Linkedin_Data_API_Calls import (
|
@@ -29,22 +29,33 @@ from Linkedin_Data_API_Calls import (
|
|
29 |
prepare_mentions_for_bubble # Prepares user-specified format for Bubble
|
30 |
)
|
31 |
|
|
|
|
|
|
|
32 |
# Configure logging
|
33 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
34 |
|
35 |
# --- Global Constants ---
|
36 |
DEFAULT_INITIAL_FETCH_COUNT = 10
|
37 |
-
LINKEDIN_POST_URN_KEY = 'id'
|
38 |
-
BUBBLE_POST_URN_COLUMN_NAME = 'id'
|
39 |
-
BUBBLE_POST_DATE_COLUMN_NAME = 'published_at'
|
40 |
|
41 |
-
# Constants for Mentions
|
42 |
-
BUBBLE_MENTIONS_TABLE_NAME = "LI_mentions"
|
43 |
-
BUBBLE_MENTIONS_ID_COLUMN_NAME = "id" #
|
44 |
-
BUBBLE_MENTIONS_DATE_COLUMN_NAME = "date" #
|
45 |
|
46 |
DEFAULT_MENTIONS_INITIAL_FETCH_COUNT = 20
|
47 |
-
DEFAULT_MENTIONS_UPDATE_FETCH_COUNT = 10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
|
50 |
def check_token_status(token_state):
|
@@ -53,33 +64,36 @@ def check_token_status(token_state):
|
|
53 |
|
54 |
def process_and_store_bubble_token(url_user_token, org_urn, token_state):
|
55 |
"""
|
56 |
-
Processes user token, fetches LinkedIn token, fetches existing Bubble posts
|
57 |
-
and determines if an initial fetch or update is needed for
|
58 |
Updates token state and UI for the sync button.
|
59 |
"""
|
60 |
logging.info(f"Processing token with URL user token: '{url_user_token}', Org URN: '{org_urn}'")
|
61 |
|
|
|
62 |
new_state = token_state.copy() if token_state else {
|
63 |
-
"token": None, "client_id": None, "org_urn": None,
|
64 |
"bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,
|
65 |
-
"bubble_mentions_df": pd.DataFrame(),
|
|
|
66 |
"url_user_token_temp_storage": None
|
67 |
}
|
68 |
new_state.update({
|
69 |
-
"org_urn": org_urn,
|
70 |
-
"bubble_posts_df": new_state.get("bubble_posts_df", pd.DataFrame()),
|
71 |
"fetch_count_for_api": new_state.get("fetch_count_for_api", 0),
|
72 |
-
"bubble_mentions_df": new_state.get("bubble_mentions_df", pd.DataFrame()),
|
73 |
-
"
|
74 |
-
"url_user_token_temp_storage": url_user_token
|
75 |
})
|
76 |
|
77 |
-
button_update = gr.update(visible=False, interactive=False, value="π Sync LinkedIn Data")
|
78 |
|
79 |
client_id = os.environ.get("Linkedin_client_id")
|
80 |
new_state["client_id"] = client_id if client_id else "ENV VAR MISSING"
|
81 |
if not client_id: logging.error("CRITICAL ERROR: 'Linkedin_client_id' environment variable not set.")
|
82 |
|
|
|
83 |
if url_user_token and "not found" not in url_user_token and "Could not access" not in url_user_token:
|
84 |
logging.info(f"Attempting to fetch LinkedIn token from Bubble with user token: {url_user_token}")
|
85 |
try:
|
@@ -92,21 +106,22 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
|
|
92 |
logging.warning(f"β Failed to fetch a valid LinkedIn token from Bubble. Response: {parsed_linkedin_token}")
|
93 |
except Exception as e:
|
94 |
new_state["token"] = None
|
95 |
-
logging.error(f"β Exception while fetching LinkedIn token from Bubble: {e}")
|
96 |
else:
|
97 |
new_state["token"] = None
|
98 |
logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
|
99 |
|
|
|
100 |
current_org_urn = new_state.get("org_urn")
|
101 |
if current_org_urn:
|
102 |
# Fetch Posts from Bubble
|
103 |
logging.info(f"Attempting to fetch posts from Bubble for org_urn: {current_org_urn}")
|
104 |
try:
|
105 |
-
fetched_posts_df, error_message_posts = fetch_linkedin_posts_data_from_bubble(current_org_urn, "LI_posts")
|
106 |
new_state["bubble_posts_df"] = pd.DataFrame() if error_message_posts or fetched_posts_df is None else fetched_posts_df
|
107 |
-
if error_message_posts: logging.warning(f"Error from
|
108 |
except Exception as e:
|
109 |
-
logging.error(f"β Error fetching posts from Bubble: {e}.")
|
110 |
new_state["bubble_posts_df"] = pd.DataFrame()
|
111 |
|
112 |
# Fetch Mentions from Bubble
|
@@ -114,14 +129,26 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
|
|
114 |
try:
|
115 |
fetched_mentions_df, error_message_mentions = fetch_linkedin_posts_data_from_bubble(current_org_urn, BUBBLE_MENTIONS_TABLE_NAME)
|
116 |
new_state["bubble_mentions_df"] = pd.DataFrame() if error_message_mentions or fetched_mentions_df is None else fetched_mentions_df
|
117 |
-
if error_message_mentions: logging.warning(f"Error from
|
118 |
except Exception as e:
|
119 |
-
logging.error(f"β Error fetching mentions from Bubble: {e}.")
|
120 |
new_state["bubble_mentions_df"] = pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
else:
|
122 |
-
logging.warning("Org URN not available in state. Cannot fetch
|
123 |
new_state["bubble_posts_df"] = pd.DataFrame()
|
124 |
new_state["bubble_mentions_df"] = pd.DataFrame()
|
|
|
|
|
125 |
|
126 |
# Determine fetch count for Posts API
|
127 |
if new_state["bubble_posts_df"].empty:
|
@@ -129,58 +156,116 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
|
|
129 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
130 |
else:
|
131 |
try:
|
132 |
-
df_posts_check = new_state["bubble_posts_df"].copy()
|
133 |
if BUBBLE_POST_DATE_COLUMN_NAME not in df_posts_check.columns or df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME].isnull().all():
|
134 |
-
logging.warning(f"Date column '{BUBBLE_POST_DATE_COLUMN_NAME}' for posts missing
|
135 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
136 |
else:
|
137 |
df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME] = pd.to_datetime(df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME], errors='coerce', utc=True)
|
138 |
last_post_date_utc = df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME].dropna().max()
|
139 |
-
if pd.isna(last_post_date_utc):
|
|
|
140 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
141 |
else:
|
142 |
days_diff = (pd.Timestamp('now', tz='UTC').normalize() - last_post_date_utc.normalize()).days
|
143 |
-
if days_diff >= 7:
|
144 |
-
|
|
|
|
|
145 |
else:
|
146 |
-
new_state['fetch_count_for_api'] = 0
|
|
|
147 |
except Exception as e:
|
148 |
-
logging.error(f"Error processing post dates: {e}. Defaulting to initial fetch.")
|
149 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
150 |
|
151 |
-
# Determine if
|
152 |
mentions_need_sync = False
|
153 |
if new_state["bubble_mentions_df"].empty:
|
154 |
mentions_need_sync = True
|
|
|
155 |
else:
|
156 |
-
if
|
|
|
|
|
157 |
mentions_need_sync = True
|
|
|
158 |
else:
|
159 |
-
df_mentions_check = new_state["bubble_mentions_df"].copy()
|
160 |
df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = pd.to_datetime(df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True)
|
161 |
last_mention_date_utc = df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME].dropna().max()
|
162 |
-
if
|
|
|
|
|
163 |
mentions_need_sync = True
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
else:
|
175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
token_status_message = check_token_status(new_state)
|
178 |
-
logging.info(f"Token processing complete. Status: {token_status_message}. Button: {button_update}.
|
179 |
return token_status_message, new_state, button_update
|
180 |
|
181 |
|
182 |
def sync_linkedin_mentions(token_state):
|
183 |
-
"""Fetches
|
184 |
logging.info("Starting LinkedIn mentions sync process.")
|
185 |
if not token_state or not token_state.get("token"):
|
186 |
logging.error("Mentions sync: Access denied. No LinkedIn token.")
|
@@ -189,293 +274,749 @@ def sync_linkedin_mentions(token_state):
|
|
189 |
client_id = token_state.get("client_id")
|
190 |
token_dict = token_state.get("token")
|
191 |
org_urn = token_state.get('org_urn')
|
192 |
-
bubble_mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame())
|
193 |
|
194 |
if not org_urn or not client_id or client_id == "ENV VAR MISSING":
|
195 |
logging.error("Mentions sync: Configuration error (Org URN or Client ID missing).")
|
196 |
return "Mentions: Config error. ", token_state
|
197 |
|
|
|
198 |
fetch_count_for_mentions_api = 0
|
|
|
199 |
if bubble_mentions_df.empty:
|
|
|
200 |
fetch_count_for_mentions_api = DEFAULT_MENTIONS_INITIAL_FETCH_COUNT
|
201 |
-
logging.info(
|
202 |
else:
|
203 |
-
if BUBBLE_MENTIONS_DATE_COLUMN_NAME not in bubble_mentions_df.columns or
|
204 |
-
|
|
|
205 |
fetch_count_for_mentions_api = DEFAULT_MENTIONS_INITIAL_FETCH_COUNT
|
|
|
206 |
else:
|
207 |
-
mentions_df_copy = bubble_mentions_df.copy()
|
208 |
mentions_df_copy[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = pd.to_datetime(mentions_df_copy[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True)
|
209 |
last_mention_date_utc = mentions_df_copy[BUBBLE_MENTIONS_DATE_COLUMN_NAME].dropna().max()
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
fetch_count_for_mentions_api =
|
214 |
-
|
215 |
-
days_since_last_mention = (pd.Timestamp('now', tz='UTC').normalize() - last_mention_date_utc.normalize()).days
|
216 |
-
logging.info(f"Days since last mention: {days_since_last_mention}")
|
217 |
-
if days_since_last_mention >= 7:
|
218 |
-
fetch_count_for_mentions_api = DEFAULT_MENTIONS_UPDATE_FETCH_COUNT
|
219 |
-
logging.info(f"Last mention older than 7 days. Fetching update of {fetch_count_for_mentions_api} mentions.")
|
220 |
-
else:
|
221 |
-
logging.info("Mentions data is fresh. No API fetch needed.")
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
if fetch_count_for_mentions_api == 0:
|
226 |
return "Mentions: Up-to-date. ", token_state
|
|
|
|
|
227 |
|
228 |
try:
|
229 |
-
logging.info(f"Fetching {fetch_count_for_mentions_api} core mentions from LinkedIn for org_urn: {org_urn}")
|
230 |
processed_raw_mentions = fetch_linkedin_mentions_core(client_id, token_dict, org_urn, count=fetch_count_for_mentions_api)
|
231 |
-
|
232 |
if not processed_raw_mentions:
|
233 |
-
logging.info("No mentions
|
234 |
return "Mentions: None found via API. ", token_state
|
235 |
|
236 |
existing_mention_ids = set()
|
237 |
if not bubble_mentions_df.empty and BUBBLE_MENTIONS_ID_COLUMN_NAME in bubble_mentions_df.columns:
|
|
|
238 |
existing_mention_ids = set(bubble_mentions_df[BUBBLE_MENTIONS_ID_COLUMN_NAME].dropna().astype(str))
|
239 |
|
240 |
-
sentiments_map = analyze_mentions_sentiment(processed_raw_mentions)
|
241 |
-
all_compiled_mentions = compile_detailed_mentions(processed_raw_mentions, sentiments_map)
|
242 |
|
|
|
243 |
new_compiled_mentions_to_upload = [
|
244 |
m for m in all_compiled_mentions if str(m.get("id")) not in existing_mention_ids
|
245 |
]
|
246 |
|
247 |
if not new_compiled_mentions_to_upload:
|
248 |
-
logging.info("All fetched
|
249 |
return "Mentions: All fetched already in Bubble. ", token_state
|
250 |
|
251 |
-
|
252 |
-
bubble_ready_mentions = prepare_mentions_for_bubble(new_compiled_mentions_to_upload)
|
253 |
-
|
254 |
if bubble_ready_mentions:
|
255 |
-
logging.info(f"Uploading {len(bubble_ready_mentions)} new mentions to Bubble table: {BUBBLE_MENTIONS_TABLE_NAME}.")
|
256 |
bulk_upload_to_bubble(bubble_ready_mentions, BUBBLE_MENTIONS_TABLE_NAME)
|
|
|
|
|
|
|
|
|
|
|
257 |
return f"Mentions: Synced {len(bubble_ready_mentions)} new. ", token_state
|
258 |
else:
|
259 |
-
logging.info("No new mentions
|
260 |
return "Mentions: No new ones to upload. ", token_state
|
261 |
-
|
262 |
-
|
263 |
-
logging.error(f"ValueError during mentions sync: {ve}")
|
264 |
return f"Mentions Error: {html.escape(str(ve))}. ", token_state
|
265 |
except Exception as e:
|
266 |
-
logging.exception("Unexpected error in sync_linkedin_mentions.")
|
267 |
-
return "Mentions: Unexpected error. ", token_state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
|
269 |
|
270 |
-
def
|
271 |
-
|
|
|
272 |
if not token_state or not token_state.get("token"):
|
273 |
-
logging.error("Access denied.
|
274 |
return "<p style='color:red; text-align:center;'>β Access denied. LinkedIn token not available.</p>", token_state
|
275 |
|
276 |
client_id = token_state.get("client_id")
|
277 |
token_dict = token_state.get("token")
|
278 |
org_urn = token_state.get('org_urn')
|
279 |
fetch_count_for_posts_api = token_state.get('fetch_count_for_api', 0)
|
280 |
-
|
|
|
|
|
281 |
posts_sync_message = ""
|
|
|
|
|
282 |
|
283 |
-
if not org_urn:
|
284 |
-
|
|
|
|
|
|
|
|
|
285 |
|
|
|
286 |
if fetch_count_for_posts_api == 0:
|
287 |
posts_sync_message = "Posts: Already up-to-date. "
|
|
|
288 |
else:
|
|
|
289 |
try:
|
290 |
-
|
291 |
processed_raw_posts, stats_map, _ = fetch_linkedin_posts_core(client_id, token_dict, org_urn, count=fetch_count_for_posts_api)
|
292 |
-
|
|
|
|
|
|
|
293 |
else:
|
294 |
existing_post_urns = set()
|
295 |
-
if not
|
296 |
-
existing_post_urns = set(
|
|
|
|
|
297 |
new_raw_posts = [p for p in processed_raw_posts if str(p.get(LINKEDIN_POST_URN_KEY)) not in existing_post_urns]
|
298 |
-
|
|
|
|
|
|
|
299 |
else:
|
|
|
300 |
post_urns_to_process = [p[LINKEDIN_POST_URN_KEY] for p in new_raw_posts if p.get(LINKEDIN_POST_URN_KEY)]
|
|
|
301 |
all_comments_data = fetch_comments(client_id, token_dict, post_urns_to_process, stats_map)
|
302 |
-
sentiments_per_post = analyze_sentiment(all_comments_data)
|
303 |
-
detailed_new_posts = compile_detailed_posts(new_raw_posts, stats_map, sentiments_per_post)
|
|
|
|
|
304 |
li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed_new_posts, all_comments_data)
|
305 |
-
|
|
|
306 |
bulk_upload_to_bubble(li_posts, "LI_posts")
|
307 |
-
|
308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
posts_sync_message = f"Posts: Synced {len(li_posts)} new. "
|
310 |
-
else:
|
311 |
-
|
312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
313 |
|
314 |
-
|
315 |
-
|
|
|
316 |
|
317 |
-
#
|
318 |
-
|
319 |
-
|
320 |
-
fetched_posts_df, _ = fetch_linkedin_posts_data_from_bubble(org_urn, "LI_posts")
|
321 |
-
token_state["bubble_posts_df"] = pd.DataFrame() if fetched_posts_df is None else fetched_posts_df
|
322 |
-
fetched_mentions_df, _ = fetch_linkedin_posts_data_from_bubble(org_urn, BUBBLE_MENTIONS_TABLE_NAME)
|
323 |
-
token_state["bubble_mentions_df"] = pd.DataFrame() if fetched_mentions_df is None else fetched_mentions_df
|
324 |
-
logging.info("Refreshed posts and mentions DataFrames in state from Bubble after sync.")
|
325 |
-
except Exception as e:
|
326 |
-
logging.error(f"Error re-fetching data from Bubble post-sync: {e}")
|
327 |
|
328 |
-
|
|
|
329 |
return final_message, token_state
|
330 |
|
331 |
|
332 |
def display_main_dashboard(token_state):
|
|
|
333 |
if not token_state or not token_state.get("token"):
|
|
|
334 |
return "β Access denied. No token available for dashboard."
|
335 |
|
|
|
|
|
|
|
336 |
posts_df = token_state.get("bubble_posts_df", pd.DataFrame())
|
337 |
-
|
338 |
if not posts_df.empty:
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame())
|
344 |
-
|
345 |
if not mentions_df.empty:
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
352 |
|
353 |
|
354 |
def guarded_fetch_analytics(token_state):
|
|
|
355 |
if not token_state or not token_state.get("token"):
|
356 |
-
|
357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
358 |
|
359 |
|
360 |
def run_mentions_tab_display(token_state):
|
|
|
361 |
logging.info("Updating Mentions Tab display.")
|
362 |
if not token_state or not token_state.get("token"):
|
|
|
363 |
return ("β Access denied. No token available for mentions.", None)
|
364 |
|
365 |
mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame())
|
366 |
if mentions_df.empty:
|
|
|
367 |
return ("<p style='text-align:center;'>No mentions data in Bubble. Try syncing.</p>", None)
|
368 |
|
369 |
html_parts = ["<h3 style='text-align:center;'>Recent Mentions</h3>"]
|
370 |
-
#
|
371 |
-
display_columns = [col for col in [
|
372 |
|
373 |
-
|
374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
else:
|
376 |
-
|
377 |
-
html_parts.append(
|
378 |
|
379 |
mentions_html_output = "\n".join(html_parts)
|
380 |
-
fig = None
|
381 |
if not mentions_df.empty and "sentiment_label" in mentions_df.columns:
|
382 |
try:
|
383 |
import matplotlib.pyplot as plt
|
384 |
-
|
385 |
-
plt.
|
386 |
-
fig_plot, ax = plt.subplots(figsize=(6,4))
|
387 |
sentiment_counts = mentions_df["sentiment_label"].value_counts()
|
388 |
-
sentiment_counts.plot(kind='bar', ax=ax)
|
389 |
ax.set_title("Mention Sentiment Distribution")
|
390 |
ax.set_ylabel("Count")
|
391 |
plt.xticks(rotation=45, ha='right')
|
392 |
-
plt.tight_layout()
|
393 |
-
fig = fig_plot #
|
|
|
394 |
except Exception as e:
|
395 |
-
logging.error(f"Error generating mentions plot: {e}"
|
|
|
|
|
|
|
|
|
396 |
return mentions_html_output, fig
|
397 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
|
399 |
# --- Gradio UI Blocks ---
|
400 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
|
401 |
-
title="LinkedIn Organization
|
402 |
|
|
|
403 |
token_state = gr.State(value={
|
404 |
-
"token": None, "client_id": None, "org_urn": None,
|
405 |
-
"bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,
|
406 |
-
"bubble_mentions_df": pd.DataFrame(),
|
407 |
-
"
|
|
|
408 |
})
|
409 |
|
410 |
-
gr.Markdown("# π LinkedIn Organization
|
411 |
-
|
412 |
-
|
|
|
413 |
org_urn_display = gr.Textbox(label="Organization URN (from URL - Hidden)", interactive=False, visible=False)
|
414 |
|
415 |
-
|
|
|
|
|
416 |
|
417 |
-
#
|
418 |
def initial_load_sequence(url_token, org_urn_val, current_state):
|
|
|
|
|
419 |
status_msg, new_state, btn_update = process_and_store_bubble_token(url_token, org_urn_val, current_state)
|
420 |
-
|
|
|
421 |
return status_msg, new_state, btn_update, dashboard_content
|
422 |
|
423 |
with gr.Tabs():
|
424 |
with gr.TabItem("1οΈβ£ Dashboard & Sync"):
|
425 |
-
gr.Markdown("System checks for existing data.
|
426 |
-
sync_data_btn = gr.Button("π Sync LinkedIn Data", variant="primary", visible=False, interactive=False)
|
427 |
-
|
428 |
-
|
429 |
-
|
|
|
|
|
|
|
|
|
430 |
org_urn_display.change(
|
431 |
fn=initial_load_sequence,
|
432 |
inputs=[url_user_token_display, org_urn_display, token_state],
|
433 |
-
outputs=[status_box, token_state, sync_data_btn,
|
|
|
434 |
)
|
435 |
-
|
436 |
-
#
|
437 |
-
|
|
|
|
|
438 |
sync_data_btn.click(
|
439 |
-
fn=
|
440 |
-
inputs=[token_state],
|
441 |
-
outputs=[
|
|
|
442 |
).then(
|
443 |
-
fn=process_and_store_bubble_token,
|
444 |
-
inputs=[url_user_token_display, org_urn_display, token_state],
|
445 |
-
outputs=[status_box, token_state, sync_data_btn]
|
|
|
446 |
).then(
|
447 |
-
fn=display_main_dashboard,
|
448 |
inputs=[token_state],
|
449 |
-
outputs=[
|
|
|
450 |
)
|
451 |
|
452 |
with gr.TabItem("2οΈβ£ Analytics"):
|
453 |
-
fetch_analytics_btn = gr.Button("π Fetch
|
454 |
-
|
455 |
-
|
456 |
-
with gr.Row():
|
457 |
-
with gr.Row():
|
458 |
-
with gr.Row():
|
459 |
-
with gr.Row():
|
|
|
|
|
460 |
fetch_analytics_btn.click(
|
461 |
fn=guarded_fetch_analytics, inputs=[token_state],
|
462 |
-
outputs=[
|
463 |
-
interaction_plot, eb_plot, mentions_vol_plot, mentions_sentiment_plot]
|
|
|
464 |
)
|
465 |
|
466 |
with gr.TabItem("3οΈβ£ Mentions"):
|
467 |
-
refresh_mentions_display_btn = gr.Button("π Refresh Mentions Display", variant="secondary")
|
468 |
-
mentions_html = gr.HTML("Mentions data loads from Bubble after sync.")
|
469 |
-
|
470 |
refresh_mentions_display_btn.click(
|
471 |
fn=run_mentions_tab_display, inputs=[token_state],
|
472 |
-
outputs=[mentions_html,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
473 |
)
|
474 |
|
475 |
-
app.load(fn=lambda ts: check_token_status(ts), inputs=[token_state], outputs=status_box)
|
476 |
-
gr.Timer(15.0).tick(fn=lambda ts: check_token_status(ts), inputs=[token_state], outputs=status_box)
|
477 |
-
|
478 |
if __name__ == "__main__":
|
|
|
479 |
if not os.environ.get("Linkedin_client_id"):
|
480 |
-
logging.warning("WARNING: 'Linkedin_client_id'
|
481 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
import logging
|
6 |
import html
|
7 |
import pandas as pd
|
8 |
+
from datetime import datetime, timedelta, timezone # Added timezone
|
9 |
|
10 |
# Import functions from your custom modules
|
11 |
from analytics_fetch_and_rendering import fetch_and_render_analytics
|
|
|
14 |
from Bubble_API_Calls import (
|
15 |
fetch_linkedin_token_from_bubble,
|
16 |
bulk_upload_to_bubble,
|
17 |
+
fetch_linkedin_posts_data_from_bubble # This will be used for posts, mentions, and follower stats
|
18 |
)
|
19 |
|
20 |
from Linkedin_Data_API_Calls import (
|
|
|
29 |
prepare_mentions_for_bubble # Prepares user-specified format for Bubble
|
30 |
)
|
31 |
|
32 |
+
# Import follower stats function
|
33 |
+
from linkedin_follower_stats import get_linkedin_follower_stats
|
34 |
+
|
35 |
# Configure logging
|
36 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
37 |
|
38 |
# --- Global Constants ---
|
39 |
DEFAULT_INITIAL_FETCH_COUNT = 10
|
40 |
+
LINKEDIN_POST_URN_KEY = 'id'
|
41 |
+
BUBBLE_POST_URN_COLUMN_NAME = 'id' # Assuming this is the unique post ID in Bubble
|
42 |
+
BUBBLE_POST_DATE_COLUMN_NAME = 'published_at' # Assuming this is the post publication date in Bubble
|
43 |
|
44 |
+
# Constants for Mentions
|
45 |
+
BUBBLE_MENTIONS_TABLE_NAME = "LI_mentions"
|
46 |
+
BUBBLE_MENTIONS_ID_COLUMN_NAME = "id" # Assuming this is the unique mention ID in Bubble
|
47 |
+
BUBBLE_MENTIONS_DATE_COLUMN_NAME = "date" # Assuming this is the mention date in Bubble
|
48 |
|
49 |
DEFAULT_MENTIONS_INITIAL_FETCH_COUNT = 20
|
50 |
+
DEFAULT_MENTIONS_UPDATE_FETCH_COUNT = 10
|
51 |
+
|
52 |
+
# Constants for Follower Stats
|
53 |
+
BUBBLE_FOLLOWER_STATS_TABLE_NAME = "LI_follower_stats"
|
54 |
+
FOLLOWER_STATS_CATEGORY_COLUMN = "category_name" # For demographics: name (e.g., "Engineering"), for monthly gains: date string 'YYYY-MM-DD'
|
55 |
+
FOLLOWER_STATS_TYPE_COLUMN = "follower_count_type" # e.g., "follower_seniority", "follower_gains_monthly"
|
56 |
+
FOLLOWER_STATS_ORG_URN_COLUMN = "organization_urn" # URN of the organization
|
57 |
+
FOLLOWER_STATS_ORGANIC_COLUMN = "follower_count_organic"
|
58 |
+
FOLLOWER_STATS_PAID_COLUMN = "follower_count_paid"
|
59 |
|
60 |
|
61 |
def check_token_status(token_state):
|
|
|
64 |
|
65 |
def process_and_store_bubble_token(url_user_token, org_urn, token_state):
|
66 |
"""
|
67 |
+
Processes user token, fetches LinkedIn token, fetches existing Bubble data (posts, mentions, follower stats),
|
68 |
+
and determines if an initial fetch or update is needed for each data type.
|
69 |
Updates token state and UI for the sync button.
|
70 |
"""
|
71 |
logging.info(f"Processing token with URL user token: '{url_user_token}', Org URN: '{org_urn}'")
|
72 |
|
73 |
+
# Initialize or update state safely
|
74 |
new_state = token_state.copy() if token_state else {
|
75 |
+
"token": None, "client_id": None, "org_urn": None,
|
76 |
"bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,
|
77 |
+
"bubble_mentions_df": pd.DataFrame(),
|
78 |
+
"bubble_follower_stats_df": pd.DataFrame(),
|
79 |
"url_user_token_temp_storage": None
|
80 |
}
|
81 |
new_state.update({
|
82 |
+
"org_urn": org_urn,
|
83 |
+
"bubble_posts_df": new_state.get("bubble_posts_df", pd.DataFrame()), # Ensure DF exists
|
84 |
"fetch_count_for_api": new_state.get("fetch_count_for_api", 0),
|
85 |
+
"bubble_mentions_df": new_state.get("bubble_mentions_df", pd.DataFrame()), # Ensure DF exists
|
86 |
+
"bubble_follower_stats_df": new_state.get("bubble_follower_stats_df", pd.DataFrame()), # Ensure DF exists
|
87 |
+
"url_user_token_temp_storage": url_user_token
|
88 |
})
|
89 |
|
90 |
+
button_update = gr.update(visible=False, interactive=False, value="π Sync LinkedIn Data") # Default to hidden
|
91 |
|
92 |
client_id = os.environ.get("Linkedin_client_id")
|
93 |
new_state["client_id"] = client_id if client_id else "ENV VAR MISSING"
|
94 |
if not client_id: logging.error("CRITICAL ERROR: 'Linkedin_client_id' environment variable not set.")
|
95 |
|
96 |
+
# Fetch LinkedIn Token from Bubble
|
97 |
if url_user_token and "not found" not in url_user_token and "Could not access" not in url_user_token:
|
98 |
logging.info(f"Attempting to fetch LinkedIn token from Bubble with user token: {url_user_token}")
|
99 |
try:
|
|
|
106 |
logging.warning(f"β Failed to fetch a valid LinkedIn token from Bubble. Response: {parsed_linkedin_token}")
|
107 |
except Exception as e:
|
108 |
new_state["token"] = None
|
109 |
+
logging.error(f"β Exception while fetching LinkedIn token from Bubble: {e}", exc_info=True)
|
110 |
else:
|
111 |
new_state["token"] = None
|
112 |
logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
|
113 |
|
114 |
+
# Fetch existing data from Bubble if Org URN is available
|
115 |
current_org_urn = new_state.get("org_urn")
|
116 |
if current_org_urn:
|
117 |
# Fetch Posts from Bubble
|
118 |
logging.info(f"Attempting to fetch posts from Bubble for org_urn: {current_org_urn}")
|
119 |
try:
|
120 |
+
fetched_posts_df, error_message_posts = fetch_linkedin_posts_data_from_bubble(current_org_urn, "LI_posts") # Assuming "LI_posts" is the table name
|
121 |
new_state["bubble_posts_df"] = pd.DataFrame() if error_message_posts or fetched_posts_df is None else fetched_posts_df
|
122 |
+
if error_message_posts: logging.warning(f"Error fetching LI_posts from Bubble: {error_message_posts}.")
|
123 |
except Exception as e:
|
124 |
+
logging.error(f"β Error fetching posts from Bubble: {e}.", exc_info=True)
|
125 |
new_state["bubble_posts_df"] = pd.DataFrame()
|
126 |
|
127 |
# Fetch Mentions from Bubble
|
|
|
129 |
try:
|
130 |
fetched_mentions_df, error_message_mentions = fetch_linkedin_posts_data_from_bubble(current_org_urn, BUBBLE_MENTIONS_TABLE_NAME)
|
131 |
new_state["bubble_mentions_df"] = pd.DataFrame() if error_message_mentions or fetched_mentions_df is None else fetched_mentions_df
|
132 |
+
if error_message_mentions: logging.warning(f"Error fetching {BUBBLE_MENTIONS_TABLE_NAME} from Bubble: {error_message_mentions}.")
|
133 |
except Exception as e:
|
134 |
+
logging.error(f"β Error fetching mentions from Bubble: {e}.", exc_info=True)
|
135 |
new_state["bubble_mentions_df"] = pd.DataFrame()
|
136 |
+
|
137 |
+
# Fetch Follower Stats from Bubble
|
138 |
+
logging.info(f"Attempting to fetch follower stats from Bubble for org_urn: {current_org_urn}")
|
139 |
+
try:
|
140 |
+
fetched_follower_stats_df, error_message_fs = fetch_linkedin_posts_data_from_bubble(current_org_urn, BUBBLE_FOLLOWER_STATS_TABLE_NAME)
|
141 |
+
new_state["bubble_follower_stats_df"] = pd.DataFrame() if error_message_fs or fetched_follower_stats_df is None else fetched_follower_stats_df
|
142 |
+
if error_message_fs: logging.warning(f"Error fetching {BUBBLE_FOLLOWER_STATS_TABLE_NAME} from Bubble: {error_message_fs}.")
|
143 |
+
except Exception as e:
|
144 |
+
logging.error(f"β Error fetching follower stats from Bubble: {e}.", exc_info=True)
|
145 |
+
new_state["bubble_follower_stats_df"] = pd.DataFrame()
|
146 |
else:
|
147 |
+
logging.warning("Org URN not available in state. Cannot fetch data from Bubble.")
|
148 |
new_state["bubble_posts_df"] = pd.DataFrame()
|
149 |
new_state["bubble_mentions_df"] = pd.DataFrame()
|
150 |
+
new_state["bubble_follower_stats_df"] = pd.DataFrame()
|
151 |
+
|
152 |
|
153 |
# Determine fetch count for Posts API
|
154 |
if new_state["bubble_posts_df"].empty:
|
|
|
156 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
157 |
else:
|
158 |
try:
|
159 |
+
df_posts_check = new_state["bubble_posts_df"].copy() # Use .copy()
|
160 |
if BUBBLE_POST_DATE_COLUMN_NAME not in df_posts_check.columns or df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME].isnull().all():
|
161 |
+
logging.warning(f"Date column '{BUBBLE_POST_DATE_COLUMN_NAME}' for posts missing or all null values. Triggering initial fetch.")
|
162 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
163 |
else:
|
164 |
df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME] = pd.to_datetime(df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME], errors='coerce', utc=True)
|
165 |
last_post_date_utc = df_posts_check[BUBBLE_POST_DATE_COLUMN_NAME].dropna().max()
|
166 |
+
if pd.isna(last_post_date_utc): # No valid dates found after conversion
|
167 |
+
logging.warning("No valid post dates found after conversion. Triggering initial fetch.")
|
168 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
169 |
else:
|
170 |
days_diff = (pd.Timestamp('now', tz='UTC').normalize() - last_post_date_utc.normalize()).days
|
171 |
+
if days_diff >= 7:
|
172 |
+
# Fetch more if data is older, e.g., 10 posts per week of difference
|
173 |
+
new_state['fetch_count_for_api'] = max(1, days_diff // 7) * 10
|
174 |
+
logging.info(f"Posts data is {days_diff} days old. Setting fetch count to {new_state['fetch_count_for_api']}.")
|
175 |
else:
|
176 |
+
new_state['fetch_count_for_api'] = 0 # Data is recent
|
177 |
+
logging.info("Posts data is recent. No new posts fetch needed based on date.")
|
178 |
except Exception as e:
|
179 |
+
logging.error(f"Error processing post dates: {e}. Defaulting to initial fetch for posts.", exc_info=True)
|
180 |
new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
|
181 |
|
182 |
+
# Determine if Mentions need sync
|
183 |
mentions_need_sync = False
|
184 |
if new_state["bubble_mentions_df"].empty:
|
185 |
mentions_need_sync = True
|
186 |
+
logging.info("Mentions need sync: Bubble mentions DF is empty.")
|
187 |
else:
|
188 |
+
# Check if the crucial date column exists and has any non-null values
|
189 |
+
if BUBBLE_MENTIONS_DATE_COLUMN_NAME not in new_state["bubble_mentions_df"].columns or \
|
190 |
+
new_state["bubble_mentions_df"][BUBBLE_MENTIONS_DATE_COLUMN_NAME].isnull().all():
|
191 |
mentions_need_sync = True
|
192 |
+
logging.info(f"Mentions need sync: Date column '{BUBBLE_MENTIONS_DATE_COLUMN_NAME}' missing or all null values.")
|
193 |
else:
|
194 |
+
df_mentions_check = new_state["bubble_mentions_df"].copy() # Use .copy()
|
195 |
df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = pd.to_datetime(df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True)
|
196 |
last_mention_date_utc = df_mentions_check[BUBBLE_MENTIONS_DATE_COLUMN_NAME].dropna().max()
|
197 |
+
# Sync if no valid last mention date or if it's 7 days or older
|
198 |
+
if pd.isna(last_mention_date_utc) or \
|
199 |
+
(pd.Timestamp('now', tz='UTC').normalize() - last_mention_date_utc.normalize()).days >= 7:
|
200 |
mentions_need_sync = True
|
201 |
+
logging.info(f"Mentions need sync: Last mention date {last_mention_date_utc} is old or invalid.")
|
202 |
+
else:
|
203 |
+
logging.info(f"Mentions up-to-date. Last mention: {last_mention_date_utc}")
|
204 |
+
|
205 |
+
# Determine if Follower Stats need sync
|
206 |
+
follower_stats_need_sync = False
|
207 |
+
fs_df = new_state.get("bubble_follower_stats_df", pd.DataFrame())
|
208 |
+
if fs_df.empty:
|
209 |
+
follower_stats_need_sync = True
|
210 |
+
logging.info("Follower stats need sync: Bubble follower stats DF is empty.")
|
211 |
else:
|
212 |
+
# Check monthly gains data
|
213 |
+
monthly_gains_df = fs_df[fs_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].copy() # Use .copy()
|
214 |
+
if monthly_gains_df.empty:
|
215 |
+
follower_stats_need_sync = True
|
216 |
+
logging.info("Follower stats need sync: No monthly gains data in Bubble.")
|
217 |
+
elif FOLLOWER_STATS_CATEGORY_COLUMN not in monthly_gains_df.columns:
|
218 |
+
follower_stats_need_sync = True
|
219 |
+
logging.info(f"Follower stats need sync: Date column '{FOLLOWER_STATS_CATEGORY_COLUMN}' missing in monthly gains.")
|
220 |
+
else:
|
221 |
+
# Ensure date conversion does not raise SettingWithCopyWarning by using .loc
|
222 |
+
monthly_gains_df.loc[:, FOLLOWER_STATS_CATEGORY_COLUMN] = pd.to_datetime(monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce').dt.normalize()
|
223 |
+
last_gain_date = monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN].dropna().max()
|
224 |
+
if pd.isna(last_gain_date): # No valid dates after conversion
|
225 |
+
follower_stats_need_sync = True
|
226 |
+
logging.info("Follower stats need sync: No valid dates in monthly gains after conversion.")
|
227 |
+
else:
|
228 |
+
# Sync if the last recorded gain is for a month *before* the start of the current month.
|
229 |
+
# This ensures we attempt to fetch the previous month's data if it's not there.
|
230 |
+
start_of_current_month = pd.Timestamp('now', tz='UTC').normalize().replace(day=1)
|
231 |
+
if last_gain_date < start_of_current_month:
|
232 |
+
follower_stats_need_sync = True
|
233 |
+
logging.info(f"Follower stats need sync: Last gain date {last_gain_date} is before current month start {start_of_current_month}.")
|
234 |
+
else:
|
235 |
+
logging.info(f"Follower monthly gains up-to-date. Last gain recorded on: {last_gain_date}")
|
236 |
+
|
237 |
+
# Also trigger sync if demographic data (non-monthly gains) is missing entirely
|
238 |
+
# This is a basic check; more granular checks could be added for specific demographic types if needed.
|
239 |
+
if fs_df[fs_df[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].empty:
|
240 |
+
follower_stats_need_sync = True
|
241 |
+
logging.info("Follower stats need sync: Demographic data (non-monthly types) missing.")
|
242 |
+
|
243 |
+
|
244 |
+
# Update Sync Button based on token and needed actions
|
245 |
+
sync_actions = []
|
246 |
+
if new_state['fetch_count_for_api'] > 0:
|
247 |
+
sync_actions.append(f"{new_state['fetch_count_for_api']} Posts")
|
248 |
+
if mentions_need_sync:
|
249 |
+
sync_actions.append("Mentions")
|
250 |
+
if follower_stats_need_sync:
|
251 |
+
sync_actions.append("Follower Stats")
|
252 |
+
|
253 |
+
if new_state["token"] and sync_actions: # Token present and actions needed
|
254 |
+
button_label = f"π Sync LinkedIn Data ({', '.join(sync_actions)})"
|
255 |
+
button_update = gr.update(value=button_label, visible=True, interactive=True)
|
256 |
+
elif new_state["token"]: # Token present but nothing to sync
|
257 |
+
button_label = "β
Data Up-to-Date"
|
258 |
+
button_update = gr.update(value=button_label, visible=True, interactive=False) # Visible but not interactive
|
259 |
+
else: # No token
|
260 |
+
button_update = gr.update(visible=False, interactive=False) # Keep hidden
|
261 |
|
262 |
token_status_message = check_token_status(new_state)
|
263 |
+
logging.info(f"Token processing complete. Status: {token_status_message}. Button: {button_update}. Sync actions: {sync_actions}")
|
264 |
return token_status_message, new_state, button_update
|
265 |
|
266 |
|
267 |
def sync_linkedin_mentions(token_state):
    """Fetch new LinkedIn mentions and upload them to Bubble.

    Decides whether a mentions sync is needed (empty Bubble cache, missing
    date column, or last mention >= 7 days old), fetches mentions from the
    LinkedIn API, filters out ones already stored in Bubble, uploads the
    remainder, and refreshes the in-memory ``bubble_mentions_df``.

    Args:
        token_state: App state dict holding the LinkedIn token, client id,
            org URN and cached Bubble DataFrames.

    Returns:
        Tuple of (status message fragment, updated token_state).
    """
    logging.info("Starting LinkedIn mentions sync process.")
    if not token_state or not token_state.get("token"):
        logging.error("Mentions sync: Access denied. No LinkedIn token.")
        return "Mentions: No token. ", token_state

    client_id = token_state.get("client_id")
    token_dict = token_state.get("token")
    org_urn = token_state.get('org_urn')
    bubble_mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame()).copy()  # Work with a copy

    if not org_urn or not client_id or client_id == "ENV VAR MISSING":
        logging.error("Mentions sync: Configuration error (Org URN or Client ID missing).")
        return "Mentions: Config error. ", token_state

    # Determine if mentions sync is needed and how many to fetch
    fetch_count_for_mentions_api = 0
    mentions_sync_is_needed_now = False
    if bubble_mentions_df.empty:
        mentions_sync_is_needed_now = True
        fetch_count_for_mentions_api = DEFAULT_MENTIONS_INITIAL_FETCH_COUNT
        logging.info("Mentions sync needed: Bubble DF empty. Fetching initial count.")
    else:
        if BUBBLE_MENTIONS_DATE_COLUMN_NAME not in bubble_mentions_df.columns or \
           bubble_mentions_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME].isnull().all():
            mentions_sync_is_needed_now = True
            fetch_count_for_mentions_api = DEFAULT_MENTIONS_INITIAL_FETCH_COUNT
            logging.info(f"Mentions sync needed: Date column '{BUBBLE_MENTIONS_DATE_COLUMN_NAME}' missing or all null. Fetching initial count.")
        else:
            # FIX: the original code took a second, redundant .copy() of the
            # DataFrame here (it was already copied above). Converting the date
            # column into a standalone Series avoids both the extra copy and
            # mutating the cached frame.
            mention_dates = pd.to_datetime(
                bubble_mentions_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True
            )
            last_mention_date_utc = mention_dates.dropna().max()
            # Sync if no valid last mention date or if it's 7 days or older
            if pd.isna(last_mention_date_utc) or \
               (pd.Timestamp('now', tz='UTC').normalize() - last_mention_date_utc.normalize()).days >= 7:
                mentions_sync_is_needed_now = True
                fetch_count_for_mentions_api = DEFAULT_MENTIONS_UPDATE_FETCH_COUNT  # Fetch update count if data is old
                logging.info(f"Mentions sync needed: Last mention date {last_mention_date_utc} is old or invalid. Fetching update count.")

    if not mentions_sync_is_needed_now:
        logging.info("Mentions data is fresh based on current check. No API fetch needed for mentions.")
        return "Mentions: Up-to-date. ", token_state

    logging.info(f"Mentions sync proceeding. Fetch count: {fetch_count_for_mentions_api}")

    try:
        processed_raw_mentions = fetch_linkedin_mentions_core(client_id, token_dict, org_urn, count=fetch_count_for_mentions_api)
        if not processed_raw_mentions:
            logging.info("Mentions sync: No new mentions found via API.")
            return "Mentions: None found via API. ", token_state

        existing_mention_ids = set()
        if not bubble_mentions_df.empty and BUBBLE_MENTIONS_ID_COLUMN_NAME in bubble_mentions_df.columns:
            # Ensure IDs are strings for reliable comparison, handling potential NaNs
            existing_mention_ids = set(bubble_mentions_df[BUBBLE_MENTIONS_ID_COLUMN_NAME].dropna().astype(str))

        sentiments_map = analyze_mentions_sentiment(processed_raw_mentions)  # Assumes this returns a map {mention_id: sentiment_data}
        all_compiled_mentions = compile_detailed_mentions(processed_raw_mentions, sentiments_map)  # Assumes this adds sentiment to each mention dict

        # Filter out mentions already in Bubble
        new_compiled_mentions_to_upload = [
            m for m in all_compiled_mentions if str(m.get("id")) not in existing_mention_ids
        ]

        if not new_compiled_mentions_to_upload:
            logging.info("Mentions sync: All fetched mentions are already in Bubble.")
            return "Mentions: All fetched already in Bubble. ", token_state

        bubble_ready_mentions = prepare_mentions_for_bubble(new_compiled_mentions_to_upload)  # Prepare for Bubble format
        if bubble_ready_mentions:
            bulk_upload_to_bubble(bubble_ready_mentions, BUBBLE_MENTIONS_TABLE_NAME)
            logging.info(f"Successfully uploaded {len(bubble_ready_mentions)} new mentions to Bubble.")
            # Update in-memory DataFrame; drop duplicates on ID, keeping the
            # latest (the newly added rows win if IDs somehow overlap)
            updated_mentions_df = pd.concat([bubble_mentions_df, pd.DataFrame(bubble_ready_mentions)], ignore_index=True)
            token_state["bubble_mentions_df"] = updated_mentions_df.drop_duplicates(subset=[BUBBLE_MENTIONS_ID_COLUMN_NAME], keep='last')
            return f"Mentions: Synced {len(bubble_ready_mentions)} new. ", token_state
        else:
            logging.info("Mentions sync: No new mentions were prepared for Bubble upload (possibly all filtered or empty after prep).")
            return "Mentions: No new ones to upload. ", token_state
    except ValueError as ve:  # Catch specific errors if your API calls raise them
        logging.error(f"ValueError during mentions sync: {ve}", exc_info=True)
        return f"Mentions Error: {html.escape(str(ve))}. ", token_state
    except Exception as e:
        logging.exception("Unexpected error in sync_linkedin_mentions.")  # Logs full traceback
        return f"Mentions: Unexpected error ({type(e).__name__}). ", token_state
def sync_linkedin_follower_stats(token_state):
    """Fetch new LinkedIn follower statistics and upload them to Bubble.

    Sync is triggered when the cached Bubble data is empty, the monthly-gains
    rows (type 'follower_gains_monthly') or their date column are missing, the
    last recorded gain month precedes the current month, or demographic rows
    (any non-monthly type) are absent. Only new monthly gains and new/changed
    demographic entries are uploaded; the in-memory
    ``token_state["bubble_follower_stats_df"]`` is then rebuilt with
    de-duplicated data.

    Args:
        token_state: App state dict with the LinkedIn token, client id,
            org URN and cached Bubble DataFrames.

    Returns:
        Tuple of (status message fragment, updated token_state).
    """
    logging.info("Starting LinkedIn follower stats sync process.")
    if not token_state or not token_state.get("token"):
        logging.error("Follower Stats sync: Access denied. No LinkedIn token.")
        return "Follower Stats: No token. ", token_state

    client_id = token_state.get("client_id")
    token_dict = token_state.get("token")
    org_urn = token_state.get('org_urn')

    if not org_urn or not client_id or client_id == "ENV VAR MISSING":
        logging.error("Follower Stats sync: Configuration error (Org URN or Client ID missing).")
        return "Follower Stats: Config error. ", token_state

    # Determine if follower stats sync is needed (logic copied and adapted from process_and_store_bubble_token)
    follower_stats_sync_is_needed_now = False
    fs_df_current = token_state.get("bubble_follower_stats_df", pd.DataFrame()).copy()  # Work with a copy
    if fs_df_current.empty:
        follower_stats_sync_is_needed_now = True
        logging.info("Follower stats sync needed: Bubble DF is empty.")
    else:
        monthly_gains_df = fs_df_current[fs_df_current[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].copy()
        if monthly_gains_df.empty or FOLLOWER_STATS_CATEGORY_COLUMN not in monthly_gains_df.columns:
            follower_stats_sync_is_needed_now = True
            logging.info("Follower stats sync needed: Monthly gains data missing or date column absent.")
        else:
            # .loc on the copy avoids SettingWithCopyWarning; for monthly gains the
            # category column holds 'YYYY-MM-DD' strings (per comment further below).
            monthly_gains_df.loc[:, FOLLOWER_STATS_CATEGORY_COLUMN] = pd.to_datetime(monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce').dt.normalize()
            last_gain_date = monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN].dropna().max()
            start_of_current_month = pd.Timestamp('now', tz='UTC').normalize().replace(day=1)
            # Sync when the newest recorded gain month is before the current month
            # (i.e. the previous month's figures may not be stored yet).
            if pd.isna(last_gain_date) or last_gain_date < start_of_current_month:
                follower_stats_sync_is_needed_now = True
                logging.info(f"Follower stats sync needed: Last gain date {last_gain_date} is old or invalid.")

        # Also sync when demographic rows (any non-monthly type) are missing entirely.
        if fs_df_current[fs_df_current[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].empty:
            follower_stats_sync_is_needed_now = True
            logging.info("Follower stats sync needed: Demographic data (non-monthly) is missing.")

    if not follower_stats_sync_is_needed_now:
        logging.info("Follower stats data is fresh based on current check. No API fetch needed.")
        return "Follower Stats: Data up-to-date. ", token_state

    logging.info(f"Follower stats sync proceeding for org_urn: {org_urn}")
    try:
        # This function should return a list of dicts, each dict representing a stat entry
        api_follower_stats = get_linkedin_follower_stats(client_id, token_dict, org_urn)
        if not api_follower_stats:  # api_follower_stats could be None or empty list
            logging.info(f"Follower Stats sync: No stats found via API for org {org_urn}.")
            return "Follower Stats: None found via API. ", token_state

        bubble_follower_stats_df_orig = token_state.get("bubble_follower_stats_df", pd.DataFrame()).copy()
        new_stats_to_upload = []

        # --- Process Monthly Gains ---
        api_monthly_gains = [s for s in api_follower_stats if s.get(FOLLOWER_STATS_TYPE_COLUMN) == 'follower_gains_monthly']
        existing_monthly_gain_dates = set()
        if not bubble_follower_stats_df_orig.empty:
            bubble_monthly_df = bubble_follower_stats_df_orig[bubble_follower_stats_df_orig[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly']
            if FOLLOWER_STATS_CATEGORY_COLUMN in bubble_monthly_df.columns:
                # Ensure dates are strings for set comparison, handle potential NaNs from to_datetime if any
                existing_monthly_gain_dates = set(bubble_monthly_df[FOLLOWER_STATS_CATEGORY_COLUMN].astype(str).unique())

        for gain_stat in api_monthly_gains:
            # category_name for monthly gains is 'YYYY-MM-DD' string from linkedin_follower_stats
            if str(gain_stat.get(FOLLOWER_STATS_CATEGORY_COLUMN)) not in existing_monthly_gain_dates:
                new_stats_to_upload.append(gain_stat)

        # --- Process Demographics (add if new or different counts) ---
        api_demographics = [s for s in api_follower_stats if s.get(FOLLOWER_STATS_TYPE_COLUMN) != 'follower_gains_monthly']

        # Create a map of existing demographics for quick lookup and comparison
        # Key: (org_urn, type, category_name) -> (organic_count, paid_count)
        existing_demographics_map = {}
        if not bubble_follower_stats_df_orig.empty:
            bubble_demographics_df = bubble_follower_stats_df_orig[bubble_follower_stats_df_orig[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly']
            if not bubble_demographics_df.empty and \
               all(col in bubble_demographics_df.columns for col in [
                   FOLLOWER_STATS_ORG_URN_COLUMN, FOLLOWER_STATS_TYPE_COLUMN,
                   FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN,
                   FOLLOWER_STATS_PAID_COLUMN
               ]):
                for _, row in bubble_demographics_df.iterrows():
                    key = (
                        str(row[FOLLOWER_STATS_ORG_URN_COLUMN]),
                        str(row[FOLLOWER_STATS_TYPE_COLUMN]),
                        str(row[FOLLOWER_STATS_CATEGORY_COLUMN])
                    )
                    existing_demographics_map[key] = (
                        row[FOLLOWER_STATS_ORGANIC_COLUMN],
                        row[FOLLOWER_STATS_PAID_COLUMN]
                    )

        for demo_stat in api_demographics:
            key = (
                str(demo_stat.get(FOLLOWER_STATS_ORG_URN_COLUMN)),
                str(demo_stat.get(FOLLOWER_STATS_TYPE_COLUMN)),
                str(demo_stat.get(FOLLOWER_STATS_CATEGORY_COLUMN))
            )
            api_counts = (
                demo_stat.get(FOLLOWER_STATS_ORGANIC_COLUMN, 0),
                demo_stat.get(FOLLOWER_STATS_PAID_COLUMN, 0)
            )

            # Upload when the entry is new, or its counts changed since last stored.
            # NOTE(review): this tuple comparison assumes stored counts and API counts
            # use comparable numeric types — confirm Bubble returns plain ints.
            if key not in existing_demographics_map or existing_demographics_map[key] != api_counts:
                new_stats_to_upload.append(demo_stat)

        if not new_stats_to_upload:
            logging.info(f"Follower Stats sync: Data for org {org_urn} is up-to-date or no changes found.")
            return "Follower Stats: Data up-to-date or no changes. ", token_state

        bulk_upload_to_bubble(new_stats_to_upload, BUBBLE_FOLLOWER_STATS_TABLE_NAME)
        logging.info(f"Successfully uploaded {len(new_stats_to_upload)} follower stat entries to Bubble for org {org_urn}.")

        # Update in-memory DataFrame: Concatenate old and new, then drop duplicates strategically
        temp_df = pd.concat([bubble_follower_stats_df_orig, pd.DataFrame(new_stats_to_upload)], ignore_index=True)

        # For monthly gains, unique by org, date (category_name)
        monthly_part = temp_df[temp_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].drop_duplicates(
            subset=[FOLLOWER_STATS_ORG_URN_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN],
            keep='last'  # Keep the newest entry if dates somehow collide (shouldn't with current logic)
        )
        # For demographics, unique by org, type, and category_name
        demographics_part = temp_df[temp_df[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'].drop_duplicates(
            subset=[FOLLOWER_STATS_ORG_URN_COLUMN, FOLLOWER_STATS_TYPE_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN],
            keep='last'  # This ensures that if a demographic was "updated", the new version is kept
        )
        token_state["bubble_follower_stats_df"] = pd.concat([monthly_part, demographics_part], ignore_index=True)

        return f"Follower Stats: Synced {len(new_stats_to_upload)} entries. ", token_state
    except ValueError as ve:  # Catch specific errors if your API calls raise them
        logging.error(f"ValueError during follower stats sync for {org_urn}: {ve}", exc_info=True)
        return f"Follower Stats Error: {html.escape(str(ve))}. ", token_state
    except Exception as e:
        logging.exception(f"Unexpected error in sync_linkedin_follower_stats for {org_urn}.")  # Logs full traceback
        return f"Follower Stats: Unexpected error ({type(e).__name__}). ", token_state
492 |
+
def sync_all_linkedin_data(token_state):
    """Orchestrate the syncing of all LinkedIn data types (Posts, Mentions, Follower Stats).

    Posts are fetched and uploaded inline here (count taken from
    ``token_state['fetch_count_for_api']``); mentions and follower stats are
    delegated to their dedicated sync functions, each of which updates its own
    DataFrame inside ``token_state``.

    Args:
        token_state: App state dict with the LinkedIn token, client id,
            org URN, cached Bubble DataFrames and the posts fetch count.

    Returns:
        Tuple of (HTML status message, updated token_state).
    """
    # NOTE(review): the status emojis below were mojibake in the source view;
    # ❌/✅ reconstructed — confirm against the deployed file.
    logging.info("Starting sync_all_linkedin_data process.")
    if not token_state or not token_state.get("token"):
        logging.error("Sync All: Access denied. LinkedIn token not available.")
        return "<p style='color:red; text-align:center;'>❌ Access denied. LinkedIn token not available.</p>", token_state

    client_id = token_state.get("client_id")
    token_dict = token_state.get("token")
    org_urn = token_state.get('org_urn')
    fetch_count_for_posts_api = token_state.get('fetch_count_for_api', 0)
    # Operate on copies to avoid modifying original DFs in state directly until the end
    bubble_posts_df_orig = token_state.get("bubble_posts_df", pd.DataFrame()).copy()

    posts_sync_message = ""
    mentions_sync_message = ""
    follower_stats_sync_message = ""

    if not org_urn:
        logging.error("Sync All: Org URN missing in token_state.")
        return "<p style='color:red;'>❌ Config error: Org URN missing.</p>", token_state
    if not client_id or client_id == "ENV VAR MISSING":
        logging.error("Sync All: Client ID missing or not set.")
        return "<p style='color:red;'>❌ Config error: Client ID missing.</p>", token_state

    # --- Sync Posts ---
    if fetch_count_for_posts_api == 0:
        # fetch_count was decided earlier (process_and_store_bubble_token); 0 means fresh.
        posts_sync_message = "Posts: Already up-to-date. "
        logging.info("Posts sync: Skipped as fetch_count_for_posts_api is 0.")
    else:
        logging.info(f"Posts sync: Starting fetch for {fetch_count_for_posts_api} posts.")
        try:
            # fetch_linkedin_posts_core is expected to return: (processed_raw_posts, stats_map, errors_list)
            processed_raw_posts, stats_map, _ = fetch_linkedin_posts_core(client_id, token_dict, org_urn, count=fetch_count_for_posts_api)

            if not processed_raw_posts:
                posts_sync_message = "Posts: None found via API. "
                logging.info("Posts sync: No raw posts returned from API.")
            else:
                existing_post_urns = set()
                if not bubble_posts_df_orig.empty and BUBBLE_POST_URN_COLUMN_NAME in bubble_posts_df_orig.columns:
                    # String-normalize URNs for reliable membership tests
                    existing_post_urns = set(bubble_posts_df_orig[BUBBLE_POST_URN_COLUMN_NAME].dropna().astype(str))

                # Filter out posts already in Bubble
                new_raw_posts = [p for p in processed_raw_posts if str(p.get(LINKEDIN_POST_URN_KEY)) not in existing_post_urns]

                if not new_raw_posts:
                    posts_sync_message = "Posts: All fetched already in Bubble. "
                    logging.info("Posts sync: All fetched posts were already found in Bubble.")
                else:
                    logging.info(f"Posts sync: Processing {len(new_raw_posts)} new raw posts.")
                    post_urns_to_process = [p[LINKEDIN_POST_URN_KEY] for p in new_raw_posts if p.get(LINKEDIN_POST_URN_KEY)]

                    all_comments_data = fetch_comments(client_id, token_dict, post_urns_to_process, stats_map)
                    sentiments_per_post = analyze_sentiment(all_comments_data)  # Assumes analysis of comments
                    detailed_new_posts = compile_detailed_posts(new_raw_posts, stats_map, sentiments_per_post)  # Compiles with stats and sentiment

                    # prepare_data_for_bubble should return tuple: (posts_for_bubble, post_stats_for_bubble, post_comments_for_bubble)
                    li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed_new_posts, all_comments_data)

                    if li_posts:  # If there are posts to upload
                        bulk_upload_to_bubble(li_posts, "LI_posts")
                        # Update in-memory DataFrame for posts
                        updated_posts_df = pd.concat([bubble_posts_df_orig, pd.DataFrame(li_posts)], ignore_index=True)
                        token_state["bubble_posts_df"] = updated_posts_df.drop_duplicates(subset=[BUBBLE_POST_URN_COLUMN_NAME], keep='last')
                        logging.info(f"Posts sync: Uploaded {len(li_posts)} new posts to Bubble.")

                        if li_post_stats:
                            bulk_upload_to_bubble(li_post_stats, "LI_post_stats")
                            logging.info(f"Posts sync: Uploaded {len(li_post_stats)} post_stats entries.")
                            # Note: Consider how/if to update a local stats_df in token_state if you maintain one.
                        if li_post_comments:
                            bulk_upload_to_bubble(li_post_comments, "LI_post_comments")
                            logging.info(f"Posts sync: Uploaded {len(li_post_comments)} post_comments entries.")
                            # Note: Consider how/if to update a local comments_df in token_state.

                        posts_sync_message = f"Posts: Synced {len(li_posts)} new. "
                    else:
                        posts_sync_message = "Posts: No new ones to upload after processing. "
                        logging.info("Posts sync: No new posts were prepared for Bubble upload.")
        except ValueError as ve:  # Catch specific errors from your API calls
            posts_sync_message = f"Posts Error: {html.escape(str(ve))}. "
            logging.error(f"Posts sync: ValueError: {ve}", exc_info=True)
        except Exception as e:
            logging.exception("Posts sync: Unexpected error during processing.")  # Logs full traceback
            posts_sync_message = f"Posts: Unexpected error ({type(e).__name__}). "

    # --- Sync Mentions ---
    # The sync_linkedin_mentions function updates token_state["bubble_mentions_df"] internally
    mentions_sync_message, token_state = sync_linkedin_mentions(token_state)

    # --- Sync Follower Stats ---
    # The sync_linkedin_follower_stats function updates token_state["bubble_follower_stats_df"] internally
    follower_stats_sync_message, token_state = sync_linkedin_follower_stats(token_state)

    logging.info(f"Sync process complete. Messages: Posts: [{posts_sync_message.strip()}], Mentions: [{mentions_sync_message.strip()}], Follower Stats: [{follower_stats_sync_message.strip()}]")
    final_message = f"<p style='color:green; text-align:center;'>✅ Sync Attempted. {posts_sync_message} {mentions_sync_message} {follower_stats_sync_message}</p>"
    return final_message, token_state
592 |
def display_main_dashboard(token_state):
    """Generate HTML for the main dashboard display using data from token_state.

    Renders three sections from the cached Bubble DataFrames: recent posts,
    recent mentions, and a follower statistics summary (latest monthly gain
    plus a demographic row count).

    Args:
        token_state: App state dict holding the LinkedIn token and the cached
            ``bubble_posts_df`` / ``bubble_mentions_df`` /
            ``bubble_follower_stats_df`` DataFrames.

    Returns:
        An HTML string, or a plain access-denied message when no token exists.
    """
    if not token_state or not token_state.get("token"):
        logging.warning("Dashboard display: Access denied. No token available.")
        return "❌ Access denied. No token available for dashboard."

    html_parts = ["<div style='padding:10px;'><h3>Dashboard Overview</h3>"]

    # Display Recent Posts
    posts_df = token_state.get("bubble_posts_df", pd.DataFrame())
    html_parts.append(f"<h4>Recent Posts ({len(posts_df)} in Bubble):</h4>")
    if not posts_df.empty:
        # Define columns to show, ensuring they exist in the DataFrame
        cols_to_show_posts = [col for col in [BUBBLE_POST_DATE_COLUMN_NAME, 'text', 'sentiment', 'summary_text', 'li_eb_label'] if col in posts_df.columns]
        if not cols_to_show_posts:
            html_parts.append("<p>No relevant post columns found to display.</p>")
        else:
            display_df_posts = posts_df.copy()
            if BUBBLE_POST_DATE_COLUMN_NAME in display_df_posts.columns:
                try:
                    # Format date and sort newest-first ('%Y-%m-%d %H:%M' strings
                    # sort correctly lexicographically)
                    display_df_posts[BUBBLE_POST_DATE_COLUMN_NAME] = pd.to_datetime(display_df_posts[BUBBLE_POST_DATE_COLUMN_NAME], errors='coerce').dt.strftime('%Y-%m-%d %H:%M')
                    display_df_posts = display_df_posts.sort_values(by=BUBBLE_POST_DATE_COLUMN_NAME, ascending=False)
                except Exception as e:
                    logging.error(f"Error formatting post dates for display: {e}")
                    html_parts.append("<p>Error formatting post dates.</p>")
            # SECURITY NOTE(review): escape=False renders LinkedIn-sourced text as
            # raw HTML — potential XSS unless upstream data is sanitized. Kept
            # as-is to preserve behavior; confirm sanitization before trusting it.
            html_parts.append(display_df_posts[cols_to_show_posts].head().to_html(escape=False, index=False, classes="table table-striped table-sm"))
    else:
        html_parts.append("<p>No posts loaded from Bubble.</p>")
    html_parts.append("<hr/>")

    # Display Recent Mentions
    mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame())
    html_parts.append(f"<h4>Recent Mentions ({len(mentions_df)} in Bubble):</h4>")
    if not mentions_df.empty:
        cols_to_show_mentions = [col for col in [BUBBLE_MENTIONS_DATE_COLUMN_NAME, "mention_text", "sentiment_label"] if col in mentions_df.columns]
        if not cols_to_show_mentions:
            html_parts.append("<p>No relevant mention columns found to display.</p>")
        else:
            display_df_mentions = mentions_df.copy()
            if BUBBLE_MENTIONS_DATE_COLUMN_NAME in display_df_mentions.columns:
                try:
                    display_df_mentions[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = pd.to_datetime(display_df_mentions[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce').dt.strftime('%Y-%m-%d %H:%M')
                    display_df_mentions = display_df_mentions.sort_values(by=BUBBLE_MENTIONS_DATE_COLUMN_NAME, ascending=False)
                except Exception as e:
                    logging.error(f"Error formatting mention dates for display: {e}")
                    html_parts.append("<p>Error formatting mention dates.</p>")
            # Same escape=False XSS caveat as the posts table ("mention_text").
            html_parts.append(display_df_mentions[cols_to_show_mentions].head().to_html(escape=False, index=False, classes="table table-striped table-sm"))
    else:
        html_parts.append("<p>No mentions loaded from Bubble.</p>")
    html_parts.append("<hr/>")

    # Display Follower Statistics Summary
    follower_stats_df = token_state.get("bubble_follower_stats_df", pd.DataFrame())
    html_parts.append(f"<h4>Follower Statistics ({len(follower_stats_df)} entries in Bubble):</h4>")
    if not follower_stats_df.empty:
        # Latest Monthly Follower Gain
        monthly_gains = follower_stats_df[follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'].copy()
        if not monthly_gains.empty and FOLLOWER_STATS_CATEGORY_COLUMN in monthly_gains.columns and \
           FOLLOWER_STATS_ORGANIC_COLUMN in monthly_gains.columns and FOLLOWER_STATS_PAID_COLUMN in monthly_gains.columns:
            try:
                # FOLLOWER_STATS_CATEGORY_COLUMN for monthly gains is 'YYYY-MM-DD'
                monthly_gains.loc[:, FOLLOWER_STATS_CATEGORY_COLUMN] = pd.to_datetime(monthly_gains[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce').dt.strftime('%Y-%m-%d')
                latest_gain = monthly_gains.sort_values(by=FOLLOWER_STATS_CATEGORY_COLUMN, ascending=False).head(1)
                if not latest_gain.empty:
                    html_parts.append("<h5>Latest Monthly Follower Gain:</h5>")
                    html_parts.append(latest_gain[[FOLLOWER_STATS_CATEGORY_COLUMN, FOLLOWER_STATS_ORGANIC_COLUMN, FOLLOWER_STATS_PAID_COLUMN]].to_html(escape=True, index=False, classes="table table-sm"))
                else:
                    html_parts.append("<p>No valid monthly follower gain data to display after processing.</p>")
            except Exception as e:
                logging.error(f"Error formatting follower gain dates for display: {e}")
                html_parts.append("<p>Error displaying monthly follower gain data.</p>")
        else:
            html_parts.append("<p>No monthly follower gain data or required columns are missing.</p>")

        # Count of Demographic Entries
        demographics_count = len(follower_stats_df[follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] != 'follower_gains_monthly'])
        html_parts.append(f"<p>Total demographic entries (seniority, industry, etc.): {demographics_count}</p>")
    else:
        html_parts.append("<p>No follower statistics loaded from Bubble.</p>")

    html_parts.append("</div>")
    return "".join(html_parts)
|
680 |
def guarded_fetch_analytics(token_state):
    """Safely invoke fetch_and_render_analytics.

    Refuses to run without a LinkedIn token, and always forwards real
    DataFrames (empty ones when nothing is cached) so the analytics
    renderer never receives None.
    """
    if not token_state or not token_state.get("token"):
        logging.warning("Analytics fetch: Access denied. No token.")
        # One status string plus seven plot slots — must match the number of
        # Gradio outputs wired to this handler.
        return ("β Access denied. No token.", None, None, None, None, None, None, None)

    # Fall back to empty frames so downstream code can assume DataFrame inputs.
    posts_frame = token_state.get("bubble_posts_df", pd.DataFrame())
    mentions_frame = token_state.get("bubble_mentions_df", pd.DataFrame())
    followers_frame = token_state.get("bubble_follower_stats_df", pd.DataFrame())

    logging.info("Calling fetch_and_render_analytics with current token_state data.")
    return fetch_and_render_analytics(
        token_state.get("client_id"),
        token_state.get("token"),
        token_state.get("org_urn"),
        posts_frame,
        mentions_frame,
        followers_frame,
    )
|
702 |
|
703 |
def run_mentions_tab_display(token_state):
    """Build the Mentions tab: an HTML table of the 20 most recent mentions
    plus a sentiment-distribution bar chart (None when no plot can be made)."""
    logging.info("Updating Mentions Tab display.")
    if not token_state or not token_state.get("token"):
        logging.warning("Mentions tab: Access denied. No token.")
        return ("β Access denied. No token available for mentions.", None)

    mentions_df = token_state.get("bubble_mentions_df", pd.DataFrame())
    if mentions_df.empty:
        logging.info("Mentions tab: No mentions data in Bubble.")
        return ("<p style='text-align:center;'>No mentions data in Bubble. Try syncing.</p>", None)

    html_parts = ["<h3 style='text-align:center;'>Recent Mentions</h3>"]

    # Only show columns that actually exist in the DataFrame.
    wanted_cols = [BUBBLE_MENTIONS_DATE_COLUMN_NAME, "mention_text",
                   "sentiment_label", BUBBLE_MENTIONS_ID_COLUMN_NAME]
    display_columns = [c for c in wanted_cols if c in mentions_df.columns]

    table_df = mentions_df.copy()
    if BUBBLE_MENTIONS_DATE_COLUMN_NAME in table_df.columns:
        try:
            parsed_dates = pd.to_datetime(table_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce')
            table_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME] = parsed_dates.dt.strftime('%Y-%m-%d %H:%M')
            table_df = table_df.sort_values(by=BUBBLE_MENTIONS_DATE_COLUMN_NAME, ascending=False)
        except Exception as e:
            logging.error(f"Error formatting mention dates for tab display: {e}")
            html_parts.append("<p>Error formatting mention dates.</p>")

    if not display_columns or table_df[display_columns].empty:
        html_parts.append("<p>Required columns for mentions display are missing or no data after processing.</p>")
    else:
        # escape=False because "mention_text" may deliberately carry HTML markup.
        html_parts.append(table_df[display_columns].head(20).to_html(
            escape=False, index=False, classes="table table-sm"))

    mentions_html_output = "\n".join(html_parts)

    fig = None
    if not mentions_df.empty and "sentiment_label" in mentions_df.columns:
        try:
            import matplotlib.pyplot as plt
            plt.switch_backend('Agg')  # headless rendering for Gradio
            sentiment_fig, ax = plt.subplots(figsize=(6, 4))
            mentions_df["sentiment_label"].value_counts().plot(
                kind='bar', ax=ax,
                color=['#4CAF50', '#FFC107', '#F44336', '#9E9E9E', '#2196F3'])
            ax.set_title("Mention Sentiment Distribution")
            ax.set_ylabel("Count")
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
            fig = sentiment_fig
            logging.info("Mentions tab: Sentiment distribution plot generated.")
        except Exception as e:
            logging.error(f"Error generating mentions plot: {e}", exc_info=True)
            fig = None
    else:
        logging.info("Mentions tab: Not enough data or 'sentiment_label' column missing for plot.")

    return mentions_html_output, fig
+
def _follower_demographic_section(stats_df, stat_type, display_name, bar_color, html_parts):
    """Render one demographic breakdown (seniority, industry, ...).

    Filters `stats_df` to rows of `stat_type`, appends a top-10 (by organic
    followers) HTML table to `html_parts` in place, and returns a bar-chart
    figure of the same top 10 — or None when there is no data or an error
    occurred (an explanatory <p> is appended instead).
    """
    import matplotlib.pyplot as plt
    label = display_name.lower()
    demo_df = stats_df[
        (stats_df[FOLLOWER_STATS_TYPE_COLUMN] == stat_type) &
        (stats_df[FOLLOWER_STATS_CATEGORY_COLUMN].notna()) &  # category name
        (stats_df[FOLLOWER_STATS_ORGANIC_COLUMN].notna())
    ].copy()
    if demo_df.empty:
        html_parts.append(f"<p>No follower {label} data available or required columns missing.</p>")
        return None
    try:
        demo_sorted = demo_df.sort_values(by=FOLLOWER_STATS_ORGANIC_COLUMN, ascending=False)
        html_parts.append(f"<h4>Followers by {display_name} (Top 10 Organic):</h4>")
        html_parts.append(demo_sorted[[FOLLOWER_STATS_CATEGORY_COLUMN,
                                       FOLLOWER_STATS_ORGANIC_COLUMN,
                                       FOLLOWER_STATS_PAID_COLUMN]].head(10).to_html(
            escape=True, index=False, classes="table table-sm"))

        plt.switch_backend('Agg')
        fig, ax = plt.subplots(figsize=(8, 5))
        top_n = demo_sorted.nlargest(10, FOLLOWER_STATS_ORGANIC_COLUMN)
        ax.bar(top_n[FOLLOWER_STATS_CATEGORY_COLUMN], top_n[FOLLOWER_STATS_ORGANIC_COLUMN], color=bar_color)
        ax.set_title(f"Follower Distribution by {display_name} (Top 10 Organic)")
        ax.set_ylabel("Organic Follower Count")
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        logging.info(f"Follower stats tab: {display_name} distribution plot generated.")
        return fig
    except Exception as e:
        logging.error(f"Error processing or plotting {label} data: {e}", exc_info=True)
        html_parts.append(f"<p>Error displaying follower {label} data.</p>")
        return None


def run_follower_stats_tab_display(token_state):
    """Build the Follower Stats tab.

    Returns a 4-tuple: (overview HTML, monthly-gains figure, seniority
    figure, industry figure). Each figure slot is None when its underlying
    data is missing or failed to render.
    """
    logging.info("Updating Follower Stats Tab display.")
    if not token_state or not token_state.get("token"):
        logging.warning("Follower stats tab: Access denied. No token.")
        return ("β Access denied. No token available for follower stats.", None, None, None)

    follower_stats_df_orig = token_state.get("bubble_follower_stats_df", pd.DataFrame())
    if follower_stats_df_orig.empty:
        logging.info("Follower stats tab: No follower stats data in Bubble.")
        return ("<p style='text-align:center;'>No follower stats data in Bubble. Try syncing.</p>", None, None, None)

    follower_stats_df = follower_stats_df_orig.copy()  # never mutate shared state
    html_parts = ["<div style='padding:10px;'><h3 style='text-align:center;'>Follower Statistics Overview</h3>"]

    plot_monthly_gains = None

    # One pyplot import + backend switch for the whole function
    # (previously repeated inside each section's try block).
    import matplotlib.pyplot as plt
    plt.switch_backend('Agg')

    # --- Monthly Gains Table & Plot ---
    monthly_gains_df = follower_stats_df[
        (follower_stats_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly') &
        (follower_stats_df[FOLLOWER_STATS_CATEGORY_COLUMN].notna()) &  # date string
        (follower_stats_df[FOLLOWER_STATS_ORGANIC_COLUMN].notna()) &
        (follower_stats_df[FOLLOWER_STATS_PAID_COLUMN].notna())
    ].copy()

    if not monthly_gains_df.empty:
        try:
            # FOLLOWER_STATS_CATEGORY_COLUMN for monthly gains is 'YYYY-MM-DD'.
            monthly_gains_df.loc[:, FOLLOWER_STATS_CATEGORY_COLUMN_DT] = pd.to_datetime(
                monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce')

            # Table: newest month first, dates rendered as YYYY-MM.
            table_display_df = monthly_gains_df.sort_values(
                by=FOLLOWER_STATS_CATEGORY_COLUMN_DT, ascending=False).copy()
            table_display_df[FOLLOWER_STATS_CATEGORY_COLUMN] = \
                table_display_df[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime('%Y-%m')
            html_parts.append("<h4>Monthly Follower Gains (Last 13 Months):</h4>")
            html_parts.append(table_display_df[[FOLLOWER_STATS_CATEGORY_COLUMN,
                                                FOLLOWER_STATS_ORGANIC_COLUMN,
                                                FOLLOWER_STATS_PAID_COLUMN]].head(13).to_html(
                escape=True, index=False, classes="table table-sm"))

            # Plot: aggregate once per month; the grouped index supplies the
            # x-axis labels directly, so labels can never get out of step with
            # the values (the old code recomputed the groupby per series and
            # derived the x-axis separately via .unique()).
            month_key = monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN_DT].dt.strftime('%Y-%m')
            monthly_sums = monthly_gains_df.groupby(month_key)[
                [FOLLOWER_STATS_ORGANIC_COLUMN, FOLLOWER_STATS_PAID_COLUMN]].sum().sort_index()

            fig_gains, ax_gains = plt.subplots(figsize=(10, 5))
            ax_gains.plot(monthly_sums.index, monthly_sums[FOLLOWER_STATS_ORGANIC_COLUMN],
                          marker='o', linestyle='-', label='Organic Gain')
            ax_gains.plot(monthly_sums.index, monthly_sums[FOLLOWER_STATS_PAID_COLUMN],
                          marker='x', linestyle='--', label='Paid Gain')
            ax_gains.set_title("Monthly Follower Gains Over Time")
            ax_gains.set_ylabel("Follower Count")
            ax_gains.set_xlabel("Month (YYYY-MM)")
            plt.xticks(rotation=45, ha='right')
            ax_gains.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plot_monthly_gains = fig_gains
            logging.info("Follower stats tab: Monthly gains plot generated.")
        except Exception as e:
            logging.error(f"Error processing or plotting monthly gains: {e}", exc_info=True)
            html_parts.append("<p>Error displaying monthly follower gain data.</p>")
    else:
        html_parts.append("<p>No monthly follower gain data available or required columns missing.</p>")
    html_parts.append("<hr/>")

    # --- Seniority and Industry sections (shared helper; were copy-paste twins) ---
    plot_seniority_dist = _follower_demographic_section(
        follower_stats_df, 'follower_seniority', 'Seniority', 'skyblue', html_parts)
    html_parts.append("<hr/>")
    plot_industry_dist = _follower_demographic_section(
        follower_stats_df, 'follower_industry', 'Industry', 'lightcoral', html_parts)

    html_parts.append("</div>")
    follower_html_output = "\n".join(html_parts)
    return follower_html_output, plot_monthly_gains, plot_seniority_dist, plot_industry_dist
891 |
|
892 |
# --- Gradio UI Blocks ---
|
893 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
               title="LinkedIn Organization Dashboard") as app:

    # Single source of truth shared by every tab: auth identifiers plus the
    # DataFrames cached from Bubble.
    token_state = gr.State(value={
        "token": None, "client_id": None, "org_urn": None,
        "bubble_posts_df": pd.DataFrame(), "fetch_count_for_api": 0,  # For posts
        "bubble_mentions_df": pd.DataFrame(),
        "bubble_follower_stats_df": pd.DataFrame(),
        "url_user_token_temp_storage": None  # To hold token from URL temporarily
    })

    gr.Markdown("# π LinkedIn Organization Dashboard")
    # Hidden fields that receive values parsed from the page URL, plus a
    # visible status line for token state.
    url_user_token_display = gr.Textbox(label="User Token (from URL - Hidden)", interactive=False, visible=False)
    status_box = gr.Textbox(label="Overall LinkedIn Token Status", interactive=False, value="Initializing...")
    org_urn_display = gr.Textbox(label="Organization URN (from URL - Hidden)", interactive=False, visible=False)

    # Step 1 of the startup chain: pull the URL parameters into the hidden
    # textboxes as soon as the page loads.
    app.load(fn=get_url_user_token, inputs=None, outputs=[url_user_token_display, org_urn_display], api_name="get_url_params", show_progress=False)

    def initial_load_sequence(url_token, org_urn_val, current_state):
        """Step 2 of startup (fires when org_urn_display is populated):
        process the token, decide whether a sync is needed, and render the
        first dashboard view from whatever Bubble data exists."""
        logging.info(f"Initial load sequence triggered by org_urn_display change. Org URN: {org_urn_val}")
        status_msg, new_state, btn_update = process_and_store_bubble_token(url_token, org_urn_val, current_state)
        dashboard_content = display_main_dashboard(new_state)
        return status_msg, new_state, btn_update, dashboard_content

    with gr.Tabs():
        with gr.TabItem("1οΈβ£ Dashboard & Sync"):
            gr.Markdown("System checks for existing data from Bubble. The 'Sync' button activates if new data needs to be fetched from LinkedIn based on the last sync times and data availability.")
            # Hidden/disabled until process_and_store_bubble_token decides a sync is needed.
            sync_data_btn = gr.Button("π Sync LinkedIn Data", variant="primary", visible=False, interactive=False)
            sync_status_html_output = gr.HTML("<p style='text-align:center;'>Sync status will appear here.</p>")
            dashboard_display_html = gr.HTML("<p style='text-align:center;'>Dashboard loading...</p>")

            # Startup wiring: org_urn_display changes once the URL params land,
            # which populates token_state, toggles the sync button and renders
            # the initial dashboard.
            org_urn_display.change(
                fn=initial_load_sequence,
                inputs=[url_user_token_display, org_urn_display, token_state],
                outputs=[status_box, token_state, sync_data_btn, dashboard_display_html],
                show_progress="full"
            )

            # Sync chain: (1) fetch from LinkedIn and upload to Bubble,
            # (2) re-evaluate sync status (button should flip to up-to-date),
            # (3) re-render the dashboard with the fresh data.
            sync_data_btn.click(
                fn=sync_all_linkedin_data,
                inputs=[token_state],
                outputs=[sync_status_html_output, token_state],
                show_progress="full"
            ).then(
                fn=process_and_store_bubble_token,
                inputs=[url_user_token_display, org_urn_display, token_state],
                outputs=[status_box, token_state, sync_data_btn],
                show_progress=False
            ).then(
                fn=display_main_dashboard,
                inputs=[token_state],
                outputs=[dashboard_display_html],
                show_progress=False
            )

        with gr.TabItem("2οΈβ£ Analytics"):
            fetch_analytics_btn = gr.Button("π Fetch/Refresh Full Analytics", variant="primary")
            follower_count_md = gr.Markdown("Analytics data will load here...")
            with gr.Row():
                follower_plot = gr.Plot(label="Follower Demographics")
                growth_plot = gr.Plot(label="Follower Growth")
            with gr.Row():
                eng_rate_plot = gr.Plot(label="Engagement Rate")
            with gr.Row():
                interaction_plot = gr.Plot(label="Post Interactions")
            with gr.Row():
                eb_plot = gr.Plot(label="Engagement Benchmark")
            with gr.Row():
                mentions_vol_plot = gr.Plot(label="Mentions Volume")
                mentions_sentiment_plot = gr.Plot(label="Mentions Sentiment")

            fetch_analytics_btn.click(
                fn=guarded_fetch_analytics, inputs=[token_state],
                outputs=[follower_count_md, follower_plot, growth_plot, eng_rate_plot,
                         interaction_plot, eb_plot, mentions_vol_plot, mentions_sentiment_plot],
                show_progress="full"
            )

        with gr.TabItem("3οΈβ£ Mentions"):
            refresh_mentions_display_btn = gr.Button("π Refresh Mentions Display (from local data)", variant="secondary")
            mentions_html = gr.HTML("Mentions data loads from Bubble after sync. Click refresh to view current local data.")
            mentions_sentiment_dist_plot = gr.Plot(label="Mention Sentiment Distribution")
            refresh_mentions_display_btn.click(
                fn=run_mentions_tab_display, inputs=[token_state],
                outputs=[mentions_html, mentions_sentiment_dist_plot],
                show_progress="full"
            )

        with gr.TabItem("4οΈβ£ Follower Stats"):
            refresh_follower_stats_btn = gr.Button("π Refresh Follower Stats Display (from local data)", variant="secondary")
            follower_stats_html = gr.HTML("Follower statistics load from Bubble after sync. Click refresh to view current local data.")
            with gr.Row():
                fs_plot_monthly_gains = gr.Plot(label="Monthly Follower Gains")
            with gr.Row():
                fs_plot_seniority = gr.Plot(label="Followers by Seniority (Top 10 Organic)")
                fs_plot_industry = gr.Plot(label="Followers by Industry (Top 10 Organic)")

            refresh_follower_stats_btn.click(
                fn=run_follower_stats_tab_display, inputs=[token_state],
                outputs=[follower_stats_html, fs_plot_monthly_gains, fs_plot_seniority, fs_plot_industry],
                show_progress="full"
            )
1005 |
if __name__ == "__main__":
    # Surface missing configuration in the logs up front instead of failing
    # mid-request later.
    if not os.environ.get("Linkedin_client_id"):
        logging.warning("WARNING: 'Linkedin_client_id' environment variable not set. The app may not function correctly for LinkedIn API calls.")
    required_bubble_vars = ("BUBBLE_APP_NAME", "BUBBLE_API_KEY_PRIVATE", "BUBBLE_API_ENDPOINT")
    if not all(os.environ.get(var) for var in required_bubble_vars):
        logging.warning("WARNING: One or more Bubble environment variables (BUBBLE_APP_NAME, BUBBLE_API_KEY_PRIVATE, BUBBLE_API_ENDPOINT) are not set. Bubble integration will fail.")

    # Plots are optional: warn rather than crash if matplotlib is absent.
    try:
        import matplotlib
        logging.info(f"Matplotlib version: {matplotlib.__version__} found.")
    except ImportError:
        logging.error("Matplotlib is not installed. Plots will not be generated. Please install it: pip install matplotlib")

    # debug=True enables verbose Gradio logging.
    app.launch(server_name="0.0.0.0", server_port=7860, debug=True)