GuglielmoTor committed on
Commit
87b2809
·
verified ·
1 Parent(s): 936dfac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -55
app.py CHANGED
@@ -29,6 +29,14 @@ from Linkedin_Data_API_Calls import (
29
  # Configure logging
30
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
31
 
 
 
 
 
 
 
 
 
32
  def check_token_status(token_state):
33
  """Checks the status of the LinkedIn token."""
34
  return "βœ… Token available" if token_state and token_state.get("token") else "❌ Token not available"
@@ -47,7 +55,6 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
47
  }
48
  new_state.update({"org_urn": org_urn, "bubble_posts_df": new_state.get("bubble_posts_df"), "fetch_count_for_api": new_state.get("fetch_count_for_api", 0)})
49
 
50
- # Default button update: hidden and non-interactive
51
  button_update = gr.update(visible=False, interactive=False, value="πŸ”„ Sync LinkedIn Posts")
52
 
53
  client_id = os.environ.get("Linkedin_client_id")
@@ -74,28 +81,25 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
74
  new_state["token"] = None
75
  logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
76
 
77
- # Fetch posts from Bubble
78
  current_org_urn = new_state.get("org_urn")
79
- bubble_posts_df = None
80
  if current_org_urn:
81
  logging.info(f"Attempting to fetch posts from Bubble for org_urn: {current_org_urn}")
82
  try:
83
  fetched_df, error_message = fetch_linkedin_posts_data_from_bubble(current_org_urn, "LI_posts")
84
  if error_message:
85
  logging.warning(f"Error reported by fetch_linkedin_posts_data_from_bubble: {error_message}. Treating as no data.")
 
86
  else:
87
- bubble_posts_df = fetched_df
88
- new_state["bubble_posts_df"] = bubble_posts_df
89
  except Exception as e:
90
  logging.error(f"❌ Error fetching posts from Bubble: {e}. Treating as no data.")
91
- new_state["bubble_posts_df"] = None # Ensure it's None on error
92
  else:
93
  logging.warning("Org URN not available in state. Cannot fetch posts from Bubble.")
 
 
94
 
95
- # Logic for determining fetch/update based on bubble_posts_df
96
- # DATE_COLUMN_NAME is now 'published_at' and contains ISO datetime strings.
97
  DATE_COLUMN_NAME = 'published_at'
98
- DEFAULT_INITIAL_FETCH_COUNT = 100 # Standard number of posts for initial fetch
99
 
100
  if new_state["bubble_posts_df"] is None or new_state["bubble_posts_df"].empty:
101
  logging.info(f"ℹ️ No posts found in Bubble or DataFrame is empty. Button to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts will be visible.")
@@ -103,7 +107,7 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
103
  button_update = gr.update(value=f"πŸ”„ Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} LinkedIn Posts", visible=True, interactive=True)
104
  else:
105
  try:
106
- df_for_date_check = new_state["bubble_posts_df"].copy() # Use a copy to avoid SettingWithCopyWarning
107
  if DATE_COLUMN_NAME not in df_for_date_check.columns:
108
  logging.warning(f"Date column '{DATE_COLUMN_NAME}' not found in Bubble posts DataFrame. Assuming initial fetch of {DEFAULT_INITIAL_FETCH_COUNT} posts.")
109
  new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
@@ -113,7 +117,6 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
113
  new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
114
  button_update = gr.update(value=f"πŸ”„ Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} (Date Column Empty)", visible=True, interactive=True)
115
  else:
116
- # Convert ISO datetime strings to datetime objects
117
  df_for_date_check[DATE_COLUMN_NAME] = pd.to_datetime(df_for_date_check[DATE_COLUMN_NAME], errors='coerce', utc=True)
118
  last_post_date_utc = df_for_date_check[DATE_COLUMN_NAME].dropna().max()
119
 
@@ -149,10 +152,6 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
149
  return token_status_message, new_state, button_update
150
 
151
  def guarded_fetch_posts(token_state):
152
- """
153
- Fetches LinkedIn posts based on 'fetch_count_for_api' in token_state,
154
- analyzes them, and uploads to Bubble.
155
- """
156
  logging.info("Starting guarded_fetch_posts process.")
157
  if not token_state or not token_state.get("token"):
158
  logging.error("Access denied for guarded_fetch_posts. No LinkedIn token available.")
@@ -162,54 +161,83 @@ def guarded_fetch_posts(token_state):
162
  token_dict = token_state.get("token")
163
  org_urn = token_state.get('org_urn')
164
  fetch_count_value = token_state.get('fetch_count_for_api')
 
165
 
166
  if not org_urn:
167
- logging.error("Organization URN (org_urn) not found in token_state for guarded_fetch_posts.")
168
  return "<p style='color:red; text-align:center;'>❌ Configuration error: Organization URN missing.</p>"
169
  if not client_id or client_id == "ENV VAR MISSING":
170
- logging.error("Client ID not found or missing in token_state for guarded_fetch_posts.")
171
  return "<p style='color:red; text-align:center;'>❌ Configuration error: LinkedIn Client ID missing.</p>"
172
 
173
  if fetch_count_value == 0:
174
- logging.info("guarded_fetch_posts called, but fetch_count_for_api is 0. Data is fresh.")
175
  return "<p style='color:green; text-align:center;'>βœ… Data is already up-to-date. No new posts fetched.</p>"
176
 
177
- if fetch_count_value is None: # Should ideally not happen with new logic, but as a safeguard
178
- logging.warning("fetch_count_for_api is None in guarded_fetch_posts. This might indicate an issue. Defaulting to fetching a standard amount if your API supports it or all.")
179
- # Depending on your API, None might mean fetch all or a default.
180
- # If your API requires a specific count for "all", you might need to adjust here or in fetch_linkedin_posts_core.
181
-
182
  try:
183
- logging.info(f"Step 1: Fetching core posts for org_urn: {org_urn}. Fetch count parameter for API: {fetch_count_value}")
184
  processed_raw_posts, stats_map, _ = fetch_linkedin_posts_core(client_id, token_dict, org_urn, count=fetch_count_value)
185
 
186
  if not processed_raw_posts:
187
- logging.info("No posts found to process via LinkedIn API after step 1.")
188
- return "<p style='color:orange; text-align:center;'>ℹ️ No new LinkedIn posts found to process at this time.</p>"
189
-
190
- post_urns = [post["id"] for post in processed_raw_posts if post.get("id")]
191
- logging.info(f"Extracted {len(post_urns)} post URNs for further processing.")
192
-
193
- logging.info("Step 2: Fetching comments via LinkedIn API.")
194
- all_comments_data = fetch_comments(client_id, token_dict, post_urns, stats_map)
195
-
196
- logging.info("Step 3: Analyzing sentiment.")
197
- sentiments_per_post = analyze_sentiment(all_comments_data)
198
-
199
- logging.info("Step 4: Compiling detailed posts.")
200
- detailed_posts = compile_detailed_posts(processed_raw_posts, stats_map, sentiments_per_post)
201
-
202
- logging.info("Step 5: Preparing data for Bubble.")
203
- li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed_posts, all_comments_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
- logging.info("Step 6: Uploading data to Bubble.")
206
- bulk_upload_to_bubble(li_posts, "LI_posts")
207
- bulk_upload_to_bubble(li_post_stats, "LI_post_stats")
208
- bulk_upload_to_bubble(li_post_comments, "LI_post_comments")
209
 
210
- action_performed = f"Initial data fetch (~{fetch_count_value} posts)" if fetch_count_value == DEFAULT_INITIAL_FETCH_COUNT else f"Data update (target: ~{fetch_count_value} posts)"
211
- logging.info(f"Successfully completed: {action_performed}. Uploaded posts and comments to Bubble.")
212
- return f"<p style='color:green; text-align:center;'>βœ… {action_performed} complete. Posts and comments from LinkedIn uploaded to Bubble.</p>"
213
 
214
  except ValueError as ve:
215
  logging.error(f"ValueError during LinkedIn data processing: {ve}")
@@ -242,17 +270,11 @@ def run_mentions_and_load(token_state):
242
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
243
  title="LinkedIn Post Viewer & Analytics") as app:
244
 
245
- # Define DEFAULT_INITIAL_FETCH_COUNT here if needed by guarded_fetch_posts for its messages,
246
- # or ensure it's passed/accessible if logic depends on it there.
247
- # For now, it's only used within process_and_store_bubble_token.
248
- DEFAULT_INITIAL_FETCH_COUNT = 100
249
-
250
-
251
  token_state = gr.State(value={
252
  "token": None,
253
  "client_id": None,
254
  "org_urn": None,
255
- "bubble_posts_df": None,
256
  "fetch_count_for_api": 0
257
  })
258
 
 
29
  # Configure logging
30
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
31
 
32
+ # --- Global Constants ---
33
+ # Standard number of posts for initial fetch
34
+ DEFAULT_INITIAL_FETCH_COUNT = 100
35
+ # Key for post URN in data processed from LinkedIn (e.g., in detailed_posts)
36
+ LINKEDIN_POST_URN_KEY = 'id'
37
+ # Column name for post URN in the DataFrame fetched from Bubble (bubble_posts_df)
38
+ BUBBLE_POST_URN_COLUMN_NAME = 'id' # Adjust if your Bubble 'LI_posts' table uses a different column name for URNs
39
+
40
  def check_token_status(token_state):
41
  """Checks the status of the LinkedIn token."""
42
  return "βœ… Token available" if token_state and token_state.get("token") else "❌ Token not available"
 
55
  }
56
  new_state.update({"org_urn": org_urn, "bubble_posts_df": new_state.get("bubble_posts_df"), "fetch_count_for_api": new_state.get("fetch_count_for_api", 0)})
57
 
 
58
  button_update = gr.update(visible=False, interactive=False, value="πŸ”„ Sync LinkedIn Posts")
59
 
60
  client_id = os.environ.get("Linkedin_client_id")
 
81
  new_state["token"] = None
82
  logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
83
 
 
84
  current_org_urn = new_state.get("org_urn")
 
85
  if current_org_urn:
86
  logging.info(f"Attempting to fetch posts from Bubble for org_urn: {current_org_urn}")
87
  try:
88
  fetched_df, error_message = fetch_linkedin_posts_data_from_bubble(current_org_urn, "LI_posts")
89
  if error_message:
90
  logging.warning(f"Error reported by fetch_linkedin_posts_data_from_bubble: {error_message}. Treating as no data.")
91
+ new_state["bubble_posts_df"] = pd.DataFrame() # Ensure it's an empty DataFrame
92
  else:
93
+ new_state["bubble_posts_df"] = fetched_df if fetched_df is not None else pd.DataFrame()
 
94
  except Exception as e:
95
  logging.error(f"❌ Error fetching posts from Bubble: {e}. Treating as no data.")
96
+ new_state["bubble_posts_df"] = pd.DataFrame()
97
  else:
98
  logging.warning("Org URN not available in state. Cannot fetch posts from Bubble.")
99
+ new_state["bubble_posts_df"] = pd.DataFrame()
100
+
101
 
 
 
102
  DATE_COLUMN_NAME = 'published_at'
 
103
 
104
  if new_state["bubble_posts_df"] is None or new_state["bubble_posts_df"].empty:
105
  logging.info(f"ℹ️ No posts found in Bubble or DataFrame is empty. Button to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts will be visible.")
 
107
  button_update = gr.update(value=f"πŸ”„ Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} LinkedIn Posts", visible=True, interactive=True)
108
  else:
109
  try:
110
+ df_for_date_check = new_state["bubble_posts_df"].copy()
111
  if DATE_COLUMN_NAME not in df_for_date_check.columns:
112
  logging.warning(f"Date column '{DATE_COLUMN_NAME}' not found in Bubble posts DataFrame. Assuming initial fetch of {DEFAULT_INITIAL_FETCH_COUNT} posts.")
113
  new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
 
117
  new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
118
  button_update = gr.update(value=f"πŸ”„ Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} (Date Column Empty)", visible=True, interactive=True)
119
  else:
 
120
  df_for_date_check[DATE_COLUMN_NAME] = pd.to_datetime(df_for_date_check[DATE_COLUMN_NAME], errors='coerce', utc=True)
121
  last_post_date_utc = df_for_date_check[DATE_COLUMN_NAME].dropna().max()
122
 
 
152
  return token_status_message, new_state, button_update
153
 
154
  def guarded_fetch_posts(token_state):
 
 
 
 
155
  logging.info("Starting guarded_fetch_posts process.")
156
  if not token_state or not token_state.get("token"):
157
  logging.error("Access denied for guarded_fetch_posts. No LinkedIn token available.")
 
161
  token_dict = token_state.get("token")
162
  org_urn = token_state.get('org_urn')
163
  fetch_count_value = token_state.get('fetch_count_for_api')
164
+ bubble_posts_df = token_state.get("bubble_posts_df") # Get existing posts
165
 
166
  if not org_urn:
167
+ logging.error("Organization URN (org_urn) not found in token_state.")
168
  return "<p style='color:red; text-align:center;'>❌ Configuration error: Organization URN missing.</p>"
169
  if not client_id or client_id == "ENV VAR MISSING":
170
+ logging.error("Client ID not found or missing in token_state.")
171
  return "<p style='color:red; text-align:center;'>❌ Configuration error: LinkedIn Client ID missing.</p>"
172
 
173
  if fetch_count_value == 0:
174
+ logging.info("Data is fresh. No new posts fetched based on date check.")
175
  return "<p style='color:green; text-align:center;'>βœ… Data is already up-to-date. No new posts fetched.</p>"
176
 
 
 
 
 
 
177
  try:
178
+ logging.info(f"Step 1: Fetching core posts for org_urn: {org_urn}. Fetch count: {fetch_count_value}")
179
  processed_raw_posts, stats_map, _ = fetch_linkedin_posts_core(client_id, token_dict, org_urn, count=fetch_count_value)
180
 
181
  if not processed_raw_posts:
182
+ logging.info("No posts retrieved from LinkedIn API.")
183
+ return "<p style='color:orange; text-align:center;'>ℹ️ No new LinkedIn posts found to process.</p>"
184
+
185
+ # --- Filter out posts already in Bubble ---
186
+ existing_post_urns = set()
187
+ if bubble_posts_df is not None and not bubble_posts_df.empty and BUBBLE_POST_URN_COLUMN_NAME in bubble_posts_df.columns:
188
+ existing_post_urns = set(bubble_posts_df[BUBBLE_POST_URN_COLUMN_NAME].dropna().astype(str))
189
+ logging.info(f"Found {len(existing_post_urns)} existing post URNs in Bubble data.")
190
+ else:
191
+ logging.info("No existing posts found in Bubble data or URN column missing; all fetched posts will be considered new.")
192
+
193
+ # Filter processed_raw_posts before compiling detailed_posts
194
+ new_raw_posts = [
195
+ post for post in processed_raw_posts
196
+ if str(post.get(LINKEDIN_POST_URN_KEY)) not in existing_post_urns
197
+ ]
198
+
199
+ if not new_raw_posts:
200
+ logging.info("All fetched LinkedIn posts are already present in Bubble. No new posts to add.")
201
+ return "<p style='color:green; text-align:center;'>βœ… All fetched posts already exist in Bubble. Data is up-to-date.</p>"
202
+
203
+ logging.info(f"Identified {len(new_raw_posts)} new posts to process after filtering against Bubble data.")
204
+
205
+ # Continue processing only with new_raw_posts
206
+ post_urns_to_process = [post[LINKEDIN_POST_URN_KEY] for post in new_raw_posts if post.get(LINKEDIN_POST_URN_KEY)]
207
+
208
+ logging.info("Step 2: Fetching comments for new posts via LinkedIn API.")
209
+ # Adjust stats_map if it's keyed by URNs; ensure it's relevant for new_raw_posts
210
+ # For simplicity, assuming fetch_comments and subsequent steps can handle potentially fewer URNs
211
+ all_comments_data = fetch_comments(client_id, token_dict, post_urns_to_process, stats_map)
212
+
213
+ logging.info("Step 3: Analyzing sentiment for new posts.")
214
+ sentiments_per_post = analyze_sentiment(all_comments_data) # Assumes all_comments_data is now for new posts
215
+
216
+ logging.info("Step 4: Compiling detailed data for new posts.")
217
+ # Pass new_raw_posts to compile_detailed_posts
218
+ detailed_new_posts = compile_detailed_posts(new_raw_posts, stats_map, sentiments_per_post)
219
+
220
+ logging.info("Step 5: Preparing data for Bubble (only new posts).")
221
+ # Pass detailed_new_posts to prepare_data_for_bubble
222
+ li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed_new_posts, all_comments_data)
223
+
224
+ logging.info(f"Step 6: Uploading {len(li_posts)} new posts and their related data to Bubble.")
225
+ if li_posts: # Ensure there's actually something to upload
226
+ bulk_upload_to_bubble(li_posts, "LI_posts")
227
+ if li_post_stats:
228
+ bulk_upload_to_bubble(li_post_stats, "LI_post_stats")
229
+ if li_post_comments:
230
+ bulk_upload_to_bubble(li_post_comments, "LI_post_comments")
231
+
232
+ action_message = f"uploaded {len(li_posts)} new post(s)"
233
+ else:
234
+ action_message = "found no new posts to upload after detailed processing"
235
+ logging.info("No new posts to upload after final preparation for Bubble.")
236
 
 
 
 
 
237
 
238
+ final_message_verb = "Initial data fetch" if fetch_count_value == DEFAULT_INITIAL_FETCH_COUNT and not existing_post_urns else "Data update"
239
+ logging.info(f"Successfully completed: {final_message_verb}. {action_message} to Bubble.")
240
+ return f"<p style='color:green; text-align:center;'>βœ… {final_message_verb} complete. Successfully {action_message} to Bubble.</p>"
241
 
242
  except ValueError as ve:
243
  logging.error(f"ValueError during LinkedIn data processing: {ve}")
 
270
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
271
  title="LinkedIn Post Viewer & Analytics") as app:
272
 
 
 
 
 
 
 
273
  token_state = gr.State(value={
274
  "token": None,
275
  "client_id": None,
276
  "org_urn": None,
277
+ "bubble_posts_df": pd.DataFrame(), # Initialize with empty DataFrame
278
  "fetch_count_for_api": 0
279
  })
280