Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Files Community

GuglielmoTor committed on May 12

Commit

87b2809

verified ·

1 Parent(s): 936dfac

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -55

app.py CHANGED Viewed

@@ -29,6 +29,14 @@ from Linkedin_Data_API_Calls import (
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 def check_token_status(token_state):
     """Return a human-readable status string for the LinkedIn token.

     Args:
         token_state: State mapping, or a falsy value such as None. Only
             its "token" entry is inspected, via ``.get``.

     Returns:
         "✅ Token available" when ``token_state`` is truthy and its
         "token" entry is truthy; otherwise "❌ Token not available".
     """
     return "✅ Token available" if token_state and token_state.get("token") else "❌ Token not available"
@@ -47,7 +55,6 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
     }
     new_state.update({"org_urn": org_urn, "bubble_posts_df": new_state.get("bubble_posts_df"), "fetch_count_for_api": new_state.get("fetch_count_for_api", 0)})
-    # Default button update: hidden and non-interactive
     button_update = gr.update(visible=False, interactive=False, value="🔄 Sync LinkedIn Posts")
     client_id = os.environ.get("Linkedin_client_id")
@@ -74,28 +81,25 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
         new_state["token"] = None
         logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
-    # Fetch posts from Bubble
     current_org_urn = new_state.get("org_urn")
-    bubble_posts_df = None
     if current_org_urn:
         logging.info(f"Attempting to fetch posts from Bubble for org_urn: {current_org_urn}")
         try:
             fetched_df, error_message = fetch_linkedin_posts_data_from_bubble(current_org_urn, "LI_posts")
             if error_message:
                 logging.warning(f"Error reported by fetch_linkedin_posts_data_from_bubble: {error_message}. Treating as no data.")
             else:
-                bubble_posts_df = fetched_df
-            new_state["bubble_posts_df"] = bubble_posts_df
         except Exception as e:
             logging.error(f"❌ Error fetching posts from Bubble: {e}. Treating as no data.")
-            new_state["bubble_posts_df"] = None # Ensure it's None on error
     else:
         logging.warning("Org URN not available in state. Cannot fetch posts from Bubble.")
-    # Logic for determining fetch/update based on bubble_posts_df
-    # DATE_COLUMN_NAME is now 'published_at' and contains ISO datetime strings.
     DATE_COLUMN_NAME = 'published_at'
-    DEFAULT_INITIAL_FETCH_COUNT = 100 # Standard number of posts for initial fetch
     if new_state["bubble_posts_df"] is None or new_state["bubble_posts_df"].empty:
         logging.info(f"ℹ️ No posts found in Bubble or DataFrame is empty. Button to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts will be visible.")
@@ -103,7 +107,7 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
         button_update = gr.update(value=f"🔄 Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} LinkedIn Posts", visible=True, interactive=True)
     else:
         try:
-            df_for_date_check = new_state["bubble_posts_df"].copy() # Use a copy to avoid SettingWithCopyWarning
             if DATE_COLUMN_NAME not in df_for_date_check.columns:
                 logging.warning(f"Date column '{DATE_COLUMN_NAME}' not found in Bubble posts DataFrame. Assuming initial fetch of {DEFAULT_INITIAL_FETCH_COUNT} posts.")
                 new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
@@ -113,7 +117,6 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
                 new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
                 button_update = gr.update(value=f"🔄 Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} (Date Column Empty)", visible=True, interactive=True)
             else:
-                # Convert ISO datetime strings to datetime objects
                 df_for_date_check[DATE_COLUMN_NAME] = pd.to_datetime(df_for_date_check[DATE_COLUMN_NAME], errors='coerce', utc=True)
                 last_post_date_utc = df_for_date_check[DATE_COLUMN_NAME].dropna().max()
@@ -149,10 +152,6 @@ def process_and_store_bubble_token(url_user_token, org_urn, token_state):
     return token_status_message, new_state, button_update
 def guarded_fetch_posts(token_state):
-    """
-    Fetches LinkedIn posts based on 'fetch_count_for_api' in token_state,
-    analyzes them, and uploads to Bubble.
-    """
     logging.info("Starting guarded_fetch_posts process.")
     if not token_state or not token_state.get("token"):
         logging.error("Access denied for guarded_fetch_posts. No LinkedIn token available.")
@@ -162,54 +161,83 @@ def guarded_fetch_posts(token_state):
     token_dict = token_state.get("token")
     org_urn = token_state.get('org_urn')
     fetch_count_value = token_state.get('fetch_count_for_api')
     if not org_urn:
-        logging.error("Organization URN (org_urn) not found in token_state for guarded_fetch_posts.")
         return "<p style='color:red; text-align:center;'>❌ Configuration error: Organization URN missing.</p>"
     if not client_id or client_id == "ENV VAR MISSING":
-        logging.error("Client ID not found or missing in token_state for guarded_fetch_posts.")
         return "<p style='color:red; text-align:center;'>❌ Configuration error: LinkedIn Client ID missing.</p>"
     if fetch_count_value == 0:
-        logging.info("guarded_fetch_posts called, but fetch_count_for_api is 0. Data is fresh.")
         return "<p style='color:green; text-align:center;'>✅ Data is already up-to-date. No new posts fetched.</p>"
-    if fetch_count_value is None: # Should ideally not happen with new logic, but as a safeguard
-        logging.warning("fetch_count_for_api is None in guarded_fetch_posts. This might indicate an issue. Defaulting to fetching a standard amount if your API supports it or all.")
-        # Depending on your API, None might mean fetch all or a default.
-        # If your API requires a specific count for "all", you might need to adjust here or in fetch_linkedin_posts_core.
     try:
-        logging.info(f"Step 1: Fetching core posts for org_urn: {org_urn}. Fetch count parameter for API: {fetch_count_value}")
         processed_raw_posts, stats_map, _ = fetch_linkedin_posts_core(client_id, token_dict, org_urn, count=fetch_count_value)
         if not processed_raw_posts:
-            logging.info("No posts found to process via LinkedIn API after step 1.")
-            return "<p style='color:orange; text-align:center;'>ℹ️ No new LinkedIn posts found to process at this time.</p>"
-        post_urns = [post["id"] for post in processed_raw_posts if post.get("id")]
-        logging.info(f"Extracted {len(post_urns)} post URNs for further processing.")
-        logging.info("Step 2: Fetching comments via LinkedIn API.")
-        all_comments_data = fetch_comments(client_id, token_dict, post_urns, stats_map)
-        logging.info("Step 3: Analyzing sentiment.")
-        sentiments_per_post = analyze_sentiment(all_comments_data)
-        logging.info("Step 4: Compiling detailed posts.")
-        detailed_posts = compile_detailed_posts(processed_raw_posts, stats_map, sentiments_per_post)
-        logging.info("Step 5: Preparing data for Bubble.")
-        li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed_posts, all_comments_data)
-        logging.info("Step 6: Uploading data to Bubble.")
-        bulk_upload_to_bubble(li_posts, "LI_posts")
-        bulk_upload_to_bubble(li_post_stats, "LI_post_stats")
-        bulk_upload_to_bubble(li_post_comments, "LI_post_comments")
-        action_performed = f"Initial data fetch (~{fetch_count_value} posts)" if fetch_count_value == DEFAULT_INITIAL_FETCH_COUNT else f"Data update (target: ~{fetch_count_value} posts)"
-        logging.info(f"Successfully completed: {action_performed}. Uploaded posts and comments to Bubble.")
-        return f"<p style='color:green; text-align:center;'>✅ {action_performed} complete. Posts and comments from LinkedIn uploaded to Bubble.</p>"
     except ValueError as ve:
         logging.error(f"ValueError during LinkedIn data processing: {ve}")
@@ -242,17 +270,11 @@ def run_mentions_and_load(token_state):
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
                title="LinkedIn Post Viewer & Analytics") as app:
-    # Define DEFAULT_INITIAL_FETCH_COUNT here if needed by guarded_fetch_posts for its messages,
-    # or ensure it's passed/accessible if logic depends on it there.
-    # For now, it's only used within process_and_store_bubble_token.
-    DEFAULT_INITIAL_FETCH_COUNT = 100
     token_state = gr.State(value={
         "token": None,
         "client_id": None,
         "org_urn": None,
-        "bubble_posts_df": None,
         "fetch_count_for_api": 0
     })

 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# --- Global Constants ---
+# Standard number of posts for initial fetch
+DEFAULT_INITIAL_FETCH_COUNT = 100
+# Key for post URN in data processed from LinkedIn (e.g., in detailed_posts)
+LINKEDIN_POST_URN_KEY = 'id'
+# Column name for post URN in the DataFrame fetched from Bubble (bubble_posts_df)
+BUBBLE_POST_URN_COLUMN_NAME = 'id' # Adjust if your Bubble 'LI_posts' table uses a different column name for URNs
 def check_token_status(token_state):
     """Return a human-readable status string for the LinkedIn token.

     Args:
         token_state: State mapping, or a falsy value such as None. Only
             its "token" entry is inspected, via ``.get``.

     Returns:
         "✅ Token available" when ``token_state`` is truthy and its
         "token" entry is truthy; otherwise "❌ Token not available".
     """
     return "✅ Token available" if token_state and token_state.get("token") else "❌ Token not available"
     }
     new_state.update({"org_urn": org_urn, "bubble_posts_df": new_state.get("bubble_posts_df"), "fetch_count_for_api": new_state.get("fetch_count_for_api", 0)})
     button_update = gr.update(visible=False, interactive=False, value="🔄 Sync LinkedIn Posts")
     client_id = os.environ.get("Linkedin_client_id")
         new_state["token"] = None
         logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
     current_org_urn = new_state.get("org_urn")
     if current_org_urn:
         logging.info(f"Attempting to fetch posts from Bubble for org_urn: {current_org_urn}")
         try:
             fetched_df, error_message = fetch_linkedin_posts_data_from_bubble(current_org_urn, "LI_posts")
             if error_message:
                 logging.warning(f"Error reported by fetch_linkedin_posts_data_from_bubble: {error_message}. Treating as no data.")
+                new_state["bubble_posts_df"] = pd.DataFrame() # Ensure it's an empty DataFrame
             else:
+                new_state["bubble_posts_df"] = fetched_df if fetched_df is not None else pd.DataFrame()
         except Exception as e:
             logging.error(f"❌ Error fetching posts from Bubble: {e}. Treating as no data.")
+            new_state["bubble_posts_df"] = pd.DataFrame()
     else:
         logging.warning("Org URN not available in state. Cannot fetch posts from Bubble.")
+        new_state["bubble_posts_df"] = pd.DataFrame()
     DATE_COLUMN_NAME = 'published_at'
     if new_state["bubble_posts_df"] is None or new_state["bubble_posts_df"].empty:
         logging.info(f"ℹ️ No posts found in Bubble or DataFrame is empty. Button to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts will be visible.")
         button_update = gr.update(value=f"🔄 Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} LinkedIn Posts", visible=True, interactive=True)
     else:
         try:
+            df_for_date_check = new_state["bubble_posts_df"].copy()
             if DATE_COLUMN_NAME not in df_for_date_check.columns:
                 logging.warning(f"Date column '{DATE_COLUMN_NAME}' not found in Bubble posts DataFrame. Assuming initial fetch of {DEFAULT_INITIAL_FETCH_COUNT} posts.")
                 new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
                 new_state['fetch_count_for_api'] = DEFAULT_INITIAL_FETCH_COUNT
                 button_update = gr.update(value=f"🔄 Fetch Initial {DEFAULT_INITIAL_FETCH_COUNT} (Date Column Empty)", visible=True, interactive=True)
             else:
                 df_for_date_check[DATE_COLUMN_NAME] = pd.to_datetime(df_for_date_check[DATE_COLUMN_NAME], errors='coerce', utc=True)
                 last_post_date_utc = df_for_date_check[DATE_COLUMN_NAME].dropna().max()
     return token_status_message, new_state, button_update
 def guarded_fetch_posts(token_state):
     logging.info("Starting guarded_fetch_posts process.")
     if not token_state or not token_state.get("token"):
         logging.error("Access denied for guarded_fetch_posts. No LinkedIn token available.")
     token_dict = token_state.get("token")
     org_urn = token_state.get('org_urn')
     fetch_count_value = token_state.get('fetch_count_for_api')
+    bubble_posts_df = token_state.get("bubble_posts_df") # Get existing posts
     if not org_urn:
+        logging.error("Organization URN (org_urn) not found in token_state.")
         return "<p style='color:red; text-align:center;'>❌ Configuration error: Organization URN missing.</p>"
     if not client_id or client_id == "ENV VAR MISSING":
+        logging.error("Client ID not found or missing in token_state.")
         return "<p style='color:red; text-align:center;'>❌ Configuration error: LinkedIn Client ID missing.</p>"
     if fetch_count_value == 0:
+        logging.info("Data is fresh. No new posts fetched based on date check.")
         return "<p style='color:green; text-align:center;'>✅ Data is already up-to-date. No new posts fetched.</p>"
     try:
+        logging.info(f"Step 1: Fetching core posts for org_urn: {org_urn}. Fetch count: {fetch_count_value}")
         processed_raw_posts, stats_map, _ = fetch_linkedin_posts_core(client_id, token_dict, org_urn, count=fetch_count_value)
         if not processed_raw_posts:
+            logging.info("No posts retrieved from LinkedIn API.")
+            return "<p style='color:orange; text-align:center;'>ℹ️ No new LinkedIn posts found to process.</p>"
+        # --- Filter out posts already in Bubble ---
+        existing_post_urns = set()
+        if bubble_posts_df is not None and not bubble_posts_df.empty and BUBBLE_POST_URN_COLUMN_NAME in bubble_posts_df.columns:
+            existing_post_urns = set(bubble_posts_df[BUBBLE_POST_URN_COLUMN_NAME].dropna().astype(str))
+            logging.info(f"Found {len(existing_post_urns)} existing post URNs in Bubble data.")
+        else:
+            logging.info("No existing posts found in Bubble data or URN column missing; all fetched posts will be considered new.")
+        # Filter processed_raw_posts before compiling detailed_posts
+        new_raw_posts = [
+            post for post in processed_raw_posts
+            if str(post.get(LINKEDIN_POST_URN_KEY)) not in existing_post_urns
+        ]
+        if not new_raw_posts:
+            logging.info("All fetched LinkedIn posts are already present in Bubble. No new posts to add.")
+            return "<p style='color:green; text-align:center;'>✅ All fetched posts already exist in Bubble. Data is up-to-date.</p>"
+        logging.info(f"Identified {len(new_raw_posts)} new posts to process after filtering against Bubble data.")
+        # Continue processing only with new_raw_posts
+        post_urns_to_process = [post[LINKEDIN_POST_URN_KEY] for post in new_raw_posts if post.get(LINKEDIN_POST_URN_KEY)]
+        logging.info("Step 2: Fetching comments for new posts via LinkedIn API.")
+        # Adjust stats_map if it's keyed by URNs; ensure it's relevant for new_raw_posts
+        # For simplicity, assuming fetch_comments and subsequent steps can handle potentially fewer URNs
+        all_comments_data = fetch_comments(client_id, token_dict, post_urns_to_process, stats_map)
+        logging.info("Step 3: Analyzing sentiment for new posts.")
+        sentiments_per_post = analyze_sentiment(all_comments_data) # Assumes all_comments_data is now for new posts
+        logging.info("Step 4: Compiling detailed data for new posts.")
+        # Pass new_raw_posts to compile_detailed_posts
+        detailed_new_posts = compile_detailed_posts(new_raw_posts, stats_map, sentiments_per_post)
+        logging.info("Step 5: Preparing data for Bubble (only new posts).")
+        # Pass detailed_new_posts to prepare_data_for_bubble
+        li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed_new_posts, all_comments_data)
+        logging.info(f"Step 6: Uploading {len(li_posts)} new posts and their related data to Bubble.")
+        if li_posts: # Ensure there's actually something to upload
+            bulk_upload_to_bubble(li_posts, "LI_posts")
+            if li_post_stats:
+                bulk_upload_to_bubble(li_post_stats, "LI_post_stats")
+            if li_post_comments:
+                bulk_upload_to_bubble(li_post_comments, "LI_post_comments")
+            action_message = f"uploaded {len(li_posts)} new post(s)"
+        else:
+            action_message = "found no new posts to upload after detailed processing"
+            logging.info("No new posts to upload after final preparation for Bubble.")
+        final_message_verb = "Initial data fetch" if fetch_count_value == DEFAULT_INITIAL_FETCH_COUNT and not existing_post_urns else "Data update"
+        logging.info(f"Successfully completed: {final_message_verb}. {action_message} to Bubble.")
+        return f"<p style='color:green; text-align:center;'>✅ {final_message_verb} complete. Successfully {action_message} to Bubble.</p>"
     except ValueError as ve:
         logging.error(f"ValueError during LinkedIn data processing: {ve}")
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
                title="LinkedIn Post Viewer & Analytics") as app:
     token_state = gr.State(value={
         "token": None,
         "client_id": None,
         "org_urn": None,
+        "bubble_posts_df": pd.DataFrame(), # Initialize with empty DataFrame
         "fetch_count_for_api": 0
     })