GuglielmoTor committed on
Commit
d33a3a6
·
verified ·
1 Parent(s): 6adec24

Update Linkedin_Data_API_Calls.py

Browse files
Files changed (1) hide show
  1. Linkedin_Data_API_Calls.py +106 -33
Linkedin_Data_API_Calls.py CHANGED
@@ -652,46 +652,119 @@ def fetch_linkedin_mentions_core(comm_client_id, community_token, org_urn, count
652
 
653
  logging.info(f"Found {len(mention_share_urns)} unique share URNs from SHARE_MENTION notifications.")
654
 
655
- for share_urn in mention_share_urns:
656
- encoded_share_urn = quote(share_urn, safe='')
657
- post_detail_url = f"{API_REST_BASE}/posts/{encoded_share_urn}"
658
- logging.info(f"Fetching details for mentioned post: {post_detail_url}")
659
- try:
660
- post_resp = session.get(post_detail_url)
661
- post_resp.raise_for_status()
662
- post_data = post_resp.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663
 
664
- commentary_raw = post_data.get("commentary")
665
- if not commentary_raw and "specificContent" in post_data:
666
- share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
667
- commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
- if not commentary_raw:
670
- logging.warning(f"No commentary found for share URN {share_urn}. Skipping.")
671
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
 
673
- mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
674
- timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
675
- published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
676
- author_urn = post_data.get("author", "urn:li:unknown")
677
-
678
- processed_mentions_internal.append({
679
- "mention_id": f"mention_{share_urn}",
680
- "share_urn": share_urn,
681
- "mention_text_raw": commentary_raw,
682
- "mention_text_cleaned": mention_text_cleaned,
683
- "published_at_timestamp": timestamp,
684
- "published_at_iso": published_at_iso,
685
- "mentioned_by_author_urn": author_urn,
686
- "organization_urn_mentioned": org_urn
687
- })
688
  except requests.exceptions.RequestException as e:
689
  status = getattr(e.response, 'status_code', 'N/A')
690
  text = getattr(e.response, 'text', 'No response text')
691
- logging.warning(f"Failed to fetch post details for share URN {share_urn} (Status: {status}): {e}. Response: {text}")
692
  except json.JSONDecodeError as e:
693
- logging.warning(f"Failed to decode JSON for post details {share_urn}: {e}. Response: {post_resp.text if post_resp else 'No resp obj'}")
694
-
 
 
695
  logging.info(f"Processed {len(processed_mentions_internal)} mentions with their post details.")
696
  return processed_mentions_internal
697
 
 
652
 
653
  logging.info(f"Found {len(mention_share_urns)} unique share URNs from SHARE_MENTION notifications.")
654
 
655
+ # for share_urn in mention_share_urns:
656
+ # encoded_share_urn = quote(share_urn, safe='')
657
+ # post_detail_url = f"{API_REST_BASE}/posts/{encoded_share_urn}"
658
+ # logging.info(f"Fetching details for mentioned post: {post_detail_url}")
659
+ # try:
660
+ # post_resp = session.get(post_detail_url)
661
+ # post_resp.raise_for_status()
662
+ # post_data = post_resp.json()
663
+
664
+ # commentary_raw = post_data.get("commentary")
665
+ # if not commentary_raw and "specificContent" in post_data:
666
+ # share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
667
+ # commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
668
+
669
+ # if not commentary_raw:
670
+ # logging.warning(f"No commentary found for share URN {share_urn}. Skipping.")
671
+ # continue
672
+
673
+ # mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
674
+ # timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
675
+ # published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
676
+ # author_urn = post_data.get("author", "urn:li:unknown")
677
+
678
+ # processed_mentions_internal.append({
679
+ # "mention_id": f"mention_{share_urn}",
680
+ # "share_urn": share_urn,
681
+ # "mention_text_raw": commentary_raw,
682
+ # "mention_text_cleaned": mention_text_cleaned,
683
+ # "published_at_timestamp": timestamp,
684
+ # "published_at_iso": published_at_iso,
685
+ # "mentioned_by_author_urn": author_urn,
686
+ # "organization_urn_mentioned": org_urn
687
+ # })
688
+ # except requests.exceptions.RequestException as e:
689
+ # status = getattr(e.response, 'status_code', 'N/A')
690
+ # text = getattr(e.response, 'text', 'No response text')
691
+ # logging.warning(f"Failed to fetch post details for share URN {share_urn} (Status: {status}): {e}. Response: {text}")
692
+ # except json.JSONDecodeError as e:
693
+ # logging.warning(f"Failed to decode JSON for post details {share_urn}: {e}. Response: {post_resp.text if post_resp else 'No resp obj'}")
694
+
695
+ if mention_share_urns:
696
+ # Encode URNs for the batch request URL
697
+ encoded_urns = [quote(urn, safe='') for urn in mention_share_urns]
698
+ formatted_urns = ",".join(encoded_urns)
699
+
700
+ # Construct the URL for batch fetching post details
701
+ # API_REST_BASE should be the base URL like "https://api.linkedin.com/rest"
702
+ batch_posts_url = f"{API_REST_BASE}/posts?ids=List({formatted_urns})"
703
+ logging.info(f"Fetching details for {len(mention_share_urns)} posts in a batch: {batch_posts_url}")
704
 
705
+ try:
706
+ batch_resp = session.get(batch_posts_url)
707
+ batch_resp.raise_for_status() # Raise an exception for HTTP errors
708
+ batch_data = batch_resp.json()
709
+
710
+ results = batch_data.get("results", {}) # Contains post details keyed by URN
711
+ errors = batch_data.get("errors", {}) # Contains errors for specific URNs
712
+ statuses = batch_data.get("statuses", {}) # Contains HTTP statuses for specific URNs
713
+
714
+ # Process each share URN using the data from the batch response
715
+ for share_urn in mention_share_urns:
716
+ if share_urn not in results:
717
+ # Log if a URN was requested but not found in the results
718
+ logging.warning(
719
+ f"Post details for share URN {share_urn} not found in batch response. "
720
+ f"Status: {statuses.get(share_urn)}, Error: {errors.get(share_urn)}"
721
+ )
722
+ continue
723
 
724
+ post_data = results[share_urn]
725
+
726
+ # Extract commentary - try direct 'commentary' field first, then fallback
727
+ commentary_raw = post_data.get("commentary")
728
+ if not commentary_raw and "specificContent" in post_data:
729
+ # Fallback for older structures or specific share types if 'commentary' is not top-level
730
+ share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
731
+ commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
732
+
733
+ if not commentary_raw:
734
+ logging.warning(f"No commentary found for share URN {share_urn} in batch data. Skipping.")
735
+ continue
736
+
737
+ # Clean the commentary text (assuming this function is defined)
738
+ mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
739
+
740
+ # Extract timestamp and convert to ISO format
741
+ timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
742
+ published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
743
+
744
+ # Extract author URN
745
+ author_urn = post_data.get("author", "urn:li:unknown") # Default if author is not found
746
+
747
+ # Append processed mention data
748
+ processed_mentions_internal.append({
749
+ "mention_id": f"mention_{share_urn}", # Create a unique ID for the mention
750
+ "share_urn": share_urn,
751
+ "mention_text_raw": commentary_raw,
752
+ "mention_text_cleaned": mention_text_cleaned,
753
+ "published_at_timestamp": timestamp,
754
+ "published_at_iso": published_at_iso,
755
+ "mentioned_by_author_urn": author_urn,
756
+ "organization_urn_mentioned": org_urn # The URN of the organization that was mentioned
757
+ })
758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759
  except requests.exceptions.RequestException as e:
760
  status = getattr(e.response, 'status_code', 'N/A')
761
  text = getattr(e.response, 'text', 'No response text')
762
+ logging.error(f"Failed to fetch batch post details (Status: {status}): {e}. Response: {text}")
763
  except json.JSONDecodeError as e:
764
+ # Log error if JSON decoding fails for the batch response
765
+ logging.error(f"Failed to decode JSON from batch posts response: {e}. Response: {batch_resp.text if batch_resp else 'No resp obj'}")
766
+
767
+
768
  logging.info(f"Processed {len(processed_mentions_internal)} mentions with their post details.")
769
  return processed_mentions_internal
770