GuglielmoTor committed on
Commit
d33a3a6
·
verified ·
1 Parent(s): 6adec24

Update Linkedin_Data_API_Calls.py

Browse files
Files changed (1) hide show
  1. Linkedin_Data_API_Calls.py +106 -33
Linkedin_Data_API_Calls.py CHANGED
@@ -652,46 +652,119 @@ def fetch_linkedin_mentions_core(comm_client_id, community_token, org_urn, count
652
 
653
  logging.info(f"Found {len(mention_share_urns)} unique share URNs from SHARE_MENTION notifications.")
654
 
655
- for share_urn in mention_share_urns:
656
- encoded_share_urn = quote(share_urn, safe='')
657
- post_detail_url = f"{API_REST_BASE}/posts/{encoded_share_urn}"
658
- logging.info(f"Fetching details for mentioned post: {post_detail_url}")
659
- try:
660
- post_resp = session.get(post_detail_url)
661
- post_resp.raise_for_status()
662
- post_data = post_resp.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663
 
664
- commentary_raw = post_data.get("commentary")
665
- if not commentary_raw and "specificContent" in post_data:
666
- share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
667
- commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
- if not commentary_raw:
670
- logging.warning(f"No commentary found for share URN {share_urn}. Skipping.")
671
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
 
673
- mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
674
- timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
675
- published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
676
- author_urn = post_data.get("author", "urn:li:unknown")
677
-
678
- processed_mentions_internal.append({
679
- "mention_id": f"mention_{share_urn}",
680
- "share_urn": share_urn,
681
- "mention_text_raw": commentary_raw,
682
- "mention_text_cleaned": mention_text_cleaned,
683
- "published_at_timestamp": timestamp,
684
- "published_at_iso": published_at_iso,
685
- "mentioned_by_author_urn": author_urn,
686
- "organization_urn_mentioned": org_urn
687
- })
688
  except requests.exceptions.RequestException as e:
689
  status = getattr(e.response, 'status_code', 'N/A')
690
  text = getattr(e.response, 'text', 'No response text')
691
- logging.warning(f"Failed to fetch post details for share URN {share_urn} (Status: {status}): {e}. Response: {text}")
692
  except json.JSONDecodeError as e:
693
- logging.warning(f"Failed to decode JSON for post details {share_urn}: {e}. Response: {post_resp.text if post_resp else 'No resp obj'}")
694
-
 
 
695
  logging.info(f"Processed {len(processed_mentions_internal)} mentions with their post details.")
696
  return processed_mentions_internal
697
 
 
652
 
653
  logging.info(f"Found {len(mention_share_urns)} unique share URNs from SHARE_MENTION notifications.")
654
 
655
+ # for share_urn in mention_share_urns:
656
+ # encoded_share_urn = quote(share_urn, safe='')
657
+ # post_detail_url = f"{API_REST_BASE}/posts/{encoded_share_urn}"
658
+ # logging.info(f"Fetching details for mentioned post: {post_detail_url}")
659
+ # try:
660
+ # post_resp = session.get(post_detail_url)
661
+ # post_resp.raise_for_status()
662
+ # post_data = post_resp.json()
663
+
664
+ # commentary_raw = post_data.get("commentary")
665
+ # if not commentary_raw and "specificContent" in post_data:
666
+ # share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
667
+ # commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
668
+
669
+ # if not commentary_raw:
670
+ # logging.warning(f"No commentary found for share URN {share_urn}. Skipping.")
671
+ # continue
672
+
673
+ # mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
674
+ # timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
675
+ # published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
676
+ # author_urn = post_data.get("author", "urn:li:unknown")
677
+
678
+ # processed_mentions_internal.append({
679
+ # "mention_id": f"mention_{share_urn}",
680
+ # "share_urn": share_urn,
681
+ # "mention_text_raw": commentary_raw,
682
+ # "mention_text_cleaned": mention_text_cleaned,
683
+ # "published_at_timestamp": timestamp,
684
+ # "published_at_iso": published_at_iso,
685
+ # "mentioned_by_author_urn": author_urn,
686
+ # "organization_urn_mentioned": org_urn
687
+ # })
688
+ # except requests.exceptions.RequestException as e:
689
+ # status = getattr(e.response, 'status_code', 'N/A')
690
+ # text = getattr(e.response, 'text', 'No response text')
691
+ # logging.warning(f"Failed to fetch post details for share URN {share_urn} (Status: {status}): {e}. Response: {text}")
692
+ # except json.JSONDecodeError as e:
693
+ # logging.warning(f"Failed to decode JSON for post details {share_urn}: {e}. Response: {post_resp.text if post_resp else 'No resp obj'}")
694
+
695
+ if mention_share_urns:
696
+ # Encode URNs for the batch request URL
697
+ encoded_urns = [quote(urn, safe='') for urn in mention_share_urns]
698
+ formatted_urns = ",".join(encoded_urns)
699
+
700
+ # Construct the URL for batch fetching post details
701
+ # API_REST_BASE should be the base URL like "https://api.linkedin.com/rest"
702
+ batch_posts_url = f"{API_REST_BASE}/posts?ids=List({formatted_urns})"
703
+ logging.info(f"Fetching details for {len(mention_share_urns)} posts in a batch: {batch_posts_url}")
704
 
705
+ try:
706
+ batch_resp = session.get(batch_posts_url)
707
+ batch_resp.raise_for_status() # Raise an exception for HTTP errors
708
+ batch_data = batch_resp.json()
709
+
710
+ results = batch_data.get("results", {}) # Contains post details keyed by URN
711
+ errors = batch_data.get("errors", {}) # Contains errors for specific URNs
712
+ statuses = batch_data.get("statuses", {}) # Contains HTTP statuses for specific URNs
713
+
714
+ # Process each share URN using the data from the batch response
715
+ for share_urn in mention_share_urns:
716
+ if share_urn not in results:
717
+ # Log if a URN was requested but not found in the results
718
+ logging.warning(
719
+ f"Post details for share URN {share_urn} not found in batch response. "
720
+ f"Status: {statuses.get(share_urn)}, Error: {errors.get(share_urn)}"
721
+ )
722
+ continue
723
 
724
+ post_data = results[share_urn]
725
+
726
+ # Extract commentary - try direct 'commentary' field first, then fallback
727
+ commentary_raw = post_data.get("commentary")
728
+ if not commentary_raw and "specificContent" in post_data:
729
+ # Fallback for older structures or specific share types if 'commentary' is not top-level
730
+ share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
731
+ commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
732
+
733
+ if not commentary_raw:
734
+ logging.warning(f"No commentary found for share URN {share_urn} in batch data. Skipping.")
735
+ continue
736
+
737
+ # Clean the commentary text (assuming this function is defined)
738
+ mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
739
+
740
+ # Extract timestamp and convert to ISO format
741
+ timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
742
+ published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
743
+
744
+ # Extract author URN
745
+ author_urn = post_data.get("author", "urn:li:unknown") # Default if author is not found
746
+
747
+ # Append processed mention data
748
+ processed_mentions_internal.append({
749
+ "mention_id": f"mention_{share_urn}", # Create a unique ID for the mention
750
+ "share_urn": share_urn,
751
+ "mention_text_raw": commentary_raw,
752
+ "mention_text_cleaned": mention_text_cleaned,
753
+ "published_at_timestamp": timestamp,
754
+ "published_at_iso": published_at_iso,
755
+ "mentioned_by_author_urn": author_urn,
756
+ "organization_urn_mentioned": org_urn # The URN of the organization that was mentioned
757
+ })
758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759
  except requests.exceptions.RequestException as e:
760
  status = getattr(e.response, 'status_code', 'N/A')
761
  text = getattr(e.response, 'text', 'No response text')
762
+ logging.error(f"Failed to fetch batch post details (Status: {status}): {e}. Response: {text}")
763
  except json.JSONDecodeError as e:
764
+ # Log error if JSON decoding fails for the batch response
765
+ logging.error(f"Failed to decode JSON from batch posts response: {e}. Response: {batch_resp.text if batch_resp else 'No resp obj'}")
766
+
767
+
768
  logging.info(f"Processed {len(processed_mentions_internal)} mentions with their post details.")
769
  return processed_mentions_internal
770