Spaces:
Running
Running
Update Linkedin_Data_API_Calls.py
Browse files - Linkedin_Data_API_Calls.py +106 -33
Linkedin_Data_API_Calls.py
CHANGED
@@ -652,46 +652,119 @@ def fetch_linkedin_mentions_core(comm_client_id, community_token, org_urn, count
|
|
652 |
|
653 |
logging.info(f"Found {len(mention_share_urns)} unique share URNs from SHARE_MENTION notifications.")
|
654 |
|
655 |
-
for share_urn in mention_share_urns:
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
663 |
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
668 |
|
669 |
-
|
670 |
-
|
671 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
672 |
|
673 |
-
mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
|
674 |
-
timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
|
675 |
-
published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
|
676 |
-
author_urn = post_data.get("author", "urn:li:unknown")
|
677 |
-
|
678 |
-
processed_mentions_internal.append({
|
679 |
-
"mention_id": f"mention_{share_urn}",
|
680 |
-
"share_urn": share_urn,
|
681 |
-
"mention_text_raw": commentary_raw,
|
682 |
-
"mention_text_cleaned": mention_text_cleaned,
|
683 |
-
"published_at_timestamp": timestamp,
|
684 |
-
"published_at_iso": published_at_iso,
|
685 |
-
"mentioned_by_author_urn": author_urn,
|
686 |
-
"organization_urn_mentioned": org_urn
|
687 |
-
})
|
688 |
except requests.exceptions.RequestException as e:
|
689 |
status = getattr(e.response, 'status_code', 'N/A')
|
690 |
text = getattr(e.response, 'text', 'No response text')
|
691 |
-
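logging.warning(f"Failed to fetch post details for share URN {share_urn} (Status: {status}): {e}. Response: {text}")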
|
692 |
except json.JSONDecodeError as e:
|
693 |
-
|
694 |
-
|
|
|
|
|
695 |
logging.info(f"Processed {len(processed_mentions_internal)} mentions with their post details.")
|
696 |
return processed_mentions_internal
|
697 |
|
|
|
652 |
|
653 |
logging.info(f"Found {len(mention_share_urns)} unique share URNs from SHARE_MENTION notifications.")
|
654 |
|
655 |
+
# for share_urn in mention_share_urns:
|
656 |
+
# encoded_share_urn = quote(share_urn, safe='')
|
657 |
+
# post_detail_url = f"{API_REST_BASE}/posts/{encoded_share_urn}"
|
658 |
+
# logging.info(f"Fetching details for mentioned post: {post_detail_url}")
|
659 |
+
# try:
|
660 |
+
# post_resp = session.get(post_detail_url)
|
661 |
+
# post_resp.raise_for_status()
|
662 |
+
# post_data = post_resp.json()
|
663 |
+
|
664 |
+
# commentary_raw = post_data.get("commentary")
|
665 |
+
# if not commentary_raw and "specificContent" in post_data:
|
666 |
+
# share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
|
667 |
+
# commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
|
668 |
+
|
669 |
+
# if not commentary_raw:
|
670 |
+
# logging.warning(f"No commentary found for share URN {share_urn}. Skipping.")
|
671 |
+
# continue
|
672 |
+
|
673 |
+
# mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
|
674 |
+
# timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
|
675 |
+
# published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
|
676 |
+
# author_urn = post_data.get("author", "urn:li:unknown")
|
677 |
+
|
678 |
+
# processed_mentions_internal.append({
|
679 |
+
# "mention_id": f"mention_{share_urn}",
|
680 |
+
# "share_urn": share_urn,
|
681 |
+
# "mention_text_raw": commentary_raw,
|
682 |
+
# "mention_text_cleaned": mention_text_cleaned,
|
683 |
+
# "published_at_timestamp": timestamp,
|
684 |
+
# "published_at_iso": published_at_iso,
|
685 |
+
# "mentioned_by_author_urn": author_urn,
|
686 |
+
# "organization_urn_mentioned": org_urn
|
687 |
+
# })
|
688 |
+
# except requests.exceptions.RequestException as e:
|
689 |
+
# status = getattr(e.response, 'status_code', 'N/A')
|
690 |
+
# text = getattr(e.response, 'text', 'No response text')
|
691 |
+
# logging.warning(f"Failed to fetch post details for share URN {share_urn} (Status: {status}): {e}. Response: {text}")
|
692 |
+
# except json.JSONDecodeError as e:
|
693 |
+
# logging.warning(f"Failed to decode JSON for post details {share_urn}: {e}. Response: {post_resp.text if post_resp else 'No resp obj'}")
|
694 |
+
|
695 |
+
if mention_share_urns:
|
696 |
+
# Encode URNs for the batch request URL
|
697 |
+
encoded_urns = [quote(urn, safe='') for urn in mention_share_urns]
|
698 |
+
formatted_urns = ",".join(encoded_urns)
|
699 |
+
|
700 |
+
# Construct the URL for batch fetching post details
|
701 |
+
# API_REST_BASE should be the base URL like "https://api.linkedin.com/rest"
|
702 |
+
batch_posts_url = f"{API_REST_BASE}/posts?ids=List({formatted_urns})"
|
703 |
+
logging.info(f"Fetching details for {len(mention_share_urns)} posts in a batch: {batch_posts_url}")
|
704 |
|
705 |
+
try:
|
706 |
+
batch_resp = session.get(batch_posts_url)
|
707 |
+
batch_resp.raise_for_status() # Raise an exception for HTTP errors
|
708 |
+
batch_data = batch_resp.json()
|
709 |
+
|
710 |
+
results = batch_data.get("results", {}) # Contains post details keyed by URN
|
711 |
+
errors = batch_data.get("errors", {}) # Contains errors for specific URNs
|
712 |
+
statuses = batch_data.get("statuses", {}) # Contains HTTP statuses for specific URNs
|
713 |
+
|
714 |
+
# Process each share URN using the data from the batch response
|
715 |
+
for share_urn in mention_share_urns:
|
716 |
+
if share_urn not in results:
|
717 |
+
# Log if a URN was requested but not found in the results
|
718 |
+
logging.warning(
|
719 |
+
f"Post details for share URN {share_urn} not found in batch response. "
|
720 |
+
f"Status: {statuses.get(share_urn)}, Error: {errors.get(share_urn)}"
|
721 |
+
)
|
722 |
+
continue
|
723 |
|
724 |
+
post_data = results[share_urn]
|
725 |
+
|
726 |
+
# Extract commentary - try direct 'commentary' field first, then fallback
|
727 |
+
commentary_raw = post_data.get("commentary")
|
728 |
+
if not commentary_raw and "specificContent" in post_data:
|
729 |
+
# Fallback for older structures or specific share types if 'commentary' is not top-level
|
730 |
+
share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
|
731 |
+
commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
|
732 |
+
|
733 |
+
if not commentary_raw:
|
734 |
+
logging.warning(f"No commentary found for share URN {share_urn} in batch data. Skipping.")
|
735 |
+
continue
|
736 |
+
|
737 |
+
# Clean the commentary text (assuming this function is defined)
|
738 |
+
mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
|
739 |
+
|
740 |
+
# Extract timestamp and convert to ISO format
|
741 |
+
timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
|
742 |
+
published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
|
743 |
+
|
744 |
+
# Extract author URN
|
745 |
+
author_urn = post_data.get("author", "urn:li:unknown") # Default if author is not found
|
746 |
+
|
747 |
+
# Append processed mention data
|
748 |
+
processed_mentions_internal.append({
|
749 |
+
"mention_id": f"mention_{share_urn}", # Create a unique ID for the mention
|
750 |
+
"share_urn": share_urn,
|
751 |
+
"mention_text_raw": commentary_raw,
|
752 |
+
"mention_text_cleaned": mention_text_cleaned,
|
753 |
+
"published_at_timestamp": timestamp,
|
754 |
+
"published_at_iso": published_at_iso,
|
755 |
+
"mentioned_by_author_urn": author_urn,
|
756 |
+
"organization_urn_mentioned": org_urn # The URN of the organization that was mentioned
|
757 |
+
})
|
758 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
759 |
except requests.exceptions.RequestException as e:
|
760 |
status = getattr(e.response, 'status_code', 'N/A')
|
761 |
text = getattr(e.response, 'text', 'No response text')
|
762 |
+
logging.error(f"Failed to fetch batch post details (Status: {status}): {e}. Response: {text}")
|
763 |
except json.JSONDecodeError as e:
|
764 |
+
# Log error if JSON decoding fails for the batch response
|
765 |
+
logging.error(f"Failed to decode JSON from batch posts response: {e}. Response: {batch_resp.text if batch_resp else 'No resp obj'}")
|
766 |
+
|
767 |
+
|
768 |
logging.info(f"Processed {len(processed_mentions_internal)} mentions with their post details.")
|
769 |
return processed_mentions_internal
|
770 |
|