Spaces:
Building
Building
Update Data_Fetching_and_Rendering.py
Browse files- Data_Fetching_and_Rendering.py +210 -0
Data_Fetching_and_Rendering.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
import json
|
2 |
import requests
|
3 |
from sessions import create_session
|
|
|
4 |
|
5 |
API_V2_BASE = 'https://api.linkedin.com/v2'
|
|
|
6 |
|
7 |
def fetch_org_urn(comm_client_id, comm_token_dict):
|
8 |
"""
|
@@ -75,3 +77,211 @@ def fetch_org_urn(comm_client_id, comm_token_dict):
|
|
75 |
|
76 |
print(f"Found Org: {org_name} ({org_urn_full})")
|
77 |
return org_urn_full, org_name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import json
|
2 |
import requests
|
3 |
from sessions import create_session
|
4 |
+
import html
|
5 |
|
6 |
API_V2_BASE = 'https://api.linkedin.com/v2'
|
7 |
+
API_REST_BASE = "https://api.linkedin.com/rest"
|
8 |
|
9 |
def fetch_org_urn(comm_client_id, comm_token_dict):
|
10 |
"""
|
|
|
77 |
|
78 |
print(f"Found Org: {org_name} ({org_urn_full})")
|
79 |
return org_urn_full, org_name
|
80 |
+
|
81 |
+
|
82 |
+
|
83 |
+
|
84 |
+
def _request_error_summary(e):
    """Summarize a requests exception for logging.

    Returns a (status, details) pair: the HTTP status code (or "N/A" when no
    response is attached) and a preformatted details string built from the
    response body (JSON if parseable, else the first 200 chars of text).
    """
    status = e.response.status_code if e.response is not None else "N/A"
    details = ""
    if e.response is not None:
        try:
            details = f" Details: {e.response.json()}"
        except json.JSONDecodeError:
            details = f" Response: {e.response.text[:200]}..."
    return status, details


def _extract_post_text(post):
    """Best-effort extraction of a post's text across the API shapes seen here.

    Priority order (first non-empty wins):
      1. REST 'commentary' field (most reliable for simple text posts)
      2. V2 specificContent ShareContent shareCommentaryV2
      3. top-level commentaryV2
      4. ArticleContent title (prefixed with "Article: ")
      5. legacy 'content.text.text' field
      6. a placeholder for media-only posts
    """
    commentary_rest = post.get("commentary")
    if commentary_rest:
        return commentary_rest

    specific_content = post.get("specificContent", {})
    share_commentary_v2 = (
        specific_content.get("com.linkedin.ugc.ShareContent", {})
        .get("shareCommentaryV2", {})
        .get("text")
    )
    if share_commentary_v2:
        return share_commentary_v2

    commentary_v2 = post.get("commentaryV2", {}).get("text")
    if commentary_v2:
        return commentary_v2

    article_title = specific_content.get("com.linkedin.ugc.ArticleContent", {}).get("title")
    if article_title:
        return f"Article: {article_title}"

    # Older 'content' field (might be deprecated)
    content_text = post.get("content", {}).get("text", {}).get("text")
    if content_text:
        return content_text

    return "[Media post or share without text]"


def fetch_posts_and_stats(comm_client_id, community_token, count=10):
    """Fetch an organization's recent posts and their share statistics.

    Both the post listing and the statistics lookups use the Marketing
    (community) token against LinkedIn's REST API; both endpoints require
    the r_organization_social permission.

    Args:
        comm_client_id: OAuth client id used to build the API session.
        community_token: Access token — either a dict ({'access_token': ...})
            or a bare token string, which is wrapped into a Bearer dict.
        count: Maximum number of posts to request (default 10).

    Returns:
        A (combined_posts, org_name) tuple. combined_posts is a list of dicts
        with keys: id, when, text (HTML-escaped, truncated to 250 chars),
        likes, comments, impressions, clicks, shares, engagement (a "%"-
        formatted string). Returns ([], org_name) when no posts are found.

    Raises:
        ValueError: if the token is missing or the post fetch fails.
    """
    # Fix: the original body called datetime.fromtimestamp but the file never
    # imported datetime, raising NameError on the first post with a timestamp.
    from datetime import datetime

    print("--- Fetching Posts and Stats ---")

    if not community_token:
        print("WARN: Community token missing, but not currently used for post/stat fetching.")
        raise ValueError("Community token is missing.")

    # Ensure tokens are in the correct format (dict)
    comm_token_dict = (
        community_token
        if isinstance(community_token, dict)
        else {'access_token': community_token, 'token_type': 'Bearer'}
    )

    ln_comm = create_session(comm_client_id, token=comm_token_dict)

    # 1) Get Org URN (using Marketing token)
    # NOTE(review): the dynamic lookup is disabled in favor of a hard-coded
    # org; re-enable fetch_org_urn when multi-org support is needed.
    #org_urn, org_name = fetch_org_urn(comm_token_dict)  # Reuses the function
    org_urn, org_name = "urn:li:organization:19010008", "GRLS"

    # 2) Fetch latest posts (using Marketing Token via REST API)
    posts_url = f"{API_REST_BASE}/posts?author={org_urn}&q=author&count={count}&sortBy=LAST_MODIFIED"

    print(f"Attempting to fetch posts from: {posts_url} using Marketing token")
    try:
        resp_posts = ln_comm.get(posts_url)
        print(f"β POSTS Request Headers: {resp_posts.request.headers}")
        print(f"β POSTS Response Status: {resp_posts.status_code}")
        # Limit printing large response bodies
        print(f"β POSTS Response Body (first 500 chars): {resp_posts.text[:500]}")
        resp_posts.raise_for_status()
        print("Fetched posts using Marketing token.")
    except requests.exceptions.RequestException as e:
        status, details = _request_error_summary(e)
        print(f"ERROR: Fetching posts failed with Marketing token (Status: {status}).{details}")
        raise ValueError(f"Failed to fetch posts using Marketing token (Status: {status}). Check permissions (r_organization_social).") from e

    raw_posts_data = resp_posts.json()
    raw_posts = raw_posts_data.get("elements", [])
    print(f"Fetched {len(raw_posts)} raw posts.")

    if not raw_posts:
        return [], org_name  # Return empty list and org name if no posts

    # 3) Extract Post URNs (shares or ugcPosts); other URN types carry no stats
    post_urns = [p.get("id") for p in raw_posts if p.get("id") and (":share:" in p.get("id") or ":ugcPost:" in p.get("id"))]
    if not post_urns:
        print("WARN: No post URNs (share or ugcPost) found in the fetched posts.")
        return [], org_name

    print(f"Post URNs to fetch stats for: {post_urns}")

    # 4) Fetch stats in batches (the API limits URNs per request)
    stats_map = {}
    batch_size = 20
    urn_batches = [post_urns[i:i + batch_size] for i in range(0, len(post_urns), batch_size)]

    for batch in urn_batches:
        if not batch:
            continue

        stats_url = f"{API_REST_BASE}/organizationalEntityShareStatistics"
        # Parameters must be structured as: q=organizationalEntity,
        # organizationalEntity=orgURN, shares[0]=..., ugcPosts[0]=..., etc.
        params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
        share_idx, ugc_idx = 0, 0
        for urn in batch:
            if ':share:' in urn:
                params[f'shares[{share_idx}]'] = urn
                share_idx += 1
            elif ':ugcPost:' in urn:
                params[f'ugcPosts[{ugc_idx}]'] = urn
                ugc_idx += 1
            else:
                print(f"WARN: Skipping unknown URN type for stats: {urn}")

        if share_idx == 0 and ugc_idx == 0:
            print("WARN: Skipping stats fetch for batch as no valid share/ugcPost URNs found.")
            continue

        print(f"Fetching stats for batch from: {stats_url} with {len(params)-2} URNs using Marketing token")
        try:
            resp_stats = ln_comm.get(stats_url, params=params)
            print(f"β STATS Request URL: {resp_stats.request.url}")  # Log the exact URL called
            print(f"β STATS Request Headers: {resp_stats.request.headers}")
            print(f"β STATS Response Status: {resp_stats.status_code}")
            print(f"β STATS Response Body (first 500 chars): {resp_stats.text[:500]}")
            resp_stats.raise_for_status()
            stats_data = resp_stats.json().get("elements", [])
            print(f"Received {len(stats_data)} stats elements for this batch.")

            # Map stats back to their URNs; the response keys the URN under
            # 'share' or 'ugcPost' depending on the post type.
            for elem in stats_data:
                urn_key = elem.get('share') or elem.get('ugcPost')
                if urn_key:
                    stats_map[urn_key] = elem.get('totalShareStatistics', {})
                else:
                    print(f"WARN: Stats element missing 'share' or 'ugcPost' key: {elem}")

        except requests.exceptions.RequestException as e:
            status, details = _request_error_summary(e)
            print(f"ERROR fetching stats batch using Marketing token (Status: {status}).{details}")
            print("WARN: Skipping stats for this batch due to error.")
            # Stats are treated as best-effort: continue with partial data
            # rather than failing the whole fetch.

    print(f"Fetched stats for {len(stats_map)} posts in total.")

    # 5) Assemble combined post data
    combined_posts = []
    for post in raw_posts:
        post_id = post.get("id")
        if not post_id:
            continue

        stats = stats_map.get(post_id, {})  # Default to empty dict if no stats

        # Prefer publishedAt, fallback to createdAt (epoch millis)
        timestamp = post.get("publishedAt") or post.get("createdAt")
        when = datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M") if timestamp else "Unknown Date"

        text = _extract_post_text(post)

        # Escape and truncate text for HTML display
        display_text = html.escape(text[:250]).replace("\n", "<br>") + ("..." if len(text) > 250 else "")

        # --- Stats extraction ---
        # `or 0` guards against explicit nulls in the API response.
        impressions = stats.get("impressionCount", 0) or 0
        likes = stats.get("likeCount", 0) or 0
        comments = stats.get("commentCount", 0) or 0
        clicks = stats.get("clickCount", 0) or 0
        shares = stats.get("shareCount", 0) or 0

        # Manual engagement rate: interactions / impressions, as a percentage
        engagement_num = likes + comments + clicks + shares
        engagement_rate_manual = (engagement_num / impressions * 100) if impressions > 0 else 0.0

        # Prefer the API 'engagement' field (a decimal rate, e.g. 0.02 for 2%)
        # when present and parseable; otherwise use the manual calculation.
        engagement_api = stats.get('engagement')
        if engagement_api is not None:
            try:
                engagement_str = f"{float(engagement_api) * 100:.2f}%"
            except (ValueError, TypeError):
                engagement_str = f"{engagement_rate_manual:.2f}%"
        else:
            engagement_str = f"{engagement_rate_manual:.2f}%"

        combined_posts.append({
            "id": post_id, "when": when, "text": display_text,
            "likes": likes, "comments": comments, "impressions": impressions,
            "clicks": clicks, "shares": shares,
            "engagement": engagement_str,
        })

    return combined_posts, org_name
|