File size: 14,250 Bytes
9c8c059
07be99a
f3b41b9
9c2556f
f3b41b9
a96ea35
9c2556f
a96ea35
9f1c65e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c2556f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import html
import json
from datetime import datetime

import requests

from sessions import create_session

API_V2_BASE = 'https://api.linkedin.com/v2'
API_REST_BASE = "https://api.linkedin.com/rest"

def fetch_org_urn(comm_client_id, comm_token_dict):
    """
    Fetches the user's administrated organization URN and name using the Marketing token.
    Expects comm_token_dict to be the full token dictionary.
    Raises ValueError on failure.
    """
    print("--- Fetching Organization URN ---")
    if not comm_token_dict or 'access_token' not in comm_token_dict:
         print("ERROR: Invalid or missing Marketing token dictionary for fetching Org URN.")
         raise ValueError("Marketing token is missing or invalid.")

    ln_mkt = create_session(comm_client_id, token=comm_token_dict)

    # Fetch organizational roles directly using the V2 API
    url = (
        f"{API_V2_BASE}/organizationalEntityAcls"
        "?q=roleAssignee&role=ADMINISTRATOR&state=APPROVED" # Find orgs where user is ADMIN
        "&projection=(elements*(*,organizationalTarget~(id,localizedName)))" # Get URN and name
    )
    print(f"Fetching Org URN details from: {url}")
    try:
        r = ln_mkt.get(url)
        r.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Failed to fetch organizationalEntityAcls with Marketing token.")
        # Provide specific feedback based on status code if possible
        status = e.response.status_code if e.response is not None else "N/A"
        details = ""
        if e.response is not None:
            try:
                details = f" Details: {e.response.json()}"
            except json.JSONDecodeError:
                details = f" Response: {e.response.text[:200]}..." # Show partial text
        raise ValueError(f"Failed to fetch Organization details (Status: {status}). Check Marketing App permissions (r_organization_admin) and ensure the user is an admin of an org page.{details}") from e

    data = r.json()
    print(f"Org URN Response Data: {json.dumps(data, indent=2)}")
    elements = data.get('elements')

    if not elements:
        print("WARNING: No organizations found where the user is an ADMINISTRATOR.")
        # Try fetching with MEMBER role as a fallback? Might require different scope.
        # For now, stick to ADMINISTRATOR as per scope.
        raise ValueError("No organizations found for this user where they have the ADMINISTRATOR role. Ensure the Marketing App has 'r_organization_admin' permission and the user is an admin of an organization page.")

    # Assuming the first organization found is the target
    # In a real app, you might let the user choose if they admin multiple orgs.
    org_element = elements[0]

    # Extract Full URN ('organizationalTarget' field contains the URN string)
    org_urn_full = org_element.get('organizationalTarget')
    if not org_urn_full or not isinstance(org_urn_full, str) or not org_urn_full.startswith("urn:li:organization:"):
        print(f"ERROR: Could not extract valid Organization URN ('organizationalTarget') from API response element: {org_element}")
        raise ValueError("Could not extract a valid Organization URN from the API response.")

    # Extract Name (from the projected 'organizationalTarget~' field)
    org_name = None
    # The key might be exactly 'organizationalTarget~' or something similar depending on projection syntax variations
    org_target_details_key = next((k for k in org_element if k.endswith('organizationalTarget~')), None)

    if org_target_details_key and isinstance(org_element.get(org_target_details_key), dict):
        org_name = org_element[org_target_details_key].get('localizedName')

    if not org_name:
        # Fallback name using the ID part of the URN
        org_id = org_urn_full.split(':')[-1]
        org_name = f"Organization ({org_id})"
        print(f"WARN: Could not find localizedName, using fallback: {org_name}")

    print(f"Found Org: {org_name} ({org_urn_full})")
    return org_urn_full, org_name




def fetch_posts_and_stats(comm_client_id, community_token, count=10):
    """Fetches posts using Marketing token and stats using Marketing token."""
    print("--- Fetching Posts and Stats ---")

    if not community_token:
         print("WARN: Community token missing, but not currently used for post/stat fetching.")
         raise ValueError("Community token is missing.") # Don't raise if not needed

    # Ensure tokens are in the correct format (dict)
    comm_token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'} # Process if needed later

    ln_comm = create_session(comm_client_id, token=comm_token_dict) # Keep session available if needed

    # 1) Get Org URN (using Marketing token)
    #org_urn, org_name = fetch_org_urn(comm_token_dict) # Reuses the function
    org_urn, org_name = "urn:li:organization:19010008", "GRLS"

    # 2) Fetch latest posts (using Marketing Token via REST API)
    # Endpoint requires r_organization_social permission
    posts_url = f"{API_REST_BASE}/posts?author={org_urn}&q=author&count={count}&sortBy=LAST_MODIFIED"

    print(f"Attempting to fetch posts from: {posts_url} using Marketing token")
    try:
        resp_posts = ln_comm.get(posts_url)
        print(f"β†’ POSTS Request Headers: {resp_posts.request.headers}")
        print(f"β†’ POSTS Response Status: {resp_posts.status_code}")
        # Limit printing large response bodies
        print(f"β†’ POSTS Response Body (first 500 chars): {resp_posts.text[:500]}")
        resp_posts.raise_for_status()
        print("Fetched posts using Marketing token.")
    except requests.exceptions.RequestException as e:
        status = e.response.status_code if e.response is not None else "N/A"
        details = ""
        if e.response is not None:
            try:
                details = f" Details: {e.response.json()}"
            except json.JSONDecodeError:
                details = f" Response: {e.response.text[:200]}..."
        print(f"ERROR: Fetching posts failed with Marketing token (Status: {status}).{details}")
        raise ValueError(f"Failed to fetch posts using Marketing token (Status: {status}). Check permissions (r_organization_social).") from e

    raw_posts_data = resp_posts.json()
    raw_posts = raw_posts_data.get("elements", [])
    print(f"Fetched {len(raw_posts)} raw posts.")

    if not raw_posts:
        return [], org_name # Return empty list and org name if no posts

    # 3) Extract Post URNs (shares or ugcPosts)
    post_urns = [p.get("id") for p in raw_posts if p.get("id") and (":share:" in p.get("id") or ":ugcPost:" in p.get("id"))]
    if not post_urns:
        print("WARN: No post URNs (share or ugcPost) found in the fetched posts.")
        return [], org_name

    print(f"Post URNs to fetch stats for: {post_urns}")

    # 4) Fetch stats (using Comm session via REST API)
    # Endpoint requires r_organization_social permission
    stats_map = {}
    batch_size = 20 # API likely has a limit on number of URNs per request
    urn_batches = [post_urns[i:i + batch_size] for i in range(0, len(post_urns), batch_size)]

    for batch in urn_batches:
        if not batch: continue

        stats_url = f"{API_REST_BASE}/organizationalEntityShareStatistics"
        # Parameters need to be structured correctly: q=organizationalEntity, organizationalEntity=orgURN, shares[0]=shareURN1, ugcPosts[0]=ugcURN1 etc.
        params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
        share_idx, ugc_idx = 0, 0
        for urn in batch:
            if ':share:' in urn:
                params[f'shares[{share_idx}]'] = urn
                share_idx += 1
            elif ':ugcPost:' in urn:
                 params[f'ugcPosts[{ugc_idx}]'] = urn
                 ugc_idx += 1
            else:
                 print(f"WARN: Skipping unknown URN type for stats: {urn}")

        if share_idx == 0 and ugc_idx == 0:
            print("WARN: Skipping stats fetch for batch as no valid share/ugcPost URNs found.")
            continue

        print(f"Fetching stats for batch from: {stats_url} with {len(params)-2} URNs using Marketing token")
        try:
            resp_stats = ln_comm.get(stats_url, params=params)
            print(f"β†’ STATS Request URL: {resp_stats.request.url}") # Log the exact URL called
            print(f"β†’ STATS Request Headers: {resp_stats.request.headers}")
            print(f"β†’ STATS Response Status: {resp_stats.status_code}")
            print(f"β†’ STATS Response Body (first 500 chars): {resp_stats.text[:500]}")
            resp_stats.raise_for_status()
            stats_data = resp_stats.json().get("elements", [])
            print(f"Received {len(stats_data)} stats elements for this batch.")

            # Map stats back to their URNs
            for elem in stats_data:
                # Key in response is 'share' or 'ugcPost' containing the URN
                urn_key = elem.get('share') or elem.get('ugcPost')
                if urn_key:
                    # Store the whole 'totalShareStatistics' object
                    stats_map[urn_key] = elem.get('totalShareStatistics', {})
                else:
                    print(f"WARN: Stats element missing 'share' or 'ugcPost' key: {elem}")


        except requests.exceptions.RequestException as e:
            status = e.response.status_code if e.response is not None else "N/A"
            details = ""
            if e.response is not None:
                try:
                    details = f" Details: {e.response.json()}"
                except json.JSONDecodeError:
                    details = f" Response: {e.response.text[:200]}..."
            print(f"ERROR fetching stats batch using Marketing token (Status: {status}).{details}")
            print("WARN: Skipping stats for this batch due to error.")
            # Optionally raise an error here if stats are critical, or continue with partial data
            # raise ValueError(f"Failed to fetch stats batch (Status: {status}).") from e

    print(f"Fetched stats for {len(stats_map)} posts in total.")

    # 5) Assemble combined post data
    combined_posts = []
    for post in raw_posts:
        post_id = post.get("id")
        if not post_id: continue

        stats = stats_map.get(post_id, {}) # Get stats dict, default to empty if not found
        published_ts = post.get("publishedAt")
        created_ts = post.get("createdAt")
        # Prefer publishedAt, fallback to createdAt
        timestamp = published_ts or created_ts
        when = datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M") if timestamp else "Unknown Date"

        # --- Text Extraction Logic ---
        text = ""
        # Priority: REST API 'commentary' field seems most reliable for simple text posts
        commentary_rest = post.get("commentary")
        if commentary_rest:
            text = commentary_rest
        else:
            # Fallback to V2 style fields if REST commentary is missing
            # Check specificContent first (for shares with commentary)
            specific_content = post.get("specificContent", {})
            share_content = specific_content.get("com.linkedin.ugc.ShareContent", {})
            share_commentary_v2 = share_content.get("shareCommentaryV2", {}).get("text")
            if share_commentary_v2:
                text = share_commentary_v2
            else:
                # Check top-level commentaryV2 (less common?)
                commentary_v2 = post.get("commentaryV2", {}).get("text")
                if commentary_v2:
                    text = commentary_v2
                else:
                    # Check for article titles if it's an article share
                    article_content = specific_content.get("com.linkedin.ugc.ArticleContent", {})
                    article_title = article_content.get("title")
                    if article_title:
                         text = f"Article: {article_title}"
                    else:
                        # Check older 'content' field (might be deprecated)
                        content_text = post.get("content", {}).get("text", {}).get("text")
                        if content_text:
                            text = content_text
                        else:
                            # Final fallback
                            text = "[Media post or share without text]"

        # Escape and truncate text for HTML display
        display_text = html.escape(text[:250]).replace("\n", "<br>") + ("..." if len(text) > 250 else "")

        # --- Stats Extraction ---
        # Use .get with default 0 for robustness
        impressions = stats.get("impressionCount", 0) or 0
        likes = stats.get("likeCount", 0) or 0
        comments = stats.get("commentCount", 0) or 0
        clicks = stats.get("clickCount", 0) or 0
        shares = stats.get("shareCount", 0) or 0

        # Calculate engagement rate manually if 'engagement' field isn't present or reliable
        engagement_num = likes + comments + clicks + shares # Sum of interactions
        engagement_rate_manual = (engagement_num / impressions * 100) if impressions > 0 else 0.0

        # Check if API provides 'engagement' field (usually rate as decimal)
        engagement_api = stats.get('engagement')
        if engagement_api is not None:
            try:
                # API provides rate as decimal (e.g., 0.02 for 2%)
                engagement_str = f"{float(engagement_api) * 100:.2f}%"
            except (ValueError, TypeError):
                 # Fallback to manual calculation if API value is invalid
                 engagement_str = f"{engagement_rate_manual:.2f}%"
        else:
             # Use manual calculation if API field is missing
             engagement_str = f"{engagement_rate_manual:.2f}%"


        combined_posts.append({
            "id": post_id, "when": when, "text": display_text,
            "likes": likes, "comments": comments, "impressions": impressions,
            "clicks": clicks, "shares": shares, # Added shares to dict
            "engagement": engagement_str,
        })

    return combined_posts, org_name