Update Linkedin_Data_API_Calls.py
Linkedin_Data_API_Calls.py  CHANGED  (+47 -18)
@@ -276,21 +276,34 @@ def fetch_linkedin_posts_core(comm_client_id, community_token, org_urn, count):
 
 def fetch_comments(comm_client_id, community_token, post_urns, stats_map):
     """
-    Fetches comments for a list of post URNs.
+    Fetches comments for a list of post URNs using the socialActions endpoint.
     Uses stats_map to potentially skip posts with 0 comments.
     """
-
+    # Ensure community_token is in the expected dictionary format for create_session
+    if isinstance(community_token, str):
+        token_dict = {'access_token': community_token, 'token_type': 'Bearer'}
+    elif isinstance(community_token, dict) and 'access_token' in community_token:
+        token_dict = community_token
+    else:
+        logging.error("Invalid community_token format. Expected a string or a dict with 'access_token'.")
+        return {urn: [] for urn in post_urns}  # Return empty for all if token is bad
+
     linkedin_session = create_session(comm_client_id, token=token_dict)
-    linkedin_session.headers.update({
-        'LinkedIn-Version': "202502"
-    })
 
+    # Set the LinkedIn API version header
+    # This is crucial for API compatibility.
+    linkedin_session.headers.update({
+        'LinkedIn-Version': "202502"  # Or your target version
+    })
+
     all_comments_by_post = {}
     logging.info(f"Fetching comments for {len(post_urns)} posts.")
 
     for post_urn in post_urns:
         post_stats = stats_map.get(post_urn, {})
-
+        # Try to get comment count from "commentSummary" first, then fall back to "commentCount"
+        comment_summary = post_stats.get("commentSummary", {})
+        comment_count_from_stats = comment_summary.get("totalComments", post_stats.get('commentCount', 0))
 
         if comment_count_from_stats == 0:
             logging.info(f"Skipping comment fetch for {post_urn} as commentCount is 0 in stats_map.")
@@ -298,8 +311,11 @@ def fetch_comments(comm_client_id, community_token, post_urns, stats_map):
             continue
 
         try:
-
-
+            # IMPORTANT: Use the correct endpoint structure from your working code.
+            # The post_urn goes directly into the path and should NOT be URL-encoded here.
+            url = f"{API_REST_BASE}/socialActions/{post_urn}/comments?sortOrder=CHRONOLOGICAL"
+            # If you want to add other parameters like 'count' or 'start', append them, e.g.
+            # url = f"{API_REST_BASE}/socialActions/{post_urn}/comments?sortOrder=CHRONOLOGICAL&count=10"
 
             logging.debug(f"Fetching comments from URL: {url} for post URN: {post_urn}")
             response = linkedin_session.get(url)
@@ -308,28 +324,41 @@ def fetch_comments(comm_client_id, community_token, post_urns, stats_map):
                 elements = response.json().get('elements', [])
                 comments_texts = []
                 for c in elements:
-
-
-
+                    # Extracting comment text. Adjust if the structure is different.
+                    # The original working code stored `data.get('elements', [])`.
+                    # If you need the full comment object, store 'c' instead of 'comment_text'.
+                    message_obj = c.get('message', {})
+                    if isinstance(message_obj, dict):  # Ensure message is a dict before .get('text')
+                        comment_text = message_obj.get('text')
+                        if comment_text:
+                            comments_texts.append(comment_text)
+                    elif isinstance(message_obj, str):  # Sometimes message might be just a string
+                        comments_texts.append(message_obj)
+
                 all_comments_by_post[post_urn] = comments_texts
                 logging.info(f"Fetched {len(comments_texts)} comments for {post_urn}.")
             elif response.status_code == 403:
-
-
+                logging.warning(f"Forbidden (403) to fetch comments for {post_urn}. URL: {url}. Response: {response.text}. Check permissions or API version.")
+                all_comments_by_post[post_urn] = []  # Or some error indicator
             elif response.status_code == 404:
-
-
+                logging.warning(f"Comments not found (404) for {post_urn}. URL: {url}. Response: {response.text}")
+                all_comments_by_post[post_urn] = []
             else:
                 logging.error(f"Error fetching comments for {post_urn}. Status: {response.status_code}. URL: {url}. Response: {response.text}")
-                all_comments_by_post[post_urn] = []
+                all_comments_by_post[post_urn] = []  # Or some error indicator
         except requests.exceptions.RequestException as e:
             logging.error(f"RequestException fetching comments for {post_urn}: {e}")
            all_comments_by_post[post_urn] = []
         except json.JSONDecodeError as e:
-
+            # Log the response text if it's available and JSON decoding fails
+            response_text_for_log = 'N/A'
+            if 'response' in locals() and hasattr(response, 'text'):
+                response_text_for_log = response.text
+            logging.error(f"JSONDecodeError fetching comments for {post_urn}. Response: {response_text_for_log}. Error: {e}")
             all_comments_by_post[post_urn] = []
         except Exception as e:
-
+            # Catch any other unexpected errors
+            logging.error(f"Unexpected error fetching comments for {post_urn}: {e}", exc_info=True)  # exc_info=True for traceback
             all_comments_by_post[post_urn] = []
 
     return all_comments_by_post
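
For context, a minimal sketch of how the updated fetch_comments might be driven. This is illustrative only: the credentials, post URNs, and the shape of stats_map below are assumptions inferred from the lookups in the diff (commentSummary.totalComments with a flat commentCount fallback), not values from this repository; create_session and API_REST_BASE are expected to come from the surrounding module.

# Hypothetical usage sketch; adjust names and values to the rest of the module.
import logging

from Linkedin_Data_API_Calls import fetch_comments  # assumes the module is importable

logging.basicConfig(level=logging.INFO)

comm_client_id = "your-client-id"          # placeholder credential
community_token = "AQX..."                 # raw access token; fetch_comments wraps a string into a token dict

post_urns = [
    "urn:li:ugcPost:7180000000000000000",  # hypothetical post URN
    "urn:li:share:7180000000000000001",    # hypothetical share URN
]

# stats_map shaped to match the two lookups in fetch_comments:
# commentSummary.totalComments first, then a flat commentCount fallback.
stats_map = {
    post_urns[0]: {"commentSummary": {"totalComments": 4}},
    post_urns[1]: {"commentCount": 0},     # this post is skipped without an API call
}

comments_by_post = fetch_comments(comm_client_id, community_token, post_urns, stats_map)
for urn, texts in comments_by_post.items():
    print(urn, len(texts), "comments")

Because posts with a zero count in stats_map are skipped before any request is made, the second URN above never hits the socialActions endpoint; only the first one triggers a GET and has its comment texts collected.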