GuglielmoTor committed on
Commit
97314bb
·
verified ·
1 Parent(s): 2c911b1

Update Linkedin_Data_API_Calls.py

Browse files
Files changed (1) hide show
  1. Linkedin_Data_API_Calls.py +47 -18
Linkedin_Data_API_Calls.py CHANGED
def fetch_comments(comm_client_id, community_token, post_urns, stats_map):
    """
    Fetch comments for a list of post URNs.

    Uses stats_map to skip posts whose recorded comment count is 0.

    Args:
        comm_client_id: Client id forwarded to create_session.
        community_token: Access token — either a raw string or a dict
            containing an 'access_token' key.
        post_urns: Iterable of post URNs to fetch comments for.
        stats_map: Mapping of post URN -> stats dict; consulted for the
            comment count before issuing a request.

    Returns:
        dict mapping each processed post URN to a list of comment text
        strings (empty list on error).
    """
    # Normalise the token into the dict form create_session expects.
    token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}

    linkedin_session = create_session(comm_client_id, token=token_dict)
    # Versioned REST API: this header is mandatory for endpoint compatibility.
    linkedin_session.headers.update({
        'LinkedIn-Version': "202502"
    })

    all_comments_by_post = {}
    logging.info(f"Fetching comments for {len(post_urns)} posts.")

    for post_urn in post_urns:
        post_stats = stats_map.get(post_urn, {})
        # Prefer commentSummary.totalComments; fall back to flat commentCount.
        comment_count_from_stats = post_stats.get("commentSummary", {}).get("totalComments", post_stats.get('commentCount', 0))

        if comment_count_from_stats == 0:
            logging.info(f"Skipping comment fetch for {post_urn} as commentCount is 0 in stats_map.")
            continue

        try:
            # Comments are served by the socialActions endpoint; the URN is
            # placed directly in the path (the former
            # /comments?q=entity&entityUrn=... query form is not served for
            # post comments and produced errors).
            url = f"{API_REST_BASE}/socialActions/{post_urn}/comments?sortOrder=CHRONOLOGICAL"

            logging.debug(f"Fetching comments from URL: {url} for post URN: {post_urn}")
            response = linkedin_session.get(url)

            if response.status_code == 200:
                elements = response.json().get('elements', [])
                comments_texts = []
                for c in elements:
                    comment_text = c.get('message', {}).get('text')
                    if comment_text:
                        comments_texts.append(comment_text)
                all_comments_by_post[post_urn] = comments_texts
                logging.info(f"Fetched {len(comments_texts)} comments for {post_urn}.")
            elif response.status_code == 403:
                logging.warning(f"Forbidden (403) to fetch comments for {post_urn}. URL: {url}. Response: {response.text}. Check permissions or API version.")
                all_comments_by_post[post_urn] = []
            elif response.status_code == 404:
                logging.warning(f"Comments not found (404) for {post_urn}. URL: {url}. Response: {response.text}")
                all_comments_by_post[post_urn] = []
            else:
                logging.error(f"Error fetching comments for {post_urn}. Status: {response.status_code}. URL: {url}. Response: {response.text}")
                all_comments_by_post[post_urn] = []
        except requests.exceptions.RequestException as e:
            logging.error(f"RequestException fetching comments for {post_urn}: {e}")
            all_comments_by_post[post_urn] = []
        except json.JSONDecodeError as e:
            # 'response' may be unbound if the request itself failed; guard
            # both the binding and the attribute before logging its body.
            response_text_for_log = 'N/A'
            if 'response' in locals() and hasattr(response, 'text'):
                response_text_for_log = response.text
            logging.error(f"JSONDecodeError fetching comments for {post_urn}. Response: {response_text_for_log}. Error: {e}")
            all_comments_by_post[post_urn] = []
        except Exception as e:
            logging.error(f"Unexpected error fetching comments for {post_urn}: {e}")
            all_comments_by_post[post_urn] = []

    return all_comments_by_post
 
def fetch_comments(comm_client_id, community_token, post_urns, stats_map):
    """
    Fetch comments for a list of post URNs using the socialActions endpoint.

    Uses stats_map to skip posts whose recorded comment count is 0.

    Args:
        comm_client_id: Client id forwarded to create_session.
        community_token: Access token — either a raw string or a dict that
            already contains an 'access_token' key.
        post_urns: Iterable of post URNs to fetch comments for.
        stats_map: Mapping of post URN -> stats dict; consulted for the
            comment count before issuing a request.

    Returns:
        dict mapping every input post URN to a list of comment text strings
        (empty list when skipped or when any per-post error occurs).
    """
    # Ensure community_token is in the expected dictionary format for create_session.
    if isinstance(community_token, str):
        token_dict = {'access_token': community_token, 'token_type': 'Bearer'}
    elif isinstance(community_token, dict) and 'access_token' in community_token:
        token_dict = community_token
    else:
        logging.error("Invalid community_token format. Expected a string or a dict with 'access_token'.")
        return {urn: [] for urn in post_urns}  # Return empty for all if token is bad

    linkedin_session = create_session(comm_client_id, token=token_dict)

    # Set the LinkedIn API version header — required for API compatibility
    # with the versioned REST endpoints.
    linkedin_session.headers.update({
        'LinkedIn-Version': "202502"
    })

    all_comments_by_post = {}
    logging.info(f"Fetching comments for {len(post_urns)} posts.")

    for post_urn in post_urns:
        post_stats = stats_map.get(post_urn, {})
        # Try "commentSummary.totalComments" first, then fall back to a flat
        # "commentCount" field.
        comment_summary = post_stats.get("commentSummary", {})
        comment_count_from_stats = comment_summary.get("totalComments", post_stats.get('commentCount', 0))

        if comment_count_from_stats == 0:
            logging.info(f"Skipping comment fetch for {post_urn} as commentCount is 0 in stats_map.")
            # Record an empty list so every input URN has an entry in the
            # returned mapping, consistent with the error paths below.
            all_comments_by_post[post_urn] = []
            continue

        try:
            # The post URN goes directly into the socialActions path segment.
            # Extra parameters (e.g. count/start) can be appended to the query.
            url = f"{API_REST_BASE}/socialActions/{post_urn}/comments?sortOrder=CHRONOLOGICAL"

            logging.debug(f"Fetching comments from URL: {url} for post URN: {post_urn}")
            response = linkedin_session.get(url)

            if response.status_code == 200:
                elements = response.json().get('elements', [])
                comments_texts = []
                for c in elements:
                    # 'message' is normally {'text': ...}; tolerate a bare
                    # string payload as well.
                    message_obj = c.get('message', {})
                    if isinstance(message_obj, dict):
                        comment_text = message_obj.get('text')
                        if comment_text:
                            comments_texts.append(comment_text)
                    elif isinstance(message_obj, str):
                        comments_texts.append(message_obj)

                all_comments_by_post[post_urn] = comments_texts
                logging.info(f"Fetched {len(comments_texts)} comments for {post_urn}.")
            elif response.status_code == 403:
                logging.warning(f"Forbidden (403) to fetch comments for {post_urn}. URL: {url}. Response: {response.text}. Check permissions or API version.")
                all_comments_by_post[post_urn] = []
            elif response.status_code == 404:
                logging.warning(f"Comments not found (404) for {post_urn}. URL: {url}. Response: {response.text}")
                all_comments_by_post[post_urn] = []
            else:
                logging.error(f"Error fetching comments for {post_urn}. Status: {response.status_code}. URL: {url}. Response: {response.text}")
                all_comments_by_post[post_urn] = []
        except requests.exceptions.RequestException as e:
            logging.error(f"RequestException fetching comments for {post_urn}: {e}")
            all_comments_by_post[post_urn] = []
        except json.JSONDecodeError as e:
            # 'response' may be unbound if the request failed before a reply
            # arrived; guard both the binding and the attribute.
            response_text_for_log = 'N/A'
            if 'response' in locals() and hasattr(response, 'text'):
                response_text_for_log = response.text
            logging.error(f"JSONDecodeError fetching comments for {post_urn}. Response: {response_text_for_log}. Error: {e}")
            all_comments_by_post[post_urn] = []
        except Exception as e:
            # exc_info=True captures the traceback for unexpected failures.
            logging.error(f"Unexpected error fetching comments for {post_urn}: {e}", exc_info=True)
            all_comments_by_post[post_urn] = []

    return all_comments_by_post