GuglielmoTor committed on
Commit
2c911b1
·
verified ·
1 Parent(s): a3a5c36

Update Linkedin_Data_API_Calls.py

Browse files
Files changed (1) hide show
  1. Linkedin_Data_API_Calls.py +94 -15
Linkedin_Data_API_Calls.py CHANGED
@@ -122,45 +122,124 @@ def fetch_linkedin_posts_core(comm_client_id, community_token, org_urn, count):
122
 
123
  stats_map = {}
124
  if post_urns_for_stats:
125
- batch_size_stats = 20
126
  for i in range(0, len(post_urns_for_stats), batch_size_stats):
127
  batch_urns = post_urns_for_stats[i:i+batch_size_stats]
128
  params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
129
- share_idx = 0
130
- ugc_idx = 0
 
 
 
 
 
131
  for urn_str in batch_urns:
132
  if ":share:" in urn_str:
133
  params[f"shares[{share_idx}]"] = urn_str
134
  share_idx += 1
 
135
  elif ":ugcPost:" in urn_str:
136
  params[f"ugcPosts[{ugc_idx}]"] = urn_str
137
  ugc_idx += 1
 
138
  else:
139
  logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
140
  continue
141
 
142
- if not share_idx and not ugc_idx:
 
 
143
  continue
144
-
145
  try:
146
- logging.info(f"Fetching stats for batch of {len(batch_urns)} URNs starting with URN: {batch_urns[0]}")
 
 
 
147
  stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
148
- stat_resp.raise_for_status()
149
  stats_data = stat_resp.json()
150
- for urn_key, stat_element_values in stats_data.get("results", {}).items():
151
- stats_map[urn_key] = stat_element_values.get("totalShareStatistics", {})
152
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  if stats_data.get("errors"):
154
  for urn_errored, error_detail in stats_data.get("errors", {}).items():
155
- logging.warning(f"Error fetching stats for URN {urn_errored}: {error_detail.get('message', 'Unknown error')}")
156
-
157
- logging.info(f"Successfully processed stats response for {len(batch_urns)} URNs. Current stats_map size: {len(stats_map)}")
 
 
 
 
 
 
 
 
 
158
  except requests.exceptions.RequestException as e:
 
159
  status_code = getattr(e.response, 'status_code', 'N/A')
160
  response_text = getattr(e.response, 'text', 'No response text')
161
- logging.warning(f"Failed to fetch stats for a batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text}")
162
  except json.JSONDecodeError as e:
163
- logging.warning(f"Failed to decode JSON from stats response: {e}. Response: {stat_resp.text if stat_resp else 'No response text'}")
 
 
 
 
 
 
 
 
164
 
165
  processed_raw_posts = []
166
  for p in raw_posts_api:
 
122
 
123
  stats_map = {}
124
  if post_urns_for_stats:
125
+ batch_size_stats = 20
126
  for i in range(0, len(post_urns_for_stats), batch_size_stats):
127
  batch_urns = post_urns_for_stats[i:i+batch_size_stats]
128
  params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
129
+
130
+ share_idx = 0 # Index for share URNs in the current batch's params
131
+ ugc_idx = 0 # Index for ugcPost URNs in the current batch's params
132
+
133
+ # Keep track of URNs actually added to this batch's parameters for logging
134
+ urns_in_current_api_call = []
135
+
136
  for urn_str in batch_urns:
137
  if ":share:" in urn_str:
138
  params[f"shares[{share_idx}]"] = urn_str
139
  share_idx += 1
140
+ urns_in_current_api_call.append(urn_str)
141
  elif ":ugcPost:" in urn_str:
142
  params[f"ugcPosts[{ugc_idx}]"] = urn_str
143
  ugc_idx += 1
144
+ urns_in_current_api_call.append(urn_str)
145
  else:
146
  logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
147
  continue
148
 
149
+ # If no valid URNs were prepared for this batch, skip the API call
150
+ if not share_idx and not ugc_idx: # or check 'if not urns_in_current_api_call:'
151
+ logging.info(f"Skipping API call for an empty or invalid batch of URNs (original batch segment size: {len(batch_urns)}).")
152
  continue
153
+
154
  try:
155
+ # Log the URNs being sent in this specific API call
156
+ logging.info(f"Fetching stats for batch of {len(urns_in_current_api_call)} URNs. First URN in call: {urns_in_current_api_call[0] if urns_in_current_api_call else 'N/A'}")
157
+
158
+ # Actual API call
159
  stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
160
+ stat_resp.raise_for_status() # Raises an HTTPError for bad responses (4XX or 5XX)
161
  stats_data = stat_resp.json()
162
+
163
+ # --- Corrected Parsing Logic ---
164
+ # LinkedIn API for batch stats often returns an "elements" list.
165
+ elements_from_api = stats_data.get("elements")
166
+
167
+ if isinstance(elements_from_api, list):
168
+ if not elements_from_api:
169
+ logging.info(f"API returned 'elements' but it's an empty list for the URNs in this call.")
170
+
171
+ processed_urns_in_batch = 0
172
+ for item in elements_from_api:
173
+ urn_in_item = None
174
+ # Determine the URN key (e.g., 'share' or 'ugcPost')
175
+ if "share" in item:
176
+ urn_in_item = item.get("share")
177
+ elif "ugcPost" in item:
178
+ urn_in_item = item.get("ugcPost")
179
+ # Add other URN types if necessary, e.g., elif "article" in item: ...
180
+
181
+ if urn_in_item:
182
+ stats_values = item.get("totalShareStatistics", {})
183
+ if stats_values: # Only add if there are actual stats
184
+ stats_map[urn_in_item] = stats_values
185
+ processed_urns_in_batch +=1
186
+ else:
187
+ # It's possible an URN is returned without stats, or with empty stats
188
+ logging.debug(f"No 'totalShareStatistics' data found for URN: {urn_in_item} in API item: {item}")
189
+ stats_map[urn_in_item] = {} # Store empty stats if URN was processed but had no data
190
+ else:
191
+ logging.warning(f"Could not extract a recognized URN key from API element: {item}")
192
+ logging.info(f"Successfully processed {processed_urns_in_batch} URNs with stats from the API response for this batch. Current total stats_map size: {len(stats_map)}")
193
+
194
+ elif elements_from_api is None and "results" in stats_data:
195
+ # Fallback or alternative check if your API version *does* use "results"
196
+ # This was your original attempt. If "elements" is consistently missing,
197
+ # you might need to debug the exact structure of "results".
198
+ logging.warning(f"API response does not contain 'elements' key, but 'results' key is present. Attempting to parse 'results'. Response keys: {stats_data.keys()}")
199
+ results_dict = stats_data.get("results", {})
200
+ if isinstance(results_dict, dict):
201
+ for urn_key, stat_element_values in results_dict.items():
202
+ stats_map[urn_key] = stat_element_values.get("totalShareStatistics", {})
203
+ logging.info(f"Processed stats from 'results' dictionary. Current stats_map size: {len(stats_map)}")
204
+ else:
205
+ logging.error(f"'results' key found but is not a dictionary. Type: {type(results_dict)}")
206
+
207
+ else:
208
+ # Neither "elements" (as list) nor "results" (as dict) found as expected
209
+ logging.error(f"API response structure not recognized. Expected 'elements' (list) or 'results' (dict). Got keys: {stats_data.keys()}. Full response sample: {str(stats_data)[:500]}")
210
+
211
+ # --- End Corrected Parsing Logic ---
212
+
213
+ # Check for specific errors reported by the API within the JSON response
214
  if stats_data.get("errors"):
215
  for urn_errored, error_detail in stats_data.get("errors", {}).items():
216
+ logging.warning(f"API reported error for URN {urn_errored}: {error_detail.get('message', 'Unknown API error message')}")
217
+
218
+ # This log might be slightly misleading if parsing failed but no exception occurred.
219
+ # The more specific log after parsing 'elements' is better.
220
+ # logging.info(f"Successfully processed stats response for {len(urns_in_current_api_call)} URNs. Current stats_map size: {len(stats_map)}")
221
+
222
+
223
+ except requests.exceptions.HTTPError as e:
224
+ # Specific handling for HTTP errors (4xx, 5xx)
225
+ status_code = e.response.status_code
226
+ response_text = e.response.text
227
+ logging.warning(f"HTTP error fetching stats for a batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}") # Log first 500 chars of response
228
  except requests.exceptions.RequestException as e:
229
+ # Catch other requests-related errors (e.g., connection issues)
230
  status_code = getattr(e.response, 'status_code', 'N/A')
231
  response_text = getattr(e.response, 'text', 'No response text')
232
+ logging.warning(f"Request failed for stats batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}")
233
  except json.JSONDecodeError as e:
234
+ # Handle cases where the response is not valid JSON
235
+ response_text_for_json_error = stat_resp.text if 'stat_resp' in locals() and hasattr(stat_resp, 'text') else 'Response object not available or no text attribute'
236
+ logging.warning(f"Failed to decode JSON from stats response: {e}. Response text: {response_text_for_json_error[:500]}") # Log first 500 chars
237
+ except Exception as e:
238
+ # Catch any other unexpected errors during the batch processing
239
+ logging.error(f"An unexpected error occurred processing stats batch: {e}", exc_info=True)
240
+
241
+
242
+ logging.info(f"Finished processing all URN batches. Final stats_map size: {len(stats_map)}")
243
 
244
  processed_raw_posts = []
245
  for p in raw_posts_api: