Spaces:
Sleeping
Sleeping
Update Linkedin_Data_API_Calls.py
Browse files- Linkedin_Data_API_Calls.py +94 -15
Linkedin_Data_API_Calls.py
CHANGED
|
@@ -122,45 +122,124 @@ def fetch_linkedin_posts_core(comm_client_id, community_token, org_urn, count):
|
|
| 122 |
|
| 123 |
stats_map = {}
|
| 124 |
if post_urns_for_stats:
|
| 125 |
-
batch_size_stats = 20
|
| 126 |
for i in range(0, len(post_urns_for_stats), batch_size_stats):
|
| 127 |
batch_urns = post_urns_for_stats[i:i+batch_size_stats]
|
| 128 |
params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
for urn_str in batch_urns:
|
| 132 |
if ":share:" in urn_str:
|
| 133 |
params[f"shares[{share_idx}]"] = urn_str
|
| 134 |
share_idx += 1
|
|
|
|
| 135 |
elif ":ugcPost:" in urn_str:
|
| 136 |
params[f"ugcPosts[{ugc_idx}]"] = urn_str
|
| 137 |
ugc_idx += 1
|
|
|
|
| 138 |
else:
|
| 139 |
logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
|
| 140 |
continue
|
| 141 |
|
| 142 |
-
|
|
|
|
|
|
|
| 143 |
continue
|
| 144 |
-
|
| 145 |
try:
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
| 147 |
stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
|
| 148 |
-
stat_resp.raise_for_status()
|
| 149 |
stats_data = stat_resp.json()
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
if stats_data.get("errors"):
|
| 154 |
for urn_errored, error_detail in stats_data.get("errors", {}).items():
|
| 155 |
-
logging.warning(f"
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
except requests.exceptions.RequestException as e:
|
|
|
|
| 159 |
status_code = getattr(e.response, 'status_code', 'N/A')
|
| 160 |
response_text = getattr(e.response, 'text', 'No response text')
|
| 161 |
-
logging.warning(f"
|
| 162 |
except json.JSONDecodeError as e:
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
processed_raw_posts = []
|
| 166 |
for p in raw_posts_api:
|
|
|
|
| 122 |
|
| 123 |
stats_map = {}
|
| 124 |
if post_urns_for_stats:
|
| 125 |
+
batch_size_stats = 20
|
| 126 |
for i in range(0, len(post_urns_for_stats), batch_size_stats):
|
| 127 |
batch_urns = post_urns_for_stats[i:i+batch_size_stats]
|
| 128 |
params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
|
| 129 |
+
|
| 130 |
+
share_idx = 0 # Index for share URNs in the current batch's params
|
| 131 |
+
ugc_idx = 0 # Index for ugcPost URNs in the current batch's params
|
| 132 |
+
|
| 133 |
+
# Keep track of URNs actually added to this batch's parameters for logging
|
| 134 |
+
urns_in_current_api_call = []
|
| 135 |
+
|
| 136 |
for urn_str in batch_urns:
|
| 137 |
if ":share:" in urn_str:
|
| 138 |
params[f"shares[{share_idx}]"] = urn_str
|
| 139 |
share_idx += 1
|
| 140 |
+
urns_in_current_api_call.append(urn_str)
|
| 141 |
elif ":ugcPost:" in urn_str:
|
| 142 |
params[f"ugcPosts[{ugc_idx}]"] = urn_str
|
| 143 |
ugc_idx += 1
|
| 144 |
+
urns_in_current_api_call.append(urn_str)
|
| 145 |
else:
|
| 146 |
logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
|
| 147 |
continue
|
| 148 |
|
| 149 |
+
# If no valid URNs were prepared for this batch, skip the API call
|
| 150 |
+
if not share_idx and not ugc_idx: # or check 'if not urns_in_current_api_call:'
|
| 151 |
+
logging.info(f"Skipping API call for an empty or invalid batch of URNs (original batch segment size: {len(batch_urns)}).")
|
| 152 |
continue
|
| 153 |
+
|
| 154 |
try:
|
| 155 |
+
# Log the URNs being sent in this specific API call
|
| 156 |
+
logging.info(f"Fetching stats for batch of {len(urns_in_current_api_call)} URNs. First URN in call: {urns_in_current_api_call[0] if urns_in_current_api_call else 'N/A'}")
|
| 157 |
+
|
| 158 |
+
# Actual API call
|
| 159 |
stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
|
| 160 |
+
stat_resp.raise_for_status() # Raises an HTTPError for bad responses (4XX or 5XX)
|
| 161 |
stats_data = stat_resp.json()
|
| 162 |
+
|
| 163 |
+
# --- Corrected Parsing Logic ---
|
| 164 |
+
# LinkedIn API for batch stats often returns an "elements" list.
|
| 165 |
+
elements_from_api = stats_data.get("elements")
|
| 166 |
+
|
| 167 |
+
if isinstance(elements_from_api, list):
|
| 168 |
+
if not elements_from_api:
|
| 169 |
+
logging.info(f"API returned 'elements' but it's an empty list for the URNs in this call.")
|
| 170 |
+
|
| 171 |
+
processed_urns_in_batch = 0
|
| 172 |
+
for item in elements_from_api:
|
| 173 |
+
urn_in_item = None
|
| 174 |
+
# Determine the URN key (e.g., 'share' or 'ugcPost')
|
| 175 |
+
if "share" in item:
|
| 176 |
+
urn_in_item = item.get("share")
|
| 177 |
+
elif "ugcPost" in item:
|
| 178 |
+
urn_in_item = item.get("ugcPost")
|
| 179 |
+
# Add other URN types if necessary, e.g., elif "article" in item: ...
|
| 180 |
+
|
| 181 |
+
if urn_in_item:
|
| 182 |
+
stats_values = item.get("totalShareStatistics", {})
|
| 183 |
+
if stats_values: # Only add if there are actual stats
|
| 184 |
+
stats_map[urn_in_item] = stats_values
|
| 185 |
+
processed_urns_in_batch +=1
|
| 186 |
+
else:
|
| 187 |
+
# A URN may be returned without stats, or with empty stats.
|
| 188 |
+
logging.debug(f"No 'totalShareStatistics' data found for URN: {urn_in_item} in API item: {item}")
|
| 189 |
+
stats_map[urn_in_item] = {} # Store empty stats if URN was processed but had no data
|
| 190 |
+
else:
|
| 191 |
+
logging.warning(f"Could not extract a recognized URN key from API element: {item}")
|
| 192 |
+
logging.info(f"Successfully processed {processed_urns_in_batch} URNs with stats from the API response for this batch. Current total stats_map size: {len(stats_map)}")
|
| 193 |
+
|
| 194 |
+
elif elements_from_api is None and "results" in stats_data:
|
| 195 |
+
# Fallback for responses that carry a "results" mapping instead of "elements".
|
| 196 |
+
# This was the original parsing approach. If "elements" is consistently missing,
|
| 197 |
+
# inspect the exact structure of "results" before relying on this branch.
|
| 198 |
+
logging.warning(f"API response does not contain 'elements' key, but 'results' key is present. Attempting to parse 'results'. Response keys: {stats_data.keys()}")
|
| 199 |
+
results_dict = stats_data.get("results", {})
|
| 200 |
+
if isinstance(results_dict, dict):
|
| 201 |
+
for urn_key, stat_element_values in results_dict.items():
|
| 202 |
+
stats_map[urn_key] = stat_element_values.get("totalShareStatistics", {})
|
| 203 |
+
logging.info(f"Processed stats from 'results' dictionary. Current stats_map size: {len(stats_map)}")
|
| 204 |
+
else:
|
| 205 |
+
logging.error(f"'results' key found but is not a dictionary. Type: {type(results_dict)}")
|
| 206 |
+
|
| 207 |
+
else:
|
| 208 |
+
# Neither "elements" (as list) nor "results" (as dict) found as expected
|
| 209 |
+
logging.error(f"API response structure not recognized. Expected 'elements' (list) or 'results' (dict). Got keys: {stats_data.keys()}. Full response sample: {str(stats_data)[:500]}")
|
| 210 |
+
|
| 211 |
+
# --- End Corrected Parsing Logic ---
|
| 212 |
+
|
| 213 |
+
# Check for specific errors reported by the API within the JSON response
|
| 214 |
if stats_data.get("errors"):
|
| 215 |
for urn_errored, error_detail in stats_data.get("errors", {}).items():
|
| 216 |
+
logging.warning(f"API reported error for URN {urn_errored}: {error_detail.get('message', 'Unknown API error message')}")
|
| 217 |
+
|
| 218 |
+
# This log might be slightly misleading if parsing failed but no exception occurred.
|
| 219 |
+
# The more specific log after parsing 'elements' is better.
|
| 220 |
+
# logging.info(f"Successfully processed stats response for {len(urns_in_current_api_call)} URNs. Current stats_map size: {len(stats_map)}")
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
except requests.exceptions.HTTPError as e:
|
| 224 |
+
# Specific handling for HTTP errors (4xx, 5xx)
|
| 225 |
+
status_code = e.response.status_code
|
| 226 |
+
response_text = e.response.text
|
| 227 |
+
logging.warning(f"HTTP error fetching stats for a batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}") # Log first 500 chars of response
|
| 228 |
except requests.exceptions.RequestException as e:
|
| 229 |
+
# Catch other requests-related errors (e.g., connection issues)
|
| 230 |
status_code = getattr(e.response, 'status_code', 'N/A')
|
| 231 |
response_text = getattr(e.response, 'text', 'No response text')
|
| 232 |
+
logging.warning(f"Request failed for stats batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}")
|
| 233 |
except json.JSONDecodeError as e:
|
| 234 |
+
# Handle cases where the response is not valid JSON
|
| 235 |
+
response_text_for_json_error = stat_resp.text if 'stat_resp' in locals() and hasattr(stat_resp, 'text') else 'Response object not available or no text attribute'
|
| 236 |
+
logging.warning(f"Failed to decode JSON from stats response: {e}. Response text: {response_text_for_json_error[:500]}") # Log first 500 chars
|
| 237 |
+
except Exception as e:
|
| 238 |
+
# Catch any other unexpected errors during the batch processing
|
| 239 |
+
logging.error(f"An unexpected error occurred processing stats batch: {e}", exc_info=True)
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
logging.info(f"Finished processing all URN batches. Final stats_map size: {len(stats_map)}")
|
| 243 |
|
| 244 |
processed_raw_posts = []
|
| 245 |
for p in raw_posts_api:
|