Spaces:
Running
Running
Update Linkedin_Data_API_Calls.py
Browse files- Linkedin_Data_API_Calls.py +94 -15
Linkedin_Data_API_Calls.py
CHANGED
@@ -122,45 +122,124 @@ def fetch_linkedin_posts_core(comm_client_id, community_token, org_urn, count):
|
|
122 |
|
123 |
stats_map = {}
|
124 |
if post_urns_for_stats:
|
125 |
-
batch_size_stats = 20
|
126 |
for i in range(0, len(post_urns_for_stats), batch_size_stats):
|
127 |
batch_urns = post_urns_for_stats[i:i+batch_size_stats]
|
128 |
params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
|
129 |
-
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
131 |
for urn_str in batch_urns:
|
132 |
if ":share:" in urn_str:
|
133 |
params[f"shares[{share_idx}]"] = urn_str
|
134 |
share_idx += 1
|
|
|
135 |
elif ":ugcPost:" in urn_str:
|
136 |
params[f"ugcPosts[{ugc_idx}]"] = urn_str
|
137 |
ugc_idx += 1
|
|
|
138 |
else:
|
139 |
logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
|
140 |
continue
|
141 |
|
142 |
-
|
|
|
|
|
143 |
continue
|
144 |
-
|
145 |
try:
|
146 |
-
|
|
|
|
|
|
|
147 |
stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
|
148 |
-
stat_resp.raise_for_status()
|
149 |
stats_data = stat_resp.json()
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
if stats_data.get("errors"):
|
154 |
for urn_errored, error_detail in stats_data.get("errors", {}).items():
|
155 |
-
logging.warning(f"
|
156 |
-
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
except requests.exceptions.RequestException as e:
|
|
|
159 |
status_code = getattr(e.response, 'status_code', 'N/A')
|
160 |
response_text = getattr(e.response, 'text', 'No response text')
|
161 |
-
logging.warning(f"
|
162 |
except json.JSONDecodeError as e:
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
processed_raw_posts = []
|
166 |
for p in raw_posts_api:
|
|
|
122 |
|
123 |
stats_map = {}
|
124 |
if post_urns_for_stats:
|
125 |
+
batch_size_stats = 20
|
126 |
for i in range(0, len(post_urns_for_stats), batch_size_stats):
|
127 |
batch_urns = post_urns_for_stats[i:i+batch_size_stats]
|
128 |
params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
|
129 |
+
|
130 |
+
share_idx = 0 # Index for share URNs in the current batch's params
|
131 |
+
ugc_idx = 0 # Index for ugcPost URNs in the current batch's params
|
132 |
+
|
133 |
+
# Keep track of URNs actually added to this batch's parameters for logging
|
134 |
+
urns_in_current_api_call = []
|
135 |
+
|
136 |
for urn_str in batch_urns:
|
137 |
if ":share:" in urn_str:
|
138 |
params[f"shares[{share_idx}]"] = urn_str
|
139 |
share_idx += 1
|
140 |
+
urns_in_current_api_call.append(urn_str)
|
141 |
elif ":ugcPost:" in urn_str:
|
142 |
params[f"ugcPosts[{ugc_idx}]"] = urn_str
|
143 |
ugc_idx += 1
|
144 |
+
urns_in_current_api_call.append(urn_str)
|
145 |
else:
|
146 |
logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
|
147 |
continue
|
148 |
|
149 |
+
# If no valid URNs were prepared for this batch, skip the API call
|
150 |
+
if not share_idx and not ugc_idx: # or check 'if not urns_in_current_api_call:'
|
151 |
+
logging.info(f"Skipping API call for an empty or invalid batch of URNs (original batch segment size: {len(batch_urns)}).")
|
152 |
continue
|
153 |
+
|
154 |
try:
|
155 |
+
# Log the URNs being sent in this specific API call
|
156 |
+
logging.info(f"Fetching stats for batch of {len(urns_in_current_api_call)} URNs. First URN in call: {urns_in_current_api_call[0] if urns_in_current_api_call else 'N/A'}")
|
157 |
+
|
158 |
+
# Actual API call
|
159 |
stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
|
160 |
+
stat_resp.raise_for_status() # Raises an HTTPError for bad responses (4XX or 5XX)
|
161 |
stats_data = stat_resp.json()
|
162 |
+
|
163 |
+
# --- Corrected Parsing Logic ---
|
164 |
+
# LinkedIn API for batch stats often returns an "elements" list.
|
165 |
+
elements_from_api = stats_data.get("elements")
|
166 |
+
|
167 |
+
if isinstance(elements_from_api, list):
|
168 |
+
if not elements_from_api:
|
169 |
+
logging.info(f"API returned 'elements' but it's an empty list for the URNs in this call.")
|
170 |
+
|
171 |
+
processed_urns_in_batch = 0
|
172 |
+
for item in elements_from_api:
|
173 |
+
urn_in_item = None
|
174 |
+
# Determine the URN key (e.g., 'share' or 'ugcPost')
|
175 |
+
if "share" in item:
|
176 |
+
urn_in_item = item.get("share")
|
177 |
+
elif "ugcPost" in item:
|
178 |
+
urn_in_item = item.get("ugcPost")
|
179 |
+
# Add other URN types if necessary, e.g., elif "article" in item: ...
|
180 |
+
|
181 |
+
if urn_in_item:
|
182 |
+
stats_values = item.get("totalShareStatistics", {})
|
183 |
+
if stats_values: # Only add if there are actual stats
|
184 |
+
stats_map[urn_in_item] = stats_values
|
185 |
+
processed_urns_in_batch +=1
|
186 |
+
else:
|
187 |
+
# It's possible a URN is returned without stats, or with empty stats
|
188 |
+
logging.debug(f"No 'totalShareStatistics' data found for URN: {urn_in_item} in API item: {item}")
|
189 |
+
stats_map[urn_in_item] = {} # Store empty stats if URN was processed but had no data
|
190 |
+
else:
|
191 |
+
logging.warning(f"Could not extract a recognized URN key from API element: {item}")
|
192 |
+
logging.info(f"Successfully processed {processed_urns_in_batch} URNs with stats from the API response for this batch. Current total stats_map size: {len(stats_map)}")
|
193 |
+
|
194 |
+
elif elements_from_api is None and "results" in stats_data:
|
195 |
+
# Fallback or alternative check if your API version *does* use "results"
|
196 |
+
# This was your original attempt. If "elements" is consistently missing,
|
197 |
+
# you might need to debug the exact structure of "results".
|
198 |
+
logging.warning(f"API response does not contain 'elements' key, but 'results' key is present. Attempting to parse 'results'. Response keys: {stats_data.keys()}")
|
199 |
+
results_dict = stats_data.get("results", {})
|
200 |
+
if isinstance(results_dict, dict):
|
201 |
+
for urn_key, stat_element_values in results_dict.items():
|
202 |
+
stats_map[urn_key] = stat_element_values.get("totalShareStatistics", {})
|
203 |
+
logging.info(f"Processed stats from 'results' dictionary. Current stats_map size: {len(stats_map)}")
|
204 |
+
else:
|
205 |
+
logging.error(f"'results' key found but is not a dictionary. Type: {type(results_dict)}")
|
206 |
+
|
207 |
+
else:
|
208 |
+
# Neither "elements" (as list) nor "results" (as dict) found as expected
|
209 |
+
logging.error(f"API response structure not recognized. Expected 'elements' (list) or 'results' (dict). Got keys: {stats_data.keys()}. Full response sample: {str(stats_data)[:500]}")
|
210 |
+
|
211 |
+
# --- End Corrected Parsing Logic ---
|
212 |
+
|
213 |
+
# Check for specific errors reported by the API within the JSON response
|
214 |
if stats_data.get("errors"):
|
215 |
for urn_errored, error_detail in stats_data.get("errors", {}).items():
|
216 |
+
logging.warning(f"API reported error for URN {urn_errored}: {error_detail.get('message', 'Unknown API error message')}")
|
217 |
+
|
218 |
+
# This log might be slightly misleading if parsing failed but no exception occurred.
|
219 |
+
# The more specific log after parsing 'elements' is better.
|
220 |
+
# logging.info(f"Successfully processed stats response for {len(urns_in_current_api_call)} URNs. Current stats_map size: {len(stats_map)}")
|
221 |
+
|
222 |
+
|
223 |
+
except requests.exceptions.HTTPError as e:
|
224 |
+
# Specific handling for HTTP errors (4xx, 5xx)
|
225 |
+
status_code = e.response.status_code
|
226 |
+
response_text = e.response.text
|
227 |
+
logging.warning(f"HTTP error fetching stats for a batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}") # Log first 500 chars of response
|
228 |
except requests.exceptions.RequestException as e:
|
229 |
+
# Catch other requests-related errors (e.g., connection issues)
|
230 |
status_code = getattr(e.response, 'status_code', 'N/A')
|
231 |
response_text = getattr(e.response, 'text', 'No response text')
|
232 |
+
logging.warning(f"Request failed for stats batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}")
|
233 |
except json.JSONDecodeError as e:
|
234 |
+
# Handle cases where the response is not valid JSON
|
235 |
+
response_text_for_json_error = stat_resp.text if 'stat_resp' in locals() and hasattr(stat_resp, 'text') else 'Response object not available or no text attribute'
|
236 |
+
logging.warning(f"Failed to decode JSON from stats response: {e}. Response text: {response_text_for_json_error[:500]}") # Log first 500 chars
|
237 |
+
except Exception as e:
|
238 |
+
# Catch any other unexpected errors during the batch processing
|
239 |
+
logging.error(f"An unexpected error occurred processing stats batch: {e}", exc_info=True)
|
240 |
+
|
241 |
+
|
242 |
+
logging.info(f"Finished processing all URN batches. Final stats_map size: {len(stats_map)}")
|
243 |
|
244 |
processed_raw_posts = []
|
245 |
for p in raw_posts_api:
|