Spaces:

GuglielmoTor
/

LinkedinMonitor

Running

App Files Files Community

GuglielmoTor committed on May 16

Commit

2c911b1

verified ·

1 Parent(s): a3a5c36

Update Linkedin_Data_API_Calls.py

Browse files

Files changed (1) hide show

Linkedin_Data_API_Calls.py +94 -15

Linkedin_Data_API_Calls.py CHANGED Viewed

@@ -122,45 +122,124 @@ def fetch_linkedin_posts_core(comm_client_id, community_token, org_urn, count):
     stats_map = {}
     if post_urns_for_stats:
-        batch_size_stats = 20
         for i in range(0, len(post_urns_for_stats), batch_size_stats):
             batch_urns = post_urns_for_stats[i:i+batch_size_stats]
             params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
-            share_idx = 0
-            ugc_idx = 0
             for urn_str in batch_urns:
                 if ":share:" in urn_str:
                     params[f"shares[{share_idx}]"] = urn_str
                     share_idx += 1
                 elif ":ugcPost:" in urn_str:
                     params[f"ugcPosts[{ugc_idx}]"] = urn_str
                     ugc_idx += 1
                 else:
                     logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
                     continue
-            if not share_idx and not ugc_idx:
                 continue
             try:
-                logging.info(f"Fetching stats for batch of {len(batch_urns)} URNs starting with URN: {batch_urns[0]}")
                 stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
-                stat_resp.raise_for_status()
                 stats_data = stat_resp.json()
-                for urn_key, stat_element_values in stats_data.get("results", {}).items():
-                    stats_map[urn_key] = stat_element_values.get("totalShareStatistics", {})
                 if stats_data.get("errors"):
                     for urn_errored, error_detail in stats_data.get("errors", {}).items():
-                        logging.warning(f"Error fetching stats for URN {urn_errored}: {error_detail.get('message', 'Unknown error')}")
-                logging.info(f"Successfully processed stats response for {len(batch_urns)} URNs. Current stats_map size: {len(stats_map)}")
             except requests.exceptions.RequestException as e:
                 status_code = getattr(e.response, 'status_code', 'N/A')
                 response_text = getattr(e.response, 'text', 'No response text')
-                logging.warning(f"Failed to fetch stats for a batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text}")
             except json.JSONDecodeError as e:
-                logging.warning(f"Failed to decode JSON from stats response: {e}. Response: {stat_resp.text if stat_resp else 'No response text'}")
     processed_raw_posts = []
     for p in raw_posts_api:

     stats_map = {}
     if post_urns_for_stats:
+        batch_size_stats = 20
         for i in range(0, len(post_urns_for_stats), batch_size_stats):
             batch_urns = post_urns_for_stats[i:i+batch_size_stats]
             params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
+            share_idx = 0 # Index for share URNs in the current batch's params
+            ugc_idx = 0   # Index for ugcPost URNs in the current batch's params
+            # Keep track of URNs actually added to this batch's parameters for logging
+            urns_in_current_api_call = []
             for urn_str in batch_urns:
                 if ":share:" in urn_str:
                     params[f"shares[{share_idx}]"] = urn_str
                     share_idx += 1
+                    urns_in_current_api_call.append(urn_str)
                 elif ":ugcPost:" in urn_str:
                     params[f"ugcPosts[{ugc_idx}]"] = urn_str
                     ugc_idx += 1
+                    urns_in_current_api_call.append(urn_str)
                 else:
                     logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
                     continue
+            # If no valid URNs were prepared for this batch, skip the API call
+            if not share_idx and not ugc_idx: # or check 'if not urns_in_current_api_call:'
+                logging.info(f"Skipping API call for an empty or invalid batch of URNs (original batch segment size: {len(batch_urns)}).")
                 continue
             try:
+                # Log the URNs being sent in this specific API call
+                logging.info(f"Fetching stats for batch of {len(urns_in_current_api_call)} URNs. First URN in call: {urns_in_current_api_call[0] if urns_in_current_api_call else 'N/A'}")
+                # Actual API call
                 stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
+                stat_resp.raise_for_status() # Raises an HTTPError for bad responses (4XX or 5XX)
                 stats_data = stat_resp.json()
+                # --- Corrected Parsing Logic ---
+                # LinkedIn API for batch stats often returns an "elements" list.
+                elements_from_api = stats_data.get("elements")
+                if isinstance(elements_from_api, list):
+                    if not elements_from_api:
+                        logging.info(f"API returned 'elements' but it's an empty list for the URNs in this call.")
+                    processed_urns_in_batch = 0
+                    for item in elements_from_api:
+                        urn_in_item = None
+                        # Determine the URN key (e.g., 'share' or 'ugcPost')
+                        if "share" in item:
+                            urn_in_item = item.get("share")
+                        elif "ugcPost" in item:
+                            urn_in_item = item.get("ugcPost")
+                        # Add other URN types if necessary, e.g., elif "article" in item: ...
+                        if urn_in_item:
+                            stats_values = item.get("totalShareStatistics", {})
+                            if stats_values: # Only add if there are actual stats
+                                stats_map[urn_in_item] = stats_values
+                                processed_urns_in_batch +=1
+                            else:
+                                # It's possible an URN is returned without stats, or with empty stats
+                                logging.debug(f"No 'totalShareStatistics' data found for URN: {urn_in_item} in API item: {item}")
+                                stats_map[urn_in_item] = {} # Store empty stats if URN was processed but had no data
+                        else:
+                            logging.warning(f"Could not extract a recognized URN key from API element: {item}")
+                    logging.info(f"Successfully processed {processed_urns_in_batch} URNs with stats from the API response for this batch. Current total stats_map size: {len(stats_map)}")
+                elif elements_from_api is None and "results" in stats_data:
+                    # Fallback or alternative check if your API version *does* use "results"
+                    # This was your original attempt. If "elements" is consistently missing,
+                    # you might need to debug the exact structure of "results".
+                    logging.warning(f"API response does not contain 'elements' key, but 'results' key is present. Attempting to parse 'results'. Response keys: {stats_data.keys()}")
+                    results_dict = stats_data.get("results", {})
+                    if isinstance(results_dict, dict):
+                        for urn_key, stat_element_values in results_dict.items():
+                            stats_map[urn_key] = stat_element_values.get("totalShareStatistics", {})
+                        logging.info(f"Processed stats from 'results' dictionary. Current stats_map size: {len(stats_map)}")
+                    else:
+                        logging.error(f"'results' key found but is not a dictionary. Type: {type(results_dict)}")
+                else:
+                    # Neither "elements" (as list) nor "results" (as dict) found as expected
+                    logging.error(f"API response structure not recognized. Expected 'elements' (list) or 'results' (dict). Got keys: {stats_data.keys()}. Full response sample: {str(stats_data)[:500]}")
+                # --- End Corrected Parsing Logic ---
+                # Check for specific errors reported by the API within the JSON response
                 if stats_data.get("errors"):
                     for urn_errored, error_detail in stats_data.get("errors", {}).items():
+                        logging.warning(f"API reported error for URN {urn_errored}: {error_detail.get('message', 'Unknown API error message')}")
+                # This log might be slightly misleading if parsing failed but no exception occurred.
+                # The more specific log after parsing 'elements' is better.
+                # logging.info(f"Successfully processed stats response for {len(urns_in_current_api_call)} URNs. Current stats_map size: {len(stats_map)}")
+            except requests.exceptions.HTTPError as e:
+                # Specific handling for HTTP errors (4xx, 5xx)
+                status_code = e.response.status_code
+                response_text = e.response.text
+                logging.warning(f"HTTP error fetching stats for a batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}") # Log first 500 chars of response
             except requests.exceptions.RequestException as e:
+                # Catch other requests-related errors (e.g., connection issues)
                 status_code = getattr(e.response, 'status_code', 'N/A')
                 response_text = getattr(e.response, 'text', 'No response text')
+                logging.warning(f"Request failed for stats batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}")
             except json.JSONDecodeError as e:
+                # Handle cases where the response is not valid JSON
+                response_text_for_json_error = stat_resp.text if 'stat_resp' in locals() and hasattr(stat_resp, 'text') else 'Response object not available or no text attribute'
+                logging.warning(f"Failed to decode JSON from stats response: {e}. Response text: {response_text_for_json_error[:500]}") # Log first 500 chars
+            except Exception as e:
+                # Catch any other unexpected errors during the batch processing
+                logging.error(f"An unexpected error occurred processing stats batch: {e}", exc_info=True)
+        logging.info(f"Finished processing all URN batches. Final stats_map size: {len(stats_map)}")
     processed_raw_posts = []
     for p in raw_posts_api: