GuglielmoTor committed on
Commit
543fdff
·
verified ·
1 Parent(s): 517193e

Update linkedin_follower_stats.py

Browse files
Files changed (1) hide show
  1. linkedin_follower_stats.py +52 -45
linkedin_follower_stats.py CHANGED
@@ -27,20 +27,20 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,
27
  """
28
  mapping = {}
29
  try:
30
- logging.debug(f"Fetching names from URL: {url} with params: {params}")
31
  response = session.get(url, params=params)
32
  response.raise_for_status()
33
  data = response.json()
34
 
35
  items = data
36
- for key in result_key_path: # Navigate to the list/dict of items
37
  if isinstance(items, dict):
38
  items = items.get(key, [])
39
  else:
40
  logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {url}. Check result_key_path.")
41
  return mapping
42
 
43
- if isinstance(items, dict): # For batch responses like geo/industry (where keys are IDs)
44
  for item_id_str, item_data in items.items():
45
  name = item_data
46
  for key_nav in name_key_path:
@@ -53,7 +53,7 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,
53
  mapping[item_id_str] = name
54
  else:
55
  logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {url}")
56
- elif isinstance(items, list): # For list responses like functions/seniorities
57
  for item in items:
58
  item_id_val = item.get(id_key)
59
  name = item
@@ -72,7 +72,7 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,
72
 
73
  except requests.exceptions.RequestException as e:
74
  status_code = getattr(e.response, 'status_code', 'N/A')
75
- error_text = getattr(e.response, 'text', str(e))
76
  logging.error(f"Error fetching names from {url} (Status: {status_code}): {error_text}")
77
  except json.JSONDecodeError as e:
78
  logging.error(f"Error decoding JSON for names from {url}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
@@ -83,16 +83,14 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,
83
  def get_functions_map(session):
84
  """Fetches all LinkedIn functions and returns a map of {id: name}."""
85
  url = f"{API_V2_BASE}/functions"
86
- # Rely on Accept-Language header from session for localization
87
- params = {}
88
  logging.info("Fetching all LinkedIn functions.")
89
  return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
90
 
91
  def get_seniorities_map(session):
92
  """Fetches all LinkedIn seniorities and returns a map of {id: name}."""
93
  url = f"{API_V2_BASE}/seniorities"
94
- # Rely on Accept-Language header from session for localization
95
- params = {}
96
  logging.info("Fetching all LinkedIn seniorities.")
97
  return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
98
 
@@ -104,8 +102,13 @@ def get_industries_map(session, industry_urns, version="DEFAULT"):
104
  if not unique_ids: return {}
105
 
106
  url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
107
- params = {'ids': unique_ids, 'locale': 'en_US'} # Corrected locale parameter
108
- logging.info(f"Fetching names for {len(unique_ids)} unique industry IDs.")
 
 
 
 
 
109
  return _fetch_linkedin_names(session, url, params, ["results"], ["name", "localized", "en_US"])
110
 
111
 
@@ -116,9 +119,14 @@ def get_geo_map(session, geo_urns):
116
  unique_ids = list(set(filter(None, geo_ids)))
117
  if not unique_ids: return {}
118
 
119
- ids_param_value = "List(" + ",".join(map(str,unique_ids)) + ")"
120
- locale_param = "en_US" # Corrected locale parameter
121
- url = f"{API_V2_BASE}/geo?ids={quote(ids_param_value)}&locale={locale_param}"
 
 
 
 
 
122
  logging.info(f"Fetching names for {len(unique_ids)} unique geo IDs using URL: {url}")
123
  return _fetch_linkedin_names(session, url, {}, ["results"], ["defaultLocalizedName", "value"])
124
 
@@ -142,24 +150,29 @@ def _parse_urn_to_id(urn_string):
142
  def fetch_monthly_follower_gains(session, org_urn):
143
  """
144
  Fetches monthly follower gains for the last 12-13 months to ensure full coverage.
 
145
  """
146
  results = []
147
  now = datetime.now(timezone.utc)
 
 
148
  thirteen_months_ago = now - relativedelta(months=13)
149
  start_of_period = thirteen_months_ago.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
150
  start_ms = int(start_of_period.timestamp() * 1000)
151
 
152
- url = (
153
- f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
154
- f"?q=organizationalEntity"
155
- f"&organizationalEntity={quote(org_urn)}"
156
- f"&timeGranularity=MONTH" # Corrected parameter name
157
- f"&startTime={start_ms}" # Corrected parameter name
158
- )
159
- logging.info(f"Fetching monthly follower gains from: {url}")
 
 
160
 
161
  try:
162
- response = session.get(url)
163
  response.raise_for_status()
164
  data = response.json()
165
 
@@ -171,7 +184,7 @@ def fetch_monthly_follower_gains(session, org_urn):
171
  continue
172
 
173
  date_obj = datetime.fromtimestamp(start_timestamp_ms / 1000, tz=timezone.utc)
174
- date_str = date_obj.strftime('%Y-%m-%d')
175
 
176
  follower_gains = item.get("followerGains", {})
177
  organic_gain = follower_gains.get("organicFollowerGain", 0)
@@ -201,14 +214,17 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
201
  Fetches current follower demographics, applying Top-N for specified categories.
202
  """
203
  final_demographics_results = []
204
- url = (
205
- f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
206
- f"?q=organizationalEntity&organizationalEntity={quote(org_urn)}"
207
- )
208
- logging.info(f"Fetching follower demographics from: {url} for org URN {org_urn}")
 
 
 
209
 
210
  try:
211
- response = session.get(url)
212
  response.raise_for_status()
213
  data = response.json()
214
 
@@ -217,9 +233,8 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
217
  logging.warning(f"No elements found in follower demographics response for {org_urn}.")
218
  return []
219
 
220
- stat_element = elements[0] # Data is usually in the first element
221
 
222
- # Helper to convert raw API items for a single demographic type into our structured list
223
  def _get_entries_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
224
  current_type_entries = []
225
  if not raw_items_list:
@@ -228,13 +243,11 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
228
 
229
  for item in raw_items_list:
230
  category_name_val = "Unknown"
231
- # For associationType, the id_field_name_in_item is the direct name
232
  if type_name == "follower_association":
233
  category_name_val = item.get(id_field_name_in_item, f"Unknown {id_field_name_in_item}")
234
- else: # For URN-based categories
235
  urn_val = item.get(id_field_name_in_item)
236
  entity_id = _parse_urn_to_id(urn_val)
237
- # Use str(entity_id) for map lookup as map keys were stored as strings
238
  category_name_val = id_map.get(str(entity_id), f"Unknown {type_name.split('_')[-1].capitalize()} (ID: {entity_id if entity_id else urn_val})")
239
 
240
  counts = item.get("followerCounts", {})
@@ -250,7 +263,6 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
250
  })
251
  return current_type_entries
252
 
253
- # Fetch live maps for industries and geo as they depend on URNs from the current API response
254
  industry_urns_to_map = [item.get("industry") for item in stat_element.get("followerCountsByIndustry", []) if item.get("industry")]
255
  geo_urns_to_map = [item.get("geo") for item in stat_element.get("followerCountsByGeoCountry", []) if item.get("geo")]
256
 
@@ -262,7 +274,7 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
262
  {"items_key": "followerCountsByFunction", "type_name": "follower_function", "id_map": functions_map, "id_field": "function", "top_n": 10},
263
  {"items_key": "followerCountsByIndustry", "type_name": "follower_industry", "id_map": live_industries_map, "id_field": "industry", "top_n": 10},
264
  {"items_key": "followerCountsByGeoCountry", "type_name": "follower_geo", "id_map": live_geo_map, "id_field": "geo", "top_n": 10},
265
- {"items_key": "followerCountsByAssociationType", "type_name": "follower_association", "id_map": {}, "id_field": "associationType", "top_n": None} # Keep all associations
266
  ]
267
 
268
  for config in demographic_configs:
@@ -270,15 +282,14 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
270
  processed_entries = _get_entries_for_type(raw_items, config["type_name"], config["id_map"], config["id_field"], org_urn)
271
 
272
  if config["top_n"] is not None and processed_entries:
273
- # Sort by organic follower count (ensure it's numeric)
274
- for entry in processed_entries: # Ensure numeric for sorting
275
  if not isinstance(entry.get("follower_count_organic"), (int, float)):
276
  entry["follower_count_organic"] = 0
277
  sorted_entries = sorted(processed_entries, key=lambda x: x.get("follower_count_organic", 0), reverse=True)
278
  final_demographics_results.extend(sorted_entries[:config["top_n"]])
279
  logging.debug(f"Added top {config['top_n']} for {config['type_name']}. Count: {len(sorted_entries[:config['top_n']])}")
280
  else:
281
- final_demographics_results.extend(processed_entries) # Add all if top_n is None or no entries
282
  logging.debug(f"Added all for {config['type_name']}. Count: {len(processed_entries)}")
283
 
284
  logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")
@@ -312,7 +323,7 @@ def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
312
  session.headers.update({
313
  "X-Restli-Protocol-Version": "2.0.0",
314
  "LinkedIn-Version": LINKEDIN_API_VERSION,
315
- # "Accept-Language": "en_US" # Consider adding if not set by create_session and locale issues persist for v2 name lookups
316
  })
317
  except Exception as e:
318
  logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
@@ -320,9 +331,6 @@ def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
320
 
321
  logging.info(f"Starting follower stats retrieval for org: {org_urn}")
322
 
323
- # These maps are fetched once per call to get_linkedin_follower_stats
324
- # For industries and geo, the maps will be fetched live within fetch_follower_demographics
325
- # as they depend on URNs from the API response itself.
326
  functions_map = get_functions_map(session)
327
  seniorities_map = get_seniorities_map(session)
328
 
@@ -334,7 +342,6 @@ def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
334
  monthly_gains = fetch_monthly_follower_gains(session, org_urn)
335
  all_follower_data.extend(monthly_gains)
336
 
337
- # Pass pre-fetched function and seniority maps. Industry and Geo maps are fetched inside.
338
  demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map)
339
  all_follower_data.extend(demographics)
340
 
 
27
  """
28
  mapping = {}
29
  try:
30
+ logging.debug(f"Fetching names from URL: {url} with params: {json.dumps(params)}") # Log params for clarity
31
  response = session.get(url, params=params)
32
  response.raise_for_status()
33
  data = response.json()
34
 
35
  items = data
36
+ for key in result_key_path:
37
  if isinstance(items, dict):
38
  items = items.get(key, [])
39
  else:
40
  logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {url}. Check result_key_path.")
41
  return mapping
42
 
43
+ if isinstance(items, dict):
44
  for item_id_str, item_data in items.items():
45
  name = item_data
46
  for key_nav in name_key_path:
 
53
  mapping[item_id_str] = name
54
  else:
55
  logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {url}")
56
+ elif isinstance(items, list):
57
  for item in items:
58
  item_id_val = item.get(id_key)
59
  name = item
 
72
 
73
  except requests.exceptions.RequestException as e:
74
  status_code = getattr(e.response, 'status_code', 'N/A')
75
+ error_text = getattr(e.response, 'text', str(e)) # Log the raw error text
76
  logging.error(f"Error fetching names from {url} (Status: {status_code}): {error_text}")
77
  except json.JSONDecodeError as e:
78
  logging.error(f"Error decoding JSON for names from {url}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
 
83
  def get_functions_map(session):
84
  """Fetches all LinkedIn functions and returns a map of {id: name}."""
85
  url = f"{API_V2_BASE}/functions"
86
+ params = {} # Relies on Accept-Language header from session
 
87
  logging.info("Fetching all LinkedIn functions.")
88
  return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
89
 
90
  def get_seniorities_map(session):
91
  """Fetches all LinkedIn seniorities and returns a map of {id: name}."""
92
  url = f"{API_V2_BASE}/seniorities"
93
+ params = {} # Relies on Accept-Language header from session
 
94
  logging.info("Fetching all LinkedIn seniorities.")
95
  return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
96
 
 
102
  if not unique_ids: return {}
103
 
104
  url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
105
+ # As per LinkedIn docs for BATCH_GET: ids={id1}&ids={id2}&locale.language=en&locale.country=US
106
+ params = {
107
+ 'ids': unique_ids, # requests library will format this as ids=id1&ids=id2...
108
+ 'locale.language': 'en',
109
+ 'locale.country': 'US'
110
+ }
111
+ logging.info(f"Fetching names for {len(unique_ids)} unique industry IDs using BATCH_GET.")
112
  return _fetch_linkedin_names(session, url, params, ["results"], ["name", "localized", "en_US"])
113
 
114
 
 
119
  unique_ids = list(set(filter(None, geo_ids)))
120
  if not unique_ids: return {}
121
 
122
+ # As per LinkedIn docs for BATCH_GET: ids=List(12345,23456)&locale=(language:en,country:US)
123
+ ids_param_string = "List(" + ",".join(map(str, unique_ids)) + ")"
124
+ locale_param_string = "(language:en,country:US)" # Must be exactly this string format
125
+
126
+ # Parameters must be passed in the URL string directly for this specific API format
127
+ # The `params` dict for session.get() will be empty.
128
+ url = f"{API_V2_BASE}/geo?ids={quote(ids_param_string)}&locale={quote(locale_param_string)}"
129
+
130
  logging.info(f"Fetching names for {len(unique_ids)} unique geo IDs using URL: {url}")
131
  return _fetch_linkedin_names(session, url, {}, ["results"], ["defaultLocalizedName", "value"])
132
 
 
150
  def fetch_monthly_follower_gains(session, org_urn):
151
  """
152
  Fetches monthly follower gains for the last 12-13 months to ensure full coverage.
153
+ Uses parameter names as confirmed by user's working script.
154
  """
155
  results = []
156
  now = datetime.now(timezone.utc)
157
+ # Go back 13 months to ensure we capture at least 12 full previous months
158
+ # and have a buffer, as LinkedIn might report based on full previous months.
159
  thirteen_months_ago = now - relativedelta(months=13)
160
  start_of_period = thirteen_months_ago.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
161
  start_ms = int(start_of_period.timestamp() * 1000)
162
 
163
+ # Parameters as per user's working script and common LinkedIn patterns for time-bound stats
164
+ params = {
165
+ 'q': 'organizationalEntity',
166
+ 'organizationalEntity': org_urn,
167
+ 'timeIntervals.timeGranularityType': 'MONTH',
168
+ 'timeIntervals.timeRange.start': start_ms
169
+ }
170
+ url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
171
+
172
+ logging.info(f"Fetching monthly follower gains from: {url} with params: {json.dumps(params)}")
173
 
174
  try:
175
+ response = session.get(url, params=params)
176
  response.raise_for_status()
177
  data = response.json()
178
 
 
184
  continue
185
 
186
  date_obj = datetime.fromtimestamp(start_timestamp_ms / 1000, tz=timezone.utc)
187
+ date_str = date_obj.strftime('%Y-%m-%d') # First day of the month
188
 
189
  follower_gains = item.get("followerGains", {})
190
  organic_gain = follower_gains.get("organicFollowerGain", 0)
 
214
  Fetches current follower demographics, applying Top-N for specified categories.
215
  """
216
  final_demographics_results = []
217
+ # Parameters for the main demographics call
218
+ params = {
219
+ 'q': 'organizationalEntity',
220
+ 'organizationalEntity': org_urn
221
+ }
222
+ url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
223
+
224
+ logging.info(f"Fetching follower demographics from: {url} for org URN {org_urn} with params: {json.dumps(params)}")
225
 
226
  try:
227
+ response = session.get(url, params=params)
228
  response.raise_for_status()
229
  data = response.json()
230
 
 
233
  logging.warning(f"No elements found in follower demographics response for {org_urn}.")
234
  return []
235
 
236
+ stat_element = elements[0]
237
 
 
238
  def _get_entries_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
239
  current_type_entries = []
240
  if not raw_items_list:
 
243
 
244
  for item in raw_items_list:
245
  category_name_val = "Unknown"
 
246
  if type_name == "follower_association":
247
  category_name_val = item.get(id_field_name_in_item, f"Unknown {id_field_name_in_item}")
248
+ else:
249
  urn_val = item.get(id_field_name_in_item)
250
  entity_id = _parse_urn_to_id(urn_val)
 
251
  category_name_val = id_map.get(str(entity_id), f"Unknown {type_name.split('_')[-1].capitalize()} (ID: {entity_id if entity_id else urn_val})")
252
 
253
  counts = item.get("followerCounts", {})
 
263
  })
264
  return current_type_entries
265
 
 
266
  industry_urns_to_map = [item.get("industry") for item in stat_element.get("followerCountsByIndustry", []) if item.get("industry")]
267
  geo_urns_to_map = [item.get("geo") for item in stat_element.get("followerCountsByGeoCountry", []) if item.get("geo")]
268
 
 
274
  {"items_key": "followerCountsByFunction", "type_name": "follower_function", "id_map": functions_map, "id_field": "function", "top_n": 10},
275
  {"items_key": "followerCountsByIndustry", "type_name": "follower_industry", "id_map": live_industries_map, "id_field": "industry", "top_n": 10},
276
  {"items_key": "followerCountsByGeoCountry", "type_name": "follower_geo", "id_map": live_geo_map, "id_field": "geo", "top_n": 10},
277
+ {"items_key": "followerCountsByAssociationType", "type_name": "follower_association", "id_map": {}, "id_field": "associationType", "top_n": None}
278
  ]
279
 
280
  for config in demographic_configs:
 
282
  processed_entries = _get_entries_for_type(raw_items, config["type_name"], config["id_map"], config["id_field"], org_urn)
283
 
284
  if config["top_n"] is not None and processed_entries:
285
+ for entry in processed_entries:
 
286
  if not isinstance(entry.get("follower_count_organic"), (int, float)):
287
  entry["follower_count_organic"] = 0
288
  sorted_entries = sorted(processed_entries, key=lambda x: x.get("follower_count_organic", 0), reverse=True)
289
  final_demographics_results.extend(sorted_entries[:config["top_n"]])
290
  logging.debug(f"Added top {config['top_n']} for {config['type_name']}. Count: {len(sorted_entries[:config['top_n']])}")
291
  else:
292
+ final_demographics_results.extend(processed_entries)
293
  logging.debug(f"Added all for {config['type_name']}. Count: {len(processed_entries)}")
294
 
295
  logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")
 
323
  session.headers.update({
324
  "X-Restli-Protocol-Version": "2.0.0",
325
  "LinkedIn-Version": LINKEDIN_API_VERSION,
326
+ "Accept-Language": "en_US" # Explicitly set for v2 name lookups if not default in session
327
  })
328
  except Exception as e:
329
  logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
 
331
 
332
  logging.info(f"Starting follower stats retrieval for org: {org_urn}")
333
 
 
 
 
334
  functions_map = get_functions_map(session)
335
  seniorities_map = get_seniorities_map(session)
336
 
 
342
  monthly_gains = fetch_monthly_follower_gains(session, org_urn)
343
  all_follower_data.extend(monthly_gains)
344
 
 
345
  demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map)
346
  all_follower_data.extend(demographics)
347