GuglielmoTor committed
Commit 4d12aa9 · verified · 1 Parent(s): 14ab04f

Update linkedin_follower_stats.py

Files changed (1):
  1. linkedin_follower_stats.py +192 -331
linkedin_follower_stats.py CHANGED
@@ -1,291 +1,225 @@
- # -- coding: utf-8 --
  import json
  import requests
  import logging
  from datetime import datetime, timezone, timedelta
- from urllib.parse import quote, urlencode
- import requests_oauthlib # For version logging

  # Assuming you have a sessions.py with create_session
  # If sessions.py or create_session is not found, it will raise an ImportError,
  # which is appropriate for a module that depends on it.
- from sessions import create_session # Make sure this file exists and is correct

  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

  API_V2_BASE = 'https://api.linkedin.com/v2'
  API_REST_BASE = "https://api.linkedin.com/rest"
- LINKEDIN_API_VERSION = "202502"

  # --- ID to Name Mapping Helper Functions ---

  def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path, id_key="id"):
      """
      Generic helper to fetch and map IDs to names from a LinkedIn API endpoint.
      """
      mapping = {}
-     request_url_for_logging = url
-     response_obj = None
      try:
-         logging.debug(f"_fetch_linkedin_names (id: {id(session)}): About to prepare request. Session token: {session.token}, Session authorized: {session.authorized}, Expires at: {session.token.get('expires_at') if session.token else 'N/A'}")
-         logging.debug(f"_fetch_linkedin_names (id: {id(session)}): Session auth object: type={type(session.auth)}, value={session.auth}")
-
-         req = requests.Request('GET', url, params=params)
-         prepared_req = session.prepare_request(req)
-         request_url_for_logging = prepared_req.url
-
-         logging.debug(f"Fetching names from URL: {request_url_for_logging}")
-         logging.debug(f"Request headers for _fetch_linkedin_names: {json.dumps(dict(prepared_req.headers), indent=2)}")
-
-         response_obj = session.send(prepared_req, timeout=30)
-         response_obj.raise_for_status()
-         data = response_obj.json()

          items = data
-         for key in result_key_path:
              if isinstance(items, dict):
-                 items = items.get(key)
-                 if items is None:
-                     logging.warning(f"Key '{key}' not found in response from {request_url_for_logging} at path {result_key_path}. Response data: {json.dumps(data, indent=2)}")
-                     return mapping
-             else:
-                 logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {request_url_for_logging}. Check result_key_path.")
-                 return mapping
-
-         if items is None:
-             logging.warning(f"Items became None after navigating result_key_path for URL {request_url_for_logging}. Path: {result_key_path}")
-             return mapping

          if isinstance(items, dict):
              for item_id_str, item_data in items.items():
                  name = item_data
-                 for key_nav in name_key_path:
                      if isinstance(name, dict):
                          name = name.get(key_nav)
                      else:
-                         name = None
                          break
                  if name:
-                     mapping[item_id_str] = name
                  else:
-                     logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {request_url_for_logging}")
          elif isinstance(items, list):
              for item in items:
                  item_id_val = item.get(id_key)
                  name = item
-                 for key_nav in name_key_path:
                      if isinstance(name, dict):
                          name = name.get(key_nav)
                      else:
-                         name = None
                          break
                  if item_id_val is not None and name:
-                     mapping[str(item_id_val)] = name
                  else:
-                     if item_id_val is None:
-                         logging.warning(f"No ID ('{id_key}') found in item: {item} from URL {request_url_for_logging}")
-                     if name is None:
-                         logging.warning(f"No name found at path {name_key_path} for item with ID '{item_id_val}' in item: {item} from URL {request_url_for_logging}")
          else:
-             logging.warning(f"Expected list or dict of items at {result_key_path} from URL {request_url_for_logging}, got {type(items)}. Full items: {items}")
-
-     except requests.exceptions.HTTPError as http_err:
-         status_code = "N/A"; error_text = str(http_err); response_headers = {}
-         if http_err.response is not None:
-             status_code = http_err.response.status_code
-             error_text = http_err.response.text
-             response_headers = dict(http_err.response.headers)
-         logging.error(f"HTTP error in _fetch_linkedin_names from {request_url_for_logging} (Status: {status_code}): {error_text}")
-         logging.error(f"Response Headers: {json.dumps(response_headers, indent=2)}")
-     except requests.exceptions.RequestException as req_err:
-         logging.error(f"Request error in _fetch_linkedin_names from {request_url_for_logging}: {str(req_err)}")
-         if req_err.response is not None:
-             logging.error(f"Associated Response Status: {req_err.response.status_code}, Text: {req_err.response.text}, Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
-     except json.JSONDecodeError as json_err:
-         response_text = "Not available"
-         if response_obj is not None and hasattr(response_obj, 'text'): response_text = response_obj.text
-         logging.error(f"Error decoding JSON for names from {request_url_for_logging}: {json_err}. Response text: {response_text}")
      except Exception as e:
-         logging.error(f"Unexpected error in _fetch_linkedin_names from {request_url_for_logging}: {e}", exc_info=True)
-         if response_obj is not None:
-             logging.error(f"Associated Response (if any) Status: {getattr(response_obj, 'status_code', 'N/A')}, Text: {getattr(response_obj, 'text', 'N/A')}")
      return mapping

  def get_functions_map(session):
      url = f"{API_V2_BASE}/functions"
-     params = {'count': 200}
      logging.info("Fetching all LinkedIn functions.")
      return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")

  def get_seniorities_map(session):
      url = f"{API_V2_BASE}/seniorities"
-     params = {'count': 200}
      logging.info("Fetching all LinkedIn seniorities.")
      return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")

  def get_industries_map(session, industry_urns, version="DEFAULT"):
      industry_ids = [_parse_urn_to_id(urn) for urn in industry_urns or []]
      unique_ids = set(filter(None, industry_ids))
      if not unique_ids:
          return {}

      url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
-     params = { 'start': 0, 'count': 500 }
-     request_url_for_logging = url
-     response_obj = None
-     logging.info(f"Fetching all industries (to filter {len(unique_ids)} IDs)")
      try:
-         logging.debug(f"get_industries_map (id: {id(session)}): About to prepare request. Session token: {session.token}, Session authorized: {session.authorized}, Expires at: {session.token.get('expires_at') if session.token else 'N/A'}")
-         logging.debug(f"get_industries_map (id: {id(session)}): Session auth object: type={type(session.auth)}, value={session.auth}")
-         req = requests.Request('GET', url, params=params)
-         prepared_req = session.prepare_request(req)
-         request_url_for_logging = prepared_req.url
-         logging.debug(f"Requesting all industries from URL: {request_url_for_logging}")
-         logging.debug(f"Request headers for get_industries_map: {json.dumps(dict(prepared_req.headers), indent=2)}")
-
-         response_obj = session.send(prepared_req, timeout=30)
-         response_obj.raise_for_status()
-         data = response_obj.json()
          elements = data.get('elements', [])

          mapping = {}
          for el in elements:
              el_id = el.get('id')
              if el_id and str(el_id) in unique_ids:
-                 name = el.get('name', {}).get('localized', {}).get('en_US')
                  if name:
                      mapping[str(el_id)] = name
                  else:
-                     logging.warning(f"Industry {el_id} has no en_US name field in element: {el} from URL {request_url_for_logging}")
          return mapping
-     except requests.exceptions.HTTPError as http_err:
-         status_code = "N/A"; error_text = str(http_err); response_headers = {}
-         if http_err.response is not None:
-             status_code = http_err.response.status_code
-             error_text = http_err.response.text
-             response_headers = dict(http_err.response.headers)
-         logging.error(f"HTTP error fetching all industries from {request_url_for_logging} (Status: {status_code}): {error_text}")
-         logging.error(f"Response Headers: {json.dumps(response_headers, indent=2)}")
-         return {}
-     except requests.exceptions.RequestException as req_err:
-         logging.error(f"Request error fetching all industries from {request_url_for_logging}: {str(req_err)}")
-         if req_err.response is not None:
-             logging.error(f"Associated Response Status: {req_err.response.status_code}, Text: {req_err.response.text}, Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
-         return {}
-     except json.JSONDecodeError as json_err:
-         response_text = "Not available"
-         if response_obj is not None and hasattr(response_obj, 'text'): response_text = response_obj.text
-         logging.error(f"Error decoding JSON for industries from {request_url_for_logging}: {json_err}. Response text: {response_text}")
-         return {}
-     except Exception as e:
-         logging.error(f"Unexpected error fetching all industries from {request_url_for_logging}: {e}", exc_info=True)
-         if response_obj is not None:
-             logging.error(f"Associated Response (if any) Status: {getattr(response_obj, 'status_code', 'N/A')}, Text: {getattr(response_obj, 'text', 'N/A')}")
          return {}

  def get_geo_map(session, geo_urns):
      if not geo_urns: return {}
      geo_ids = [_parse_urn_to_id(urn) for urn in geo_urns if urn]
      unique_ids = list(set(filter(None, geo_ids)))
      if not unique_ids: return {}

-     MAX_GEO_IDS_PER_CALL = 100
-     all_geo_mappings = {}
-
-     for i in range(0, len(unique_ids), MAX_GEO_IDS_PER_CALL):
-         chunk_ids = unique_ids[i:i + MAX_GEO_IDS_PER_CALL]
-         if not chunk_ids: continue

-         ids_param_value = "List(" + ",".join(map(str, chunk_ids)) + ")"
-         locale_param_value = "(language:en,country:US)"

-         url = f"{API_V2_BASE}/geo"
-         geo_params = { 'ids': ids_param_value, 'locale': locale_param_value }
-
-         logging.info(f"Fetching names for {len(chunk_ids)} geo IDs (chunk {i//MAX_GEO_IDS_PER_CALL + 1})")
-         chunk_mapping = _fetch_linkedin_names(session, url, geo_params, ["results"], ["defaultLocalizedName", "value"])
-         all_geo_mappings.update(chunk_mapping)
-
-     return all_geo_mappings

  def _parse_urn_to_id(urn_string):
      if not isinstance(urn_string, str):
          logging.debug(f"Invalid URN type: {type(urn_string)}, value: {urn_string}. Cannot parse ID.")
          return None
      try:
          return urn_string.split(':')[-1]
-     except IndexError:
          logging.warning(f"Could not parse ID from URN: {urn_string}")
          return None
-     except Exception as e:
          logging.error(f"Unexpected error parsing URN {urn_string}: {e}")
          return None

  # --- Follower Data Fetching Functions ---

- def fetch_monthly_follower_gains(comm_client_id, token_dict, org_urn, api_rest_base_url):
      """
-     Fetches monthly follower gains using a dedicated OAuth2Session for REST API calls.
      """
-     now_utc = datetime.now(timezone.utc)
-     # Calculate start of the month, approximately 12 months ago
-     start_of_reporting_period = (now_utc - timedelta(days=365)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
-     start_ms = int(start_of_reporting_period.timestamp() * 1000)
-
-     base_url = f"{api_rest_base_url}/organizationalEntityFollowerStatistics"
-     # Using the same timeIntervals format as in the original failing log, as it's standard for REST
-     time_intervals_value = f"(timeRange:(start:{start_ms}),timeGranularityType:MONTH)"
-
-     api_params = {
-         "q": "organizationalEntity",
-         "organizationalEntity": org_urn,
-         "timeIntervals": time_intervals_value
-     }
-
-     logging.info(f"Preparing to fetch monthly follower gains for {org_urn} using a dedicated session.")
-     logging.debug(f"API Parameters for monthly gains: {json.dumps(api_params)}")
-     logging.debug(f"Received client_id for dedicated session: {comm_client_id}")
-     logging.debug(f"Received token_dict for dedicated session (access_token type): {type(token_dict.get('access_token')) if token_dict else 'N/A'}")

      results = []
-     request_url_for_logging = "Not constructed"
-     response_obj = None
-
      try:
-         # Create a new session specifically for this REST API call
-         # This session will not have X-Restli-Protocol-Version header by default from OAuth2Session
-         # It will also not inherit any potentially problematic headers from a shared session.
-         rest_session = create_session(comm_client_id, token=token_dict)
-         rest_session.headers.update({
-             "LinkedIn-Version": LINKEDIN_API_VERSION
-         })
-         logging.info(f"Dedicated REST session created for monthly gains (id: {id(rest_session)}).")
-         logging.debug(f"Dedicated REST session token (type): {type(rest_session.token)}, Authorized: {rest_session.authorized}")
-         if rest_session.token and 'access_token' in rest_session.token:
-             logging.debug(f"Dedicated REST session access token (partial): {str(rest_session.token['access_token'])[:20]}...")
-         else:
-             logging.warning("Dedicated REST session: token is None or 'access_token' key is missing.")
-
-         req = requests.Request('GET', base_url, params=api_params)
-         prepared_req = rest_session.prepare_request(req) # Use the dedicated session
-         request_url_for_logging = prepared_req.url
-
-         logging.info(f"Requesting monthly follower gains from URL: {request_url_for_logging}")
-         # Log only essential headers, avoid logging full token if present in headers directly
-         headers_to_log = {k: v for k, v in prepared_req.headers.items() if k.lower() != 'authorization'}
-         logging.debug(f"Request Headers for monthly gains (excluding Authorization): {json.dumps(headers_to_log, indent=2)}")
-
-         response_obj = rest_session.send(prepared_req, timeout=30) # Use the dedicated session
-         response_obj.raise_for_status()
-         data = response_obj.json()

          elements = data.get('elements', [])
          if not elements:
-             logging.info(f"No 'elements' found in monthly follower gains API response for {org_urn} (start_ms {start_ms}). Response data: {json.dumps(data, indent=2)}")

          for item in elements:
              time_range = item.get('timeRange', {})
@@ -294,128 +228,104 @@ def fetch_monthly_follower_gains(comm_client_id, token_dict, org_urn, api_rest_base_url):
              logging.warning(f"Skipping item due to missing 'start' timestamp: {item}")
              continue

              date_obj = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
              date_str = date_obj.strftime('%Y-%m-%d')

              gains = item.get('followerGains', {})
-             if gains is None: gains = {}

              results.append({
-                 'category_name': date_str,
                  'follower_count_organic': gains.get('organicFollowerGain', 0),
                  'follower_count_paid': gains.get('paidFollowerGain', 0),
                  'follower_count_type': 'follower_gains_monthly',
                  'organization_urn': org_urn
              })
-         logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn} starting from {start_of_reporting_period.strftime('%Y-%m-%d')}.")

      except requests.exceptions.HTTPError as http_err:
-         code = "N/A"; text = str(http_err); resp_headers = {}
-         if http_err.response is not None:
-             code = http_err.response.status_code
-             text = http_err.response.text
-             resp_headers = dict(http_err.response.headers)
          logging.error(f"HTTP error fetching monthly gains for {org_urn}: {code} - {text}")
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         logging.error(f"Response Headers: {json.dumps(resp_headers, indent=2)}")
-     except requests.exceptions.RequestException as req_err:
-         logging.error(f"RequestException fetching monthly gains for {org_urn}: {str(req_err)}")
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         if req_err.response is not None:
-             logging.error(f"Associated Response Status: {req_err.response.status_code}")
-             logging.error(f"Associated Response Text: {req_err.response.text}")
-             logging.error(f"Associated Response Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
-     except json.JSONDecodeError as json_err:
-         response_text = "Not available"
-         if response_obj is not None and hasattr(response_obj, 'text'):
-             response_text = response_obj.text
-         logging.error(f"Error decoding JSON for monthly follower gains for {org_urn}: {json_err}")
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         logging.error(f"Raw Response Text: {response_text}")
      except Exception as ex:
-         logging.error(f"An unexpected error occurred while fetching monthly gains for {org_urn}: {str(ex)}", exc_info=True)
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         if response_obj is not None:
-             logging.error(f"Response Status (if available): {getattr(response_obj, 'status_code', 'N/A')}")
-             logging.error(f"Response Text (if available): {getattr(response_obj, 'text', 'N/A')}")
-             logging.error(f"Response Headers (if available): {json.dumps(dict(getattr(response_obj, 'headers', {})), indent=2)}")
-
      return results

  def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map):
      final_demographics_results = []
-     base_url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
-     params = { 'q': 'organizationalEntity', 'organizationalEntity': org_urn }

-     logging.info(f"Preparing to fetch follower demographics for org URN {org_urn}.")
-     logging.debug(f"API Parameters for demographics: {json.dumps(params)}")
-     request_url_for_logging = "Not constructed"
-     response_obj = None

      try:
-         logging.debug(f"fetch_follower_demographics (id: {id(session)}): About to prepare request. Session token: {session.token}, Session authorized: {session.authorized}, Expires at: {session.token.get('expires_at') if session.token else 'N/A'}")
-         logging.debug(f"fetch_follower_demographics (id: {id(session)}): Session auth object: type={type(session.auth)}, value={session.auth}")
-         logging.debug(f"fetch_follower_demographics (id: {id(session)}): Auto-refresh URL: {session.auto_refresh_url}, Auto-refresh kwargs: {session.auto_refresh_kwargs}")
-         if session.token and 'access_token' in session.token:
-             logging.debug(f"fetch_follower_demographics: Access token (partial): {str(session.token['access_token'])[:20]}...")
-         else:
-             logging.warning("fetch_follower_demographics: session.token is None or 'access_token' key is missing before prepare_request.")
-
-         req = requests.Request('GET', base_url, params=params)
-         prepared_req = session.prepare_request(req)
-         request_url_for_logging = prepared_req.url
-
-         logging.info(f"Requesting follower demographics from URL: {request_url_for_logging}")
-         logging.debug(f"Request Headers for demographics: {json.dumps(dict(prepared_req.headers), indent=2)}")
-
-         response_obj = session.send(prepared_req, timeout=30)
-         response_obj.raise_for_status()
-         data = response_obj.json()

          elements = data.get("elements", [])
          if not elements:
-             logging.warning(f"No elements found in follower demographics response for {org_urn}. Response data: {json.dumps(data, indent=2)}")
              return []

-         stat_element = elements[0] if len(elements) > 0 else None
-         if not stat_element:
-             logging.warning(f"Elements list is empty or stat_element is None in demographics response for {org_urn}. Response data: {json.dumps(data, indent=2)}")
-             return []

          def _get_entries_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
              current_type_entries = []
              if not raw_items_list:
                  logging.debug(f"No raw items for demographic type '{type_name}' for org {org_urn_val}.")
                  return current_type_entries
              for item in raw_items_list:
                  category_name_val = "Unknown"
-                 if type_name == "follower_association":
                      category_name_val = item.get(id_field_name_in_item, f"Unknown {id_field_name_in_item}")
                  else:
                      urn_val = item.get(id_field_name_in_item)
                      entity_id = _parse_urn_to_id(urn_val)
-                     if entity_id and id_map:
-                         category_name_val = id_map.get(str(entity_id), f"Unknown {type_name.split('_')[-1].capitalize()} (ID: {entity_id})")
-                     elif urn_val:
-                         category_name_val = f"Unmapped {type_name.split('_')[-1].capitalize()} (URN: {urn_val})"
-                     else:
-                         category_name_val = f"Missing URN for {type_name.split('_')[-1].capitalize()}"
                  counts = item.get("followerCounts", {})
                  organic_count = counts.get("organicFollowerCount", 0)
-                 paid_count = counts.get("paidFollowerCount", 0)
                  current_type_entries.append({
                      "category_name": category_name_val,
-                     "follower_count_organic": organic_count,
-                     "follower_count_paid": paid_count,
                      "follower_count_type": type_name,
                      "organization_urn": org_urn_val
                  })
              return current_type_entries

          industry_urns_to_map = [item.get("industry") for item in stat_element.get("followerCountsByIndustry", []) if item.get("industry")]
          geo_urns_to_map = [item.get("geo") for item in stat_element.get("followerCountsByGeoCountry", []) if item.get("geo")]
-         live_industries_map = get_industries_map(session, list(set(industry_urns_to_map)))
-         live_geo_map = get_geo_map(session, list(set(geo_urns_to_map)))

          demographic_configs = [
              {"items_key": "followerCountsBySeniority", "type_name": "follower_seniority", "id_map": seniorities_map, "id_field": "seniority", "top_n": 10},
              {"items_key": "followerCountsByFunction", "type_name": "follower_function", "id_map": functions_map, "id_field": "function", "top_n": 10},
@@ -423,123 +333,74 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map):
              {"items_key": "followerCountsByGeoCountry", "type_name": "follower_geo", "id_map": live_geo_map, "id_field": "geo", "top_n": 10},
              {"items_key": "followerCountsByAssociationType", "type_name": "follower_association", "id_map": {}, "id_field": "associationType", "top_n": None}
          ]

          for config in demographic_configs:
              raw_items = stat_element.get(config["items_key"], [])
              processed_entries = _get_entries_for_type(raw_items, config["type_name"], config["id_map"], config["id_field"], org_urn)

              if config["top_n"] is not None and processed_entries:
-                 for entry in processed_entries:
                      if not isinstance(entry.get("follower_count_organic"), (int, float)):
-                         logging.warning(f"Invalid organic follower count for sorting in {config['type_name']}: {entry['follower_count_organic']}. Setting to 0.")
                          entry["follower_count_organic"] = 0
                  sorted_entries = sorted(processed_entries, key=lambda x: x.get("follower_count_organic", 0), reverse=True)
                  final_demographics_results.extend(sorted_entries[:config["top_n"]])
                  logging.debug(f"Added top {config['top_n']} for {config['type_name']}. Count: {len(sorted_entries[:config['top_n']])}")
-             else:
-                 final_demographics_results.extend(processed_entries)
                  logging.debug(f"Added all for {config['type_name']}. Count: {len(processed_entries)}")

          logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")
-     except requests.exceptions.HTTPError as http_err:
-         code = "N/A"; text = str(http_err); resp_headers = {}
-         if http_err.response is not None:
-             code = http_err.response.status_code
-             text = http_err.response.text
-             resp_headers = dict(http_err.response.headers)
-         logging.error(f"HTTP error fetching follower demographics for {org_urn} (Status: {code}): {text}")
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         logging.error(f"Response Headers: {json.dumps(resp_headers, indent=2)}")
-     except requests.exceptions.RequestException as req_err:
-         logging.error(f"RequestException fetching follower demographics for {org_urn}: {str(req_err)}")
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         if req_err.response is not None:
-             logging.error(f"Associated Response Status: {req_err.response.status_code}")
-             logging.error(f"Associated Response Text: {req_err.response.text}")
-             logging.error(f"Associated Response Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
-     except json.JSONDecodeError as json_err:
-         response_text = "Not available"
-         if response_obj is not None and hasattr(response_obj, 'text'):
-             response_text = response_obj.text
-         logging.error(f"Error decoding JSON for follower demographics for {org_urn}: {json_err}")
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         logging.error(f"Raw Response Text: {response_text}")
      except Exception as e:
          logging.error(f"Unexpected error fetching follower demographics for {org_urn}: {e}", exc_info=True)
-         logging.error(f"Request URL was: {request_url_for_logging}")
-         if response_obj is not None:
-             logging.error(f"Response Status (if available): {getattr(response_obj, 'status_code', 'N/A')}")
-             logging.error(f"Response Text (if available): {getattr(response_obj, 'text', 'N/A')}")
-             logging.error(f"Response Headers (if available): {json.dumps(dict(getattr(response_obj, 'headers', {})), indent=2)}")
      return final_demographics_results

  # --- Main Orchestration Function ---

  def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
-     logging.info(f"--- Initiating get_linkedin_follower_stats for org: {org_urn} ---")
-     logging.info(f"requests-oauthlib version: {requests_oauthlib.__version__}") # Log version
-     logging.debug(f"Received comm_client_id: {comm_client_id}")
-     logging.debug(f"Received community_token - Type: {type(community_token)}, IsSet: {bool(community_token)}")
-     if isinstance(community_token, str) and len(community_token) > 10:
-         logging.debug(f"Received community_token (partial): {community_token[:10]}...")
-     elif isinstance(community_token, dict):
-         logging.debug(f"Received community_token (dict): { {k: (v[:10] + '...' if k == 'access_token' and isinstance(v, str) and len(v)>10 else v) for k,v in community_token.items()} }")
-
      if not all([comm_client_id, community_token, org_urn]):
-         logging.error("Client ID, community_token, or Organization URN is missing or empty.")
-         if not comm_client_id: logging.error("comm_client_id is missing.")
-         if not community_token: logging.error("community_token is missing or empty.")
-         if not org_urn: logging.error("org_urn is missing.")
          return []

-     token_dict = community_token if isinstance(community_token, dict) else {'access_token': str(community_token), 'token_type': 'Bearer'}

-     if not token_dict.get('access_token'):
-         logging.error("Failed to construct a valid token_dict: 'access_token' is empty.")
-         logging.debug(f"Problematic token_dict: {token_dict}")
-         return []
-
-     logging.debug(f"Constructed token_dict for session: { {k: (v[:10] + '...' if k == 'access_token' and isinstance(v, str) and len(v)>10 else v) for k,v in token_dict.items()} }")
-
-     session = None
      try:
-         session = create_session(comm_client_id, token=token_dict)
          session.headers.update({
              "X-Restli-Protocol-Version": "2.0.0",
-             "LinkedIn-Version": LINKEDIN_API_VERSION,
-             "Accept-Language": "en_US"
          })
-         logging.info(f"Session (id: {id(session)}) created and headers updated for org {org_urn}.")
-         logging.debug(f"get_linkedin_follower_stats (id: {id(session)}): Session token after creation: {session.token}, Session authorized: {session.authorized}, Expires at: {session.token.get('expires_at') if session.token else 'N/A'}")
-         logging.debug(f"get_linkedin_follower_stats (id: {id(session)}): Session auth object: type={type(session.auth)}, value={session.auth}")
-         logging.debug(f"get_linkedin_follower_stats (id: {id(session)}): Auto-refresh URL: {session.auto_refresh_url}, Auto-refresh kwargs: {session.auto_refresh_kwargs}")
-
-         if session.token and 'access_token' in session.token:
-             logging.debug(f"get_linkedin_follower_stats: Access token in session (partial): {str(session.token['access_token'])[:20]}...")
-         else:
-             logging.warning("get_linkedin_follower_stats: session.token is None or 'access_token' key is missing after session creation.")
-
      except Exception as e:
          logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
-         return []

-     logging.info(f"Starting follower stats retrieval for org: {org_urn} using session (id: {id(session)})")

-     functions_map = get_functions_map(session) # Pass session
-     seniorities_map = get_seniorities_map(session) # Pass session

-     if not functions_map: logging.warning(f"Functions map is empty for org {org_urn}.")
-     if not seniorities_map: logging.warning(f"Seniorities map is empty for org {org_urn}.")

      all_follower_data = []

-     monthly_gains = fetch_monthly_follower_gains(comm_client_id, token_dict, org_urn, API_REST_BASE)
      all_follower_data.extend(monthly_gains)

-     demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map) # Pass session
      all_follower_data.extend(demographics)

-     if not all_follower_data:
-         logging.warning(f"No follower data (gains or demographics) could be compiled for {org_urn}.")
-     else:
-         logging.info(f"Successfully compiled {len(all_follower_data)} total follower stat entries for {org_urn}.")
-
-     return all_follower_data
 
  import json
  import requests
  import logging
  from datetime import datetime, timezone, timedelta
+ from urllib.parse import quote

  # Assuming you have a sessions.py with create_session
  # If sessions.py or create_session is not found, it will raise an ImportError,
  # which is appropriate for a module that depends on it.
+ from sessions import create_session

  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

  API_V2_BASE = 'https://api.linkedin.com/v2'
  API_REST_BASE = "https://api.linkedin.com/rest"
+ LINKEDIN_API_VERSION = "202502"  # As per user's example for follower stats

  # --- ID to Name Mapping Helper Functions ---

  def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path, id_key="id"):
      """
      Generic helper to fetch and map IDs to names from a LinkedIn API endpoint.
+     result_key_path: list of keys to navigate to the list of items (e.g., ["elements"])
+     name_key_path: list of keys to navigate to the name within an item (e.g., ["name", "localized", "en_US"])
      """
      mapping = {}
      try:
+         logging.debug(f"Fetching names from URL: {url} with params: {json.dumps(params)}")  # Log params for clarity
+         response = session.get(url, params=params)
+         response.raise_for_status()
+         data = response.json()

          items = data
+         for key in result_key_path:
              if isinstance(items, dict):
+                 items = items.get(key, [])
+             else:
+                 logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {url}. Check result_key_path.")
+                 return mapping

          if isinstance(items, dict):
              for item_id_str, item_data in items.items():
                  name = item_data
+                 for key_nav in name_key_path:
                      if isinstance(name, dict):
                          name = name.get(key_nav)
                      else:
+                         name = None
                          break
                  if name:
+                     mapping[item_id_str] = name
                  else:
+                     logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {url}")
          elif isinstance(items, list):
              for item in items:
                  item_id_val = item.get(id_key)
                  name = item
+                 for key_nav in name_key_path:
                      if isinstance(name, dict):
                          name = name.get(key_nav)
                      else:
+                         name = None
                          break
                  if item_id_val is not None and name:
+                     mapping[str(item_id_val)] = name
                  else:
+                     logging.warning(f"No ID ('{id_key}') or name found at path {name_key_path} in item: {item} from URL {url}")
          else:
+             logging.warning(f"Expected list or dict of items at {result_key_path} from URL {url}, got {type(items)}")
+
+     except requests.exceptions.RequestException as e:
+         status_code = getattr(e.response, 'status_code', 'N/A')
+         error_text = getattr(e.response, 'text', str(e))  # Log the raw error text
+         logging.error(f"Error fetching names from {url} (Status: {status_code}): {error_text}")
+     except json.JSONDecodeError as e:
+         logging.error(f"Error decoding JSON for names from {url}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
      except Exception as e:
+         logging.error(f"Unexpected error fetching names from {url}: {e}", exc_info=True)
      return mapping
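For reference, a minimal sketch of how result_key_path and name_key_path drive the lookup above; the payload is invented for illustration and is not a real LinkedIn response:

payload = {"elements": [{"id": 1, "name": {"localized": {"en_US": "Accounting"}}}]}

items = payload
for key in ["elements"]:                        # result_key_path
    items = items.get(key, [])
name = items[0]
for key_nav in ["name", "localized", "en_US"]:  # name_key_path
    name = name.get(key_nav)
print(name)  # -> Accounting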
 
  def get_functions_map(session):
+     """Fetches all LinkedIn functions and returns a map of {id: name}."""
      url = f"{API_V2_BASE}/functions"
+     params = {}  # Relies on Accept-Language header from session
      logging.info("Fetching all LinkedIn functions.")
      return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")

  def get_seniorities_map(session):
+     """Fetches all LinkedIn seniorities and returns a map of {id: name}."""
      url = f"{API_V2_BASE}/seniorities"
+     params = {}  # Relies on Accept-Language header from session
      logging.info("Fetching all LinkedIn seniorities.")
      return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
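Both helpers resolve to a flat {id: name} dict; an illustrative (invented) result:

functions_map = get_functions_map(session)
# e.g. {"1": "Accounting", "2": "Administrative"}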
 
  def get_industries_map(session, industry_urns, version="DEFAULT"):
+     """Fetches names for a list of industry URNs by pulling ALL industries and filtering locally."""
+     # parse and dedupe IDs
      industry_ids = [_parse_urn_to_id(urn) for urn in industry_urns or []]
      unique_ids = set(filter(None, industry_ids))
      if not unique_ids:
          return {}

+     # we'll page through the full list; LinkedIn defaults to 10, so bump count
      url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
+     params = {
+         'start': 0,
+         'count': 500  # should exceed total # of industries
+     }
+
+     logging.info(f"Fetching all industries (to filter {len(unique_ids)} IDs) from {url}")
      try:
+         response = session.get(url, params=params)
+         response.raise_for_status()
+         data = response.json()
          elements = data.get('elements', [])

          mapping = {}
          for el in elements:
              el_id = el.get('id')
              if el_id and str(el_id) in unique_ids:
+                 # drill into name.localized.en_US
+                 name = el.get('name', {}) \
+                          .get('localized', {}) \
+                          .get('en_US')
                  if name:
                      mapping[str(el_id)] = name
                  else:
+                     logging.warning(f"Industry {el_id} has no en_US name field")
          return mapping
+
+     except requests.exceptions.RequestException as e:
+         status_code = getattr(e.response, 'status_code', 'N/A')
+         logging.error(f"Error fetching all industries: {status_code} {getattr(e.response, 'text', str(e))}")
          return {}
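A hedged usage sketch of this fetch-all-then-filter approach; the URN and the resulting name are invented:

# Duplicate URNs collapse in the set; IDs absent from the taxonomy simply drop out.
industries = get_industries_map(session, ["urn:li:industry:4", "urn:li:industry:4"])
# e.g. {"4": "Banking"} if the taxonomy's en_US label for ID 4 were "Banking"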
 
+
+
  def get_geo_map(session, geo_urns):
+     """Fetches names for a list of geo URNs. Returns a map {id: name}."""
      if not geo_urns: return {}
      geo_ids = [_parse_urn_to_id(urn) for urn in geo_urns if urn]
      unique_ids = list(set(filter(None, geo_ids)))
      if not unique_ids: return {}

+     # As per LinkedIn docs for BATCH_GET: ids=List(12345,23456)&locale=(language:en,country:US)
+     ids_param_string = "List(" + ",".join(map(str, unique_ids)) + ")"
+     locale_param_string = "(language:en,country:US)"  # Must be exactly this string format

+     # Parameters must be passed in the URL string directly for this specific API format
+     # The `params` dict for session.get() will be empty.
+     url = f"{API_V2_BASE}/geo?ids={ids_param_string}&locale={locale_param_string}"
+     #url = f"{API_V2_BASE}/geo?ids=List({','.join(map(str, unique_ids))})&locale=(language:en,country:US)"

+     logging.info(f"Fetching names for {len(unique_ids)} unique geo IDs using URL: {url}")
+     return _fetch_linkedin_names(session, url, {}, ["results"], ["defaultLocalizedName", "value"])
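With two invented geo IDs, the hand-built URL comes out as below; the Restli List(...) and locale:(...) syntax must survive unencoded, which is why the code concatenates the query string itself rather than passing a params dict for requests to percent-encode:

# IDs invented for illustration only.
url = f"{API_V2_BASE}/geo?ids=List(103644278,101174742)&locale=(language:en,country:US)"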
 
  def _parse_urn_to_id(urn_string):
+     """Helper to get the last part (ID) from a URN string."""
      if not isinstance(urn_string, str):
          logging.debug(f"Invalid URN type: {type(urn_string)}, value: {urn_string}. Cannot parse ID.")
          return None
      try:
          return urn_string.split(':')[-1]
+     except IndexError:
          logging.warning(f"Could not parse ID from URN: {urn_string}")
          return None
+     except Exception as e:
          logging.error(f"Unexpected error parsing URN {urn_string}: {e}")
          return None
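A couple of illustrative calls; note that str.split(':')[-1] returns the whole string when no colon is present and cannot raise IndexError, so the first except branch is purely defensive:

_parse_urn_to_id("urn:li:geo:103644278")  # -> "103644278" (invented ID)
_parse_urn_to_id(None)                    # -> None, logged at DEBUG level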
 
  # --- Follower Data Fetching Functions ---

+ def fetch_monthly_follower_gains(session, org_urn, api_rest_base):
      """
+     Fetches monthly follower gains for the last 12 full months.
+     The start date is set to the first day of the month, 12 months prior to the current month, at midnight UTC.
      """
+     # now = datetime.now()
+     # twelve_months_ago = now - timedelta(days=365)
+     # twelve_months_ago = twelve_months_ago.replace(day=1)
+
+     # start_date = int(twelve_months_ago.timestamp() * 1000)
+
+     # # Build URL with explicit query string
+     # url = (
+     #     f"{api_rest_base}/organizationalEntityFollowerStatistics"
+     #     f"?q=organizationalEntity"
+     #     f"&organizationalEntity={org_urn}"
+     #     f"&timeIntervals.timeGranularityType=MONTH"
+     #     f"&timeIntervals.timeRange.start={start_date}"
+     #     # LinkedIn defaults the end of the timeRange to the current time if not specified.
+     # )
+     # logging.info(f"Fetching monthly follower gains from URL: {url}")
+
+     now = datetime.now(timezone.utc)
+     twelve_months_ago = now - timedelta(days=365)
+     twelve_months_ago = twelve_months_ago.replace(day=1)
+
+     start_date = int(twelve_months_ago.timestamp() * 1000)
+     end_date = int(now.timestamp() * 1000)  # You could omit this if letting LinkedIn default
+
+     time_intervals = f"(timeRange:(start:{start_date},end:{end_date}),timeGranularityType:MONTH)"
+
+     url = (
+         f"{api_rest_base}/organizationalEntityFollowerStatistics"
+         f"?q=organizationalEntity"
+         f"&organizationalEntity={org_urn}"
+         f"&timeIntervals={time_intervals}"
+     )

      results = []
      try:
+         response = session.get(url)
+         response.raise_for_status()  # Raises an HTTPError for bad responses (4XX or 5XX)
+         data = response.json()

          elements = data.get('elements', [])
          if not elements:
+             logging.info(f"No 'elements' found in API response for {org_urn} for start_ms {start_date}.")

          for item in elements:
              time_range = item.get('timeRange', {})
@@ -294,128 +228,104 @@ def fetch_monthly_follower_gains(comm_client_id, token_dict, org_urn, api_rest_base_url):
              logging.warning(f"Skipping item due to missing 'start' timestamp: {item}")
              continue

+             # Convert timestamp (milliseconds) to YYYY-MM-DD date string in UTC
              date_obj = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
              date_str = date_obj.strftime('%Y-%m-%d')

              gains = item.get('followerGains', {})
+             # It's possible 'followerGains' itself is missing or None
+             if gains is None:
+                 gains = {}  # Ensure gains is a dict to prevent error on .get()

              results.append({
+                 'category_name': date_str,  # This is the start date of the month's data
                  'follower_count_organic': gains.get('organicFollowerGain', 0),
                  'follower_count_paid': gains.get('paidFollowerGain', 0),
                  'follower_count_type': 'follower_gains_monthly',
                  'organization_urn': org_urn
              })
+         logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn} starting from {twelve_months_ago.strftime('%Y-%m-%d')}.")

      except requests.exceptions.HTTPError as http_err:
+         # More specific error for HTTP errors
+         code = getattr(http_err.response, 'status_code', 'N/A')
+         text = getattr(http_err.response, 'text', str(http_err))
          logging.error(f"HTTP error fetching monthly gains for {org_urn}: {code} - {text}")
+         logging.error(f"Request URL: {url}")
+     except requests.exceptions.RequestException as e:
+         # Catch other request-related errors (e.g., connection issues)
+         code = getattr(e.response, 'status_code', 'N/A') if e.response is not None else 'N/A'
+         text = getattr(e.response, 'text', str(e)) if e.response is not None else str(e)
+         logging.error(f"Error fetching monthly gains for {org_urn}: {code} - {text}")
+         logging.error(f"Request URL: {url}")
      except Exception as ex:
+         # Catch any other unexpected errors (e.g., JSON parsing if response is not JSON)
+         logging.error(f"An unexpected error occurred while fetching monthly gains for {org_urn}: {str(ex)}")
+         logging.error(f"Request URL: {url}")
+
      return results
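To make the timeIntervals construction concrete, a small sketch with an invented "current" time; the printed string is exactly what the code above interpolates into the request URL:

from datetime import datetime, timezone, timedelta

now = datetime(2025, 5, 15, tzinfo=timezone.utc)  # invented clock time
start = int((now - timedelta(days=365)).replace(day=1).timestamp() * 1000)
end = int(now.timestamp() * 1000)
print(f"(timeRange:(start:{start},end:{end}),timeGranularityType:MONTH)")
# -> (timeRange:(start:1714521600000,end:1747267200000),timeGranularityType:MONTH)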
 
  def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map):
+     """
+     Fetches current follower demographics, applying Top-N for specified categories.
+     """
      final_demographics_results = []
+     # Parameters for the main demographics call
+     params = {
+         'q': 'organizationalEntity',
+         'organizationalEntity': org_urn
+     }
+     url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"

+     logging.info(f"Fetching follower demographics from: {url} for org URN {org_urn} with params: {json.dumps(params)}")

      try:
+         response = session.get(url, params=params)
+         response.raise_for_status()
+         data = response.json()

          elements = data.get("elements", [])
          if not elements:
+             logging.warning(f"No elements found in follower demographics response for {org_urn}.")
              return []

+         stat_element = elements[0]

          def _get_entries_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
              current_type_entries = []
              if not raw_items_list:
                  logging.debug(f"No raw items for demographic type '{type_name}' for org {org_urn_val}.")
                  return current_type_entries
+
              for item in raw_items_list:
                  category_name_val = "Unknown"
+                 if type_name == "follower_association":
                      category_name_val = item.get(id_field_name_in_item, f"Unknown {id_field_name_in_item}")
                  else:
                      urn_val = item.get(id_field_name_in_item)
                      entity_id = _parse_urn_to_id(urn_val)
+                     category_name_val = id_map.get(str(entity_id), f"Unknown {type_name.split('_')[-1].capitalize()} (ID: {entity_id if entity_id else urn_val})")
+
                  counts = item.get("followerCounts", {})
                  organic_count = counts.get("organicFollowerCount", 0)
+                 paid_count = counts.get("paidFollowerCount", 0)
+
                  current_type_entries.append({
                      "category_name": category_name_val,
+                     "follower_count_organic": organic_count,
+                     "follower_count_paid": paid_count,
                      "follower_count_type": type_name,
                      "organization_urn": org_urn_val
                  })
              return current_type_entries
+
          industry_urns_to_map = [item.get("industry") for item in stat_element.get("followerCountsByIndustry", []) if item.get("industry")]
          geo_urns_to_map = [item.get("geo") for item in stat_element.get("followerCountsByGeoCountry", []) if item.get("geo")]
+
+         live_industries_map = get_industries_map(session, industry_urns_to_map)
+         live_geo_map = get_geo_map(session, geo_urns_to_map)
+
          demographic_configs = [
              {"items_key": "followerCountsBySeniority", "type_name": "follower_seniority", "id_map": seniorities_map, "id_field": "seniority", "top_n": 10},
              {"items_key": "followerCountsByFunction", "type_name": "follower_function", "id_map": functions_map, "id_field": "function", "top_n": 10},
@@ -423,123 +333,74 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map):
              {"items_key": "followerCountsByGeoCountry", "type_name": "follower_geo", "id_map": live_geo_map, "id_field": "geo", "top_n": 10},
              {"items_key": "followerCountsByAssociationType", "type_name": "follower_association", "id_map": {}, "id_field": "associationType", "top_n": None}
          ]
+
          for config in demographic_configs:
              raw_items = stat_element.get(config["items_key"], [])
              processed_entries = _get_entries_for_type(raw_items, config["type_name"], config["id_map"], config["id_field"], org_urn)
+
              if config["top_n"] is not None and processed_entries:
+                 for entry in processed_entries:
                      if not isinstance(entry.get("follower_count_organic"), (int, float)):
                          entry["follower_count_organic"] = 0
                  sorted_entries = sorted(processed_entries, key=lambda x: x.get("follower_count_organic", 0), reverse=True)
                  final_demographics_results.extend(sorted_entries[:config["top_n"]])
                  logging.debug(f"Added top {config['top_n']} for {config['type_name']}. Count: {len(sorted_entries[:config['top_n']])}")
+             else:
+                 final_demographics_results.extend(processed_entries)
                  logging.debug(f"Added all for {config['type_name']}. Count: {len(processed_entries)}")
+
          logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")
+
+     except requests.exceptions.RequestException as e:
+         status_code = getattr(e.response, 'status_code', 'N/A')
+         error_text = getattr(e.response, 'text', str(e))
+         logging.error(f"Error fetching follower demographics for {org_urn} (Status: {status_code}): {error_text}")
+     except json.JSONDecodeError as e:
+         logging.error(f"Error decoding JSON for follower demographics for {org_urn}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
      except Exception as e:
          logging.error(f"Unexpected error fetching follower demographics for {org_urn}: {e}", exc_info=True)
      return final_demographics_results
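Every demographic row emitted above shares one flat schema; an illustrative entry with invented values:

{
    "category_name": "Senior",                        # resolved via seniorities_map
    "follower_count_organic": 1234,
    "follower_count_paid": 56,
    "follower_count_type": "follower_seniority",
    "organization_urn": "urn:li:organization:12345",
}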
 
  # --- Main Orchestration Function ---

  def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
+     """
+     Main function to fetch all follower statistics (monthly gains and demographics)
+     and format them for Bubble.
+     """
      if not all([comm_client_id, community_token, org_urn]):
+         logging.error("Client ID, token, or Organization URN is missing for get_linkedin_follower_stats.")
          return []

+     token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}

+     session = None
      try:
+         session = create_session(comm_client_id, token=token_dict)
          session.headers.update({
              "X-Restli-Protocol-Version": "2.0.0",
+             "LinkedIn-Version": LINKEDIN_API_VERSION,
+             "Accept-Language": "en_US"  # Explicitly set for v2 name lookups if not default in session
          })
      except Exception as e:
          logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
+         return []

+     logging.info(f"Starting follower stats retrieval for org: {org_urn}")

+     functions_map = get_functions_map(session)
+     seniorities_map = get_seniorities_map(session)

+     if not functions_map: logging.warning(f"Functions map is empty for org {org_urn}. Function names might not be resolved.")
+     if not seniorities_map: logging.warning(f"Seniorities map is empty for org {org_urn}. Seniority names might not be resolved.")

      all_follower_data = []

+     monthly_gains = fetch_monthly_follower_gains(session, org_urn, API_REST_BASE)
      all_follower_data.extend(monthly_gains)

+     demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map)
      all_follower_data.extend(demographics)

+     logging.info(f"Successfully compiled {len(all_follower_data)} total follower stat entries for {org_urn}.")
+     return all_follower_data
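A minimal calling sketch for the refactored entry point; the client ID, token, and URN are placeholders, and create_session is assumed (per the import comment at the top of the module) to return a requests-compatible OAuth session:

stats = get_linkedin_follower_stats(
    comm_client_id="YOUR_CLIENT_ID",       # placeholder
    community_token="YOUR_ACCESS_TOKEN",   # a bare string is wrapped into a token dict
    org_urn="urn:li:organization:12345",   # placeholder URN
)
for row in stats:
    print(row["follower_count_type"], row["category_name"], row["follower_count_organic"])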