Spaces:
Running
Running
Update linkedin_follower_stats.py
Browse files- linkedin_follower_stats.py +192 -331
linkedin_follower_stats.py
CHANGED
@@ -1,291 +1,225 @@
|
|
1 |
-
# -- coding: utf-8 --
|
2 |
import json
|
3 |
import requests
|
4 |
import logging
|
5 |
from datetime import datetime, timezone, timedelta
|
6 |
-
from urllib.parse import quote
|
7 |
-
import requests_oauthlib # For version logging
|
8 |
|
9 |
# Assuming you have a sessions.py with create_session
|
10 |
# If sessions.py or create_session is not found, it will raise an ImportError,
|
11 |
# which is appropriate for a module that depends on it.
|
12 |
-
from sessions import create_session
|
13 |
|
14 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
15 |
|
16 |
API_V2_BASE = 'https://api.linkedin.com/v2'
|
17 |
API_REST_BASE = "https://api.linkedin.com/rest"
|
18 |
-
LINKEDIN_API_VERSION = "202502"
|
19 |
|
20 |
# --- ID to Name Mapping Helper Functions ---
|
21 |
|
22 |
def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path, id_key="id"):
|
23 |
"""
|
24 |
Generic helper to fetch and map IDs to names from a LinkedIn API endpoint.
|
|
|
|
|
25 |
"""
|
26 |
mapping = {}
|
27 |
-
request_url_for_logging = url
|
28 |
-
response_obj = None
|
29 |
try:
|
30 |
-
logging.debug(f"
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
prepared_req = session.prepare_request(req)
|
35 |
-
request_url_for_logging = prepared_req.url
|
36 |
-
|
37 |
-
logging.debug(f"Fetching names from URL: {request_url_for_logging}")
|
38 |
-
logging.debug(f"Request headers for _fetch_linkedin_names: {json.dumps(dict(prepared_req.headers), indent=2)}")
|
39 |
-
|
40 |
-
response_obj = session.send(prepared_req, timeout=30)
|
41 |
-
response_obj.raise_for_status()
|
42 |
-
data = response_obj.json()
|
43 |
|
44 |
items = data
|
45 |
-
for key in result_key_path:
|
46 |
if isinstance(items, dict):
|
47 |
-
items = items.get(key)
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
else:
|
52 |
-
logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {request_url_for_logging}. Check result_key_path.")
|
53 |
-
return mapping
|
54 |
-
|
55 |
-
if items is None:
|
56 |
-
logging.warning(f"Items became None after navigating result_key_path for URL {request_url_for_logging}. Path: {result_key_path}")
|
57 |
-
return mapping
|
58 |
|
59 |
if isinstance(items, dict):
|
60 |
for item_id_str, item_data in items.items():
|
61 |
name = item_data
|
62 |
-
for key_nav in name_key_path:
|
63 |
if isinstance(name, dict):
|
64 |
name = name.get(key_nav)
|
65 |
else:
|
66 |
-
name = None
|
67 |
break
|
68 |
if name:
|
69 |
-
mapping[item_id_str] = name
|
70 |
else:
|
71 |
-
logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {
|
72 |
elif isinstance(items, list):
|
73 |
for item in items:
|
74 |
item_id_val = item.get(id_key)
|
75 |
name = item
|
76 |
-
for key_nav in name_key_path:
|
77 |
if isinstance(name, dict):
|
78 |
name = name.get(key_nav)
|
79 |
else:
|
80 |
-
name = None
|
81 |
break
|
82 |
if item_id_val is not None and name:
|
83 |
-
mapping[str(item_id_val)] = name
|
84 |
else:
|
85 |
-
|
86 |
-
logging.warning(f"No ID ('{id_key}') found in item: {item} from URL {request_url_for_logging}")
|
87 |
-
if name is None:
|
88 |
-
logging.warning(f"No name found at path {name_key_path} for item with ID '{item_id_val}' in item: {item} from URL {request_url_for_logging}")
|
89 |
else:
|
90 |
-
logging.warning(f"Expected list or dict of items at {result_key_path} from URL {
|
91 |
-
|
92 |
-
except requests.exceptions.
|
93 |
-
status_code =
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
logging.error(f"HTTP error in _fetch_linkedin_names from {request_url_for_logging} (Status: {status_code}): {error_text}")
|
99 |
-
logging.error(f"Response Headers: {json.dumps(response_headers, indent=2)}")
|
100 |
-
except requests.exceptions.RequestException as req_err:
|
101 |
-
logging.error(f"Request error in _fetch_linkedin_names from {request_url_for_logging}: {str(req_err)}")
|
102 |
-
if req_err.response is not None:
|
103 |
-
logging.error(f"Associated Response Status: {req_err.response.status_code}, Text: {req_err.response.text}, Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
|
104 |
-
except json.JSONDecodeError as json_err:
|
105 |
-
response_text = "Not available"
|
106 |
-
if response_obj is not None and hasattr(response_obj, 'text'): response_text = response_obj.text
|
107 |
-
logging.error(f"Error decoding JSON for names from {request_url_for_logging}: {json_err}. Response text: {response_text}")
|
108 |
except Exception as e:
|
109 |
-
logging.error(f"Unexpected error
|
110 |
-
if response_obj is not None:
|
111 |
-
logging.error(f"Associated Response (if any) Status: {getattr(response_obj, 'status_code', 'N/A')}, Text: {getattr(response_obj, 'text', 'N/A')}")
|
112 |
return mapping
|
113 |
|
114 |
def get_functions_map(session):
|
|
|
115 |
url = f"{API_V2_BASE}/functions"
|
116 |
-
params = {
|
117 |
logging.info("Fetching all LinkedIn functions.")
|
118 |
return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
|
119 |
|
120 |
def get_seniorities_map(session):
|
|
|
121 |
url = f"{API_V2_BASE}/seniorities"
|
122 |
-
params = {
|
123 |
logging.info("Fetching all LinkedIn seniorities.")
|
124 |
return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
|
125 |
|
126 |
def get_industries_map(session, industry_urns, version="DEFAULT"):
|
|
|
|
|
127 |
industry_ids = [_parse_urn_to_id(urn) for urn in industry_urns or []]
|
128 |
unique_ids = set(filter(None, industry_ids))
|
129 |
if not unique_ids:
|
130 |
return {}
|
131 |
|
|
|
132 |
url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
|
133 |
-
params = {
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
|
|
137 |
try:
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
prepared_req = session.prepare_request(req)
|
142 |
-
request_url_for_logging = prepared_req.url
|
143 |
-
logging.debug(f"Requesting all industries from URL: {request_url_for_logging}")
|
144 |
-
logging.debug(f"Request headers for get_industries_map: {json.dumps(dict(prepared_req.headers), indent=2)}")
|
145 |
-
|
146 |
-
response_obj = session.send(prepared_req, timeout=30)
|
147 |
-
response_obj.raise_for_status()
|
148 |
-
data = response_obj.json()
|
149 |
elements = data.get('elements', [])
|
150 |
|
151 |
mapping = {}
|
152 |
for el in elements:
|
153 |
el_id = el.get('id')
|
154 |
if el_id and str(el_id) in unique_ids:
|
155 |
-
|
|
|
|
|
|
|
156 |
if name:
|
157 |
mapping[str(el_id)] = name
|
158 |
else:
|
159 |
-
logging.warning(f"Industry {el_id} has no en_US name field
|
160 |
return mapping
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
error_text = http_err.response.text
|
166 |
-
response_headers = dict(http_err.response.headers)
|
167 |
-
logging.error(f"HTTP error fetching all industries from {request_url_for_logging} (Status: {status_code}): {error_text}")
|
168 |
-
logging.error(f"Response Headers: {json.dumps(response_headers, indent=2)}")
|
169 |
-
return {}
|
170 |
-
except requests.exceptions.RequestException as req_err:
|
171 |
-
logging.error(f"Request error fetching all industries from {request_url_for_logging}: {str(req_err)}")
|
172 |
-
if req_err.response is not None:
|
173 |
-
logging.error(f"Associated Response Status: {req_err.response.status_code}, Text: {req_err.response.text}, Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
|
174 |
-
return {}
|
175 |
-
except json.JSONDecodeError as json_err:
|
176 |
-
response_text = "Not available"
|
177 |
-
if response_obj is not None and hasattr(response_obj, 'text'): response_text = response_obj.text
|
178 |
-
logging.error(f"Error decoding JSON for industries from {request_url_for_logging}: {json_err}. Response text: {response_text}")
|
179 |
-
return {}
|
180 |
-
except Exception as e:
|
181 |
-
logging.error(f"Unexpected error fetching all industries from {request_url_for_logging}: {e}", exc_info=True)
|
182 |
-
if response_obj is not None:
|
183 |
-
logging.error(f"Associated Response (if any) Status: {getattr(response_obj, 'status_code', 'N/A')}, Text: {getattr(response_obj, 'text', 'N/A')}")
|
184 |
return {}
|
185 |
|
|
|
|
|
186 |
def get_geo_map(session, geo_urns):
|
|
|
187 |
if not geo_urns: return {}
|
188 |
geo_ids = [_parse_urn_to_id(urn) for urn in geo_urns if urn]
|
189 |
unique_ids = list(set(filter(None, geo_ids)))
|
190 |
if not unique_ids: return {}
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
for i in range(0, len(unique_ids), MAX_GEO_IDS_PER_CALL):
|
196 |
-
chunk_ids = unique_ids[i:i + MAX_GEO_IDS_PER_CALL]
|
197 |
-
if not chunk_ids: continue
|
198 |
|
199 |
-
|
200 |
-
|
|
|
|
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
logging.info(f"Fetching names for {len(chunk_ids)} geo IDs (chunk {i//MAX_GEO_IDS_PER_CALL + 1})")
|
206 |
-
chunk_mapping = _fetch_linkedin_names(session, url, geo_params, ["results"], ["defaultLocalizedName", "value"])
|
207 |
-
all_geo_mappings.update(chunk_mapping)
|
208 |
-
|
209 |
-
return all_geo_mappings
|
210 |
|
211 |
|
212 |
def _parse_urn_to_id(urn_string):
|
|
|
213 |
if not isinstance(urn_string, str):
|
214 |
logging.debug(f"Invalid URN type: {type(urn_string)}, value: {urn_string}. Cannot parse ID.")
|
215 |
return None
|
216 |
try:
|
217 |
return urn_string.split(':')[-1]
|
218 |
-
except IndexError:
|
219 |
logging.warning(f"Could not parse ID from URN: {urn_string}")
|
220 |
return None
|
221 |
-
except Exception as e:
|
222 |
logging.error(f"Unexpected error parsing URN {urn_string}: {e}")
|
223 |
return None
|
224 |
|
225 |
# --- Follower Data Fetching Functions ---
|
226 |
|
227 |
-
def fetch_monthly_follower_gains(
|
228 |
"""
|
229 |
-
Fetches monthly follower gains
|
|
|
230 |
"""
|
231 |
-
|
232 |
-
# Calculate start of the month, approximately 12 months ago
|
233 |
-
start_of_reporting_period = (now_utc - timedelta(days=365)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
234 |
-
start_ms = int(start_of_reporting_period.timestamp() * 1000)
|
235 |
-
|
236 |
-
base_url = f"{api_rest_base_url}/organizationalEntityFollowerStatistics"
|
237 |
-
# Using the same timeIntervals format as in the original failing log, as it's standard for REST
|
238 |
-
time_intervals_value = f"(timeRange:(start:{start_ms}),timeGranularityType:MONTH)"
|
239 |
-
|
240 |
-
api_params = {
|
241 |
-
"q": "organizationalEntity",
|
242 |
-
"organizationalEntity": org_urn,
|
243 |
-
"timeIntervals": time_intervals_value
|
244 |
-
}
|
245 |
-
|
246 |
-
logging.info(f"Preparing to fetch monthly follower gains for {org_urn} using a dedicated session.")
|
247 |
-
logging.debug(f"API Parameters for monthly gains: {json.dumps(api_params)}")
|
248 |
-
logging.debug(f"Received client_id for dedicated session: {comm_client_id}")
|
249 |
-
logging.debug(f"Received token_dict for dedicated session (access_token type): {type(token_dict.get('access_token')) if token_dict else 'N/A'}")
|
250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
results = []
|
253 |
-
request_url_for_logging = "Not constructed"
|
254 |
-
response_obj = None
|
255 |
-
|
256 |
try:
|
257 |
-
|
258 |
-
#
|
259 |
-
|
260 |
-
rest_session = create_session(comm_client_id, token=token_dict)
|
261 |
-
rest_session.headers.update({
|
262 |
-
"LinkedIn-Version": LINKEDIN_API_VERSION
|
263 |
-
})
|
264 |
-
logging.info(f"Dedicated REST session created for monthly gains (id: {id(rest_session)}).")
|
265 |
-
logging.debug(f"Dedicated REST session token (type): {type(rest_session.token)}, Authorized: {rest_session.authorized}")
|
266 |
-
if rest_session.token and 'access_token' in rest_session.token:
|
267 |
-
logging.debug(f"Dedicated REST session access token (partial): {str(rest_session.token['access_token'])[:20]}...")
|
268 |
-
else:
|
269 |
-
logging.warning("Dedicated REST session: token is None or 'access_token' key is missing.")
|
270 |
-
|
271 |
-
|
272 |
-
req = requests.Request('GET', base_url, params=api_params)
|
273 |
-
prepared_req = rest_session.prepare_request(req) # Use the dedicated session
|
274 |
-
request_url_for_logging = prepared_req.url
|
275 |
-
|
276 |
-
logging.info(f"Requesting monthly follower gains from URL: {request_url_for_logging}")
|
277 |
-
# Log only essential headers, avoid logging full token if present in headers directly
|
278 |
-
headers_to_log = {k: v for k, v in prepared_req.headers.items() if k.lower() != 'authorization'}
|
279 |
-
logging.debug(f"Request Headers for monthly gains (excluding Authorization): {json.dumps(headers_to_log, indent=2)}")
|
280 |
-
|
281 |
-
|
282 |
-
response_obj = rest_session.send(prepared_req, timeout=30) # Use the dedicated session
|
283 |
-
response_obj.raise_for_status()
|
284 |
-
data = response_obj.json()
|
285 |
|
286 |
elements = data.get('elements', [])
|
287 |
if not elements:
|
288 |
-
logging.info(f"No 'elements' found in
|
289 |
|
290 |
for item in elements:
|
291 |
time_range = item.get('timeRange', {})
|
@@ -294,128 +228,104 @@ def fetch_monthly_follower_gains(comm_client_id, token_dict, org_urn, api_rest_b
|
|
294 |
logging.warning(f"Skipping item due to missing 'start' timestamp: {item}")
|
295 |
continue
|
296 |
|
|
|
297 |
date_obj = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
|
298 |
date_str = date_obj.strftime('%Y-%m-%d')
|
299 |
|
300 |
gains = item.get('followerGains', {})
|
301 |
-
|
|
|
|
|
302 |
|
303 |
results.append({
|
304 |
-
'category_name': date_str,
|
305 |
'follower_count_organic': gains.get('organicFollowerGain', 0),
|
306 |
'follower_count_paid': gains.get('paidFollowerGain', 0),
|
307 |
'follower_count_type': 'follower_gains_monthly',
|
308 |
'organization_urn': org_urn
|
309 |
})
|
310 |
-
logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn} starting from {
|
311 |
|
312 |
except requests.exceptions.HTTPError as http_err:
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
text = http_err.response.text
|
317 |
-
resp_headers = dict(http_err.response.headers)
|
318 |
logging.error(f"HTTP error fetching monthly gains for {org_urn}: {code} - {text}")
|
319 |
-
logging.error(f"Request URL
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
logging.error(f"Associated Response Text: {req_err.response.text}")
|
327 |
-
logging.error(f"Associated Response Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
|
328 |
-
except json.JSONDecodeError as json_err:
|
329 |
-
response_text = "Not available"
|
330 |
-
if response_obj is not None and hasattr(response_obj, 'text'):
|
331 |
-
response_text = response_obj.text
|
332 |
-
logging.error(f"Error decoding JSON for monthly follower gains for {org_urn}: {json_err}")
|
333 |
-
logging.error(f"Request URL was: {request_url_for_logging}")
|
334 |
-
logging.error(f"Raw Response Text: {response_text}")
|
335 |
except Exception as ex:
|
336 |
-
|
337 |
-
logging.error(f"
|
338 |
-
|
339 |
-
|
340 |
-
logging.error(f"Response Text (if available): {getattr(response_obj, 'text', 'N/A')}")
|
341 |
-
logging.error(f"Response Headers (if available): {json.dumps(dict(getattr(response_obj, 'headers', {})), indent=2)}")
|
342 |
-
|
343 |
return results
|
344 |
|
345 |
|
346 |
def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map):
|
|
|
|
|
|
|
347 |
final_demographics_results = []
|
348 |
-
|
349 |
-
params = {
|
|
|
|
|
|
|
|
|
350 |
|
351 |
-
logging.info(f"
|
352 |
-
logging.debug(f"API Parameters for demographics: {json.dumps(params)}")
|
353 |
-
request_url_for_logging = "Not constructed"
|
354 |
-
response_obj = None
|
355 |
|
356 |
try:
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
if session.token and 'access_token' in session.token:
|
361 |
-
logging.debug(f"fetch_follower_demographics: Access token (partial): {str(session.token['access_token'])[:20]}...")
|
362 |
-
else:
|
363 |
-
logging.warning("fetch_follower_demographics: session.token is None or 'access_token' key is missing before prepare_request.")
|
364 |
-
|
365 |
-
req = requests.Request('GET', base_url, params=params)
|
366 |
-
prepared_req = session.prepare_request(req)
|
367 |
-
request_url_for_logging = prepared_req.url
|
368 |
-
|
369 |
-
logging.info(f"Requesting follower demographics from URL: {request_url_for_logging}")
|
370 |
-
logging.debug(f"Request Headers for demographics: {json.dumps(dict(prepared_req.headers), indent=2)}")
|
371 |
-
|
372 |
-
response_obj = session.send(prepared_req, timeout=30)
|
373 |
-
response_obj.raise_for_status()
|
374 |
-
data = response_obj.json()
|
375 |
|
376 |
elements = data.get("elements", [])
|
377 |
if not elements:
|
378 |
-
logging.warning(f"No elements found in follower demographics response for {org_urn}.
|
379 |
return []
|
380 |
|
381 |
-
stat_element = elements[0]
|
382 |
-
if not stat_element:
|
383 |
-
logging.warning(f"Elements list is empty or stat_element is None in demographics response for {org_urn}. Response data: {json.dumps(data, indent=2)}")
|
384 |
-
return []
|
385 |
|
386 |
def _get_entries_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
|
387 |
current_type_entries = []
|
388 |
if not raw_items_list:
|
389 |
logging.debug(f"No raw items for demographic type '{type_name}' for org {org_urn_val}.")
|
390 |
return current_type_entries
|
|
|
391 |
for item in raw_items_list:
|
392 |
category_name_val = "Unknown"
|
393 |
-
if type_name == "follower_association":
|
394 |
category_name_val = item.get(id_field_name_in_item, f"Unknown {id_field_name_in_item}")
|
395 |
else:
|
396 |
urn_val = item.get(id_field_name_in_item)
|
397 |
entity_id = _parse_urn_to_id(urn_val)
|
398 |
-
|
399 |
-
|
400 |
-
elif urn_val:
|
401 |
-
category_name_val = f"Unmapped {type_name.split('_')[-1].capitalize()} (URN: {urn_val})"
|
402 |
-
else:
|
403 |
-
category_name_val = f"Missing URN for {type_name.split('_')[-1].capitalize()}"
|
404 |
counts = item.get("followerCounts", {})
|
405 |
organic_count = counts.get("organicFollowerCount", 0)
|
406 |
-
paid_count = counts.get("paidFollowerCount", 0)
|
|
|
407 |
current_type_entries.append({
|
408 |
"category_name": category_name_val,
|
409 |
-
"follower_count_organic": organic_count,
|
410 |
-
"follower_count_paid": paid_count,
|
411 |
"follower_count_type": type_name,
|
412 |
"organization_urn": org_urn_val
|
413 |
})
|
414 |
return current_type_entries
|
|
|
415 |
industry_urns_to_map = [item.get("industry") for item in stat_element.get("followerCountsByIndustry", []) if item.get("industry")]
|
416 |
geo_urns_to_map = [item.get("geo") for item in stat_element.get("followerCountsByGeoCountry", []) if item.get("geo")]
|
417 |
-
|
418 |
-
|
|
|
|
|
419 |
demographic_configs = [
|
420 |
{"items_key": "followerCountsBySeniority", "type_name": "follower_seniority", "id_map": seniorities_map, "id_field": "seniority", "top_n": 10},
|
421 |
{"items_key": "followerCountsByFunction", "type_name": "follower_function", "id_map": functions_map, "id_field": "function", "top_n": 10},
|
@@ -423,123 +333,74 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
|
|
423 |
{"items_key": "followerCountsByGeoCountry", "type_name": "follower_geo", "id_map": live_geo_map, "id_field": "geo", "top_n": 10},
|
424 |
{"items_key": "followerCountsByAssociationType", "type_name": "follower_association", "id_map": {}, "id_field": "associationType", "top_n": None}
|
425 |
]
|
|
|
426 |
for config in demographic_configs:
|
427 |
raw_items = stat_element.get(config["items_key"], [])
|
428 |
processed_entries = _get_entries_for_type(raw_items, config["type_name"], config["id_map"], config["id_field"], org_urn)
|
|
|
429 |
if config["top_n"] is not None and processed_entries:
|
430 |
-
for entry in processed_entries:
|
431 |
if not isinstance(entry.get("follower_count_organic"), (int, float)):
|
432 |
-
logging.warning(f"Invalid organic follower count for sorting in {config['type_name']}: {entry['follower_count_organic']}. Setting to 0.")
|
433 |
entry["follower_count_organic"] = 0
|
434 |
sorted_entries = sorted(processed_entries, key=lambda x: x.get("follower_count_organic", 0), reverse=True)
|
435 |
final_demographics_results.extend(sorted_entries[:config["top_n"]])
|
436 |
logging.debug(f"Added top {config['top_n']} for {config['type_name']}. Count: {len(sorted_entries[:config['top_n']])}")
|
437 |
-
else:
|
438 |
-
final_demographics_results.extend(processed_entries)
|
439 |
logging.debug(f"Added all for {config['type_name']}. Count: {len(processed_entries)}")
|
|
|
440 |
logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
logging.error(f"
|
448 |
-
logging.error(f"Request URL was: {request_url_for_logging}")
|
449 |
-
logging.error(f"Response Headers: {json.dumps(resp_headers, indent=2)}")
|
450 |
-
except requests.exceptions.RequestException as req_err:
|
451 |
-
logging.error(f"RequestException fetching follower demographics for {org_urn}: {str(req_err)}")
|
452 |
-
logging.error(f"Request URL was: {request_url_for_logging}")
|
453 |
-
if req_err.response is not None:
|
454 |
-
logging.error(f"Associated Response Status: {req_err.response.status_code}")
|
455 |
-
logging.error(f"Associated Response Text: {req_err.response.text}")
|
456 |
-
logging.error(f"Associated Response Headers: {json.dumps(dict(req_err.response.headers), indent=2)}")
|
457 |
-
except json.JSONDecodeError as json_err:
|
458 |
-
response_text = "Not available"
|
459 |
-
if response_obj is not None and hasattr(response_obj, 'text'):
|
460 |
-
response_text = response_obj.text
|
461 |
-
logging.error(f"Error decoding JSON for follower demographics for {org_urn}: {json_err}")
|
462 |
-
logging.error(f"Request URL was: {request_url_for_logging}")
|
463 |
-
logging.error(f"Raw Response Text: {response_text}")
|
464 |
except Exception as e:
|
465 |
logging.error(f"Unexpected error fetching follower demographics for {org_urn}: {e}", exc_info=True)
|
466 |
-
logging.error(f"Request URL was: {request_url_for_logging}")
|
467 |
-
if response_obj is not None:
|
468 |
-
logging.error(f"Response Status (if available): {getattr(response_obj, 'status_code', 'N/A')}")
|
469 |
-
logging.error(f"Response Text (if available): {getattr(response_obj, 'text', 'N/A')}")
|
470 |
-
logging.error(f"Response Headers (if available): {json.dumps(dict(getattr(response_obj, 'headers', {})), indent=2)}")
|
471 |
return final_demographics_results
|
472 |
|
473 |
# --- Main Orchestration Function ---
|
474 |
|
475 |
def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
if isinstance(community_token, str) and len(community_token) > 10:
|
481 |
-
logging.debug(f"Received community_token (partial): {community_token[:10]}...")
|
482 |
-
elif isinstance(community_token, dict):
|
483 |
-
logging.debug(f"Received community_token (dict): { {k: (v[:10] + '...' if k == 'access_token' and isinstance(v, str) and len(v)>10 else v) for k,v in community_token.items()} }")
|
484 |
-
|
485 |
if not all([comm_client_id, community_token, org_urn]):
|
486 |
-
logging.error("Client ID,
|
487 |
-
if not comm_client_id: logging.error("comm_client_id is missing.")
|
488 |
-
if not community_token: logging.error("community_token is missing or empty.")
|
489 |
-
if not org_urn: logging.error("org_urn is missing.")
|
490 |
return []
|
491 |
|
492 |
-
token_dict = community_token if isinstance(community_token, dict) else {'access_token':
|
493 |
|
494 |
-
|
495 |
-
logging.error("Failed to construct a valid token_dict: 'access_token' is empty.")
|
496 |
-
logging.debug(f"Problematic token_dict: {token_dict}")
|
497 |
-
return []
|
498 |
-
|
499 |
-
logging.debug(f"Constructed token_dict for session: { {k: (v[:10] + '...' if k == 'access_token' and isinstance(v, str) and len(v)>10 else v) for k,v in token_dict.items()} }")
|
500 |
-
|
501 |
-
session = None
|
502 |
try:
|
503 |
-
session = create_session(comm_client_id, token=token_dict)
|
504 |
session.headers.update({
|
505 |
"X-Restli-Protocol-Version": "2.0.0",
|
506 |
-
"LinkedIn-Version": LINKEDIN_API_VERSION,
|
507 |
-
"Accept-Language": "en_US"
|
508 |
})
|
509 |
-
logging.info(f"Session (id: {id(session)}) created and headers updated for org {org_urn}.")
|
510 |
-
logging.debug(f"get_linkedin_follower_stats (id: {id(session)}): Session token after creation: {session.token}, Session authorized: {session.authorized}, Expires at: {session.token.get('expires_at') if session.token else 'N/A'}")
|
511 |
-
logging.debug(f"get_linkedin_follower_stats (id: {id(session)}): Session auth object: type={type(session.auth)}, value={session.auth}")
|
512 |
-
logging.debug(f"get_linkedin_follower_stats (id: {id(session)}): Auto-refresh URL: {session.auto_refresh_url}, Auto-refresh kwargs: {session.auto_refresh_kwargs}")
|
513 |
-
|
514 |
-
if session.token and 'access_token' in session.token:
|
515 |
-
logging.debug(f"get_linkedin_follower_stats: Access token in session (partial): {str(session.token['access_token'])[:20]}...")
|
516 |
-
else:
|
517 |
-
logging.warning("get_linkedin_follower_stats: session.token is None or 'access_token' key is missing after session creation.")
|
518 |
-
|
519 |
-
|
520 |
except Exception as e:
|
521 |
logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
|
522 |
-
return []
|
523 |
|
524 |
-
logging.info(f"Starting follower stats retrieval for org: {org_urn}
|
525 |
|
526 |
-
functions_map = get_functions_map(session)
|
527 |
-
seniorities_map = get_seniorities_map(session)
|
528 |
|
529 |
-
if not functions_map: logging.warning(f"Functions map is empty for org {org_urn}.")
|
530 |
-
if not seniorities_map: logging.warning(f"Seniorities map is empty for org {org_urn}.")
|
531 |
|
532 |
all_follower_data = []
|
533 |
|
534 |
-
monthly_gains = fetch_monthly_follower_gains(
|
535 |
all_follower_data.extend(monthly_gains)
|
536 |
|
537 |
-
demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map)
|
538 |
all_follower_data.extend(demographics)
|
539 |
|
540 |
-
|
541 |
-
|
542 |
-
else:
|
543 |
-
logging.info(f"Successfully compiled {len(all_follower_data)} total follower stat entries for {org_urn}.")
|
544 |
-
|
545 |
-
return all_follower_data
|
|
|
|
|
1 |
import json
|
2 |
import requests
|
3 |
import logging
|
4 |
from datetime import datetime, timezone, timedelta
|
5 |
+
from urllib.parse import quote
|
|
|
6 |
|
7 |
# Assuming you have a sessions.py with create_session
|
8 |
# If sessions.py or create_session is not found, it will raise an ImportError,
|
9 |
# which is appropriate for a module that depends on it.
|
10 |
+
from sessions import create_session
|
11 |
|
12 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
13 |
|
14 |
API_V2_BASE = 'https://api.linkedin.com/v2'
|
15 |
API_REST_BASE = "https://api.linkedin.com/rest"
|
16 |
+
LINKEDIN_API_VERSION = "202502" # As per user's example for follower stats
|
17 |
|
18 |
# --- ID to Name Mapping Helper Functions ---
|
19 |
|
20 |
def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path, id_key="id"):
|
21 |
"""
|
22 |
Generic helper to fetch and map IDs to names from a LinkedIn API endpoint.
|
23 |
+
result_key_path: list of keys to navigate to the list of items (e.g., ["elements"])
|
24 |
+
name_key_path: list of keys to navigate to the name within an item (e.g., ["name", "localized", "en_US"])
|
25 |
"""
|
26 |
mapping = {}
|
|
|
|
|
27 |
try:
|
28 |
+
logging.debug(f"Fetching names from URL: {url} with params: {json.dumps(params)}") # Log params for clarity
|
29 |
+
response = session.get(url, params=params)
|
30 |
+
response.raise_for_status()
|
31 |
+
data = response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
items = data
|
34 |
+
for key in result_key_path:
|
35 |
if isinstance(items, dict):
|
36 |
+
items = items.get(key, [])
|
37 |
+
else:
|
38 |
+
logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {url}. Check result_key_path.")
|
39 |
+
return mapping
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
if isinstance(items, dict):
|
42 |
for item_id_str, item_data in items.items():
|
43 |
name = item_data
|
44 |
+
for key_nav in name_key_path:
|
45 |
if isinstance(name, dict):
|
46 |
name = name.get(key_nav)
|
47 |
else:
|
48 |
+
name = None
|
49 |
break
|
50 |
if name:
|
51 |
+
mapping[item_id_str] = name
|
52 |
else:
|
53 |
+
logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {url}")
|
54 |
elif isinstance(items, list):
|
55 |
for item in items:
|
56 |
item_id_val = item.get(id_key)
|
57 |
name = item
|
58 |
+
for key_nav in name_key_path:
|
59 |
if isinstance(name, dict):
|
60 |
name = name.get(key_nav)
|
61 |
else:
|
62 |
+
name = None
|
63 |
break
|
64 |
if item_id_val is not None and name:
|
65 |
+
mapping[str(item_id_val)] = name
|
66 |
else:
|
67 |
+
logging.warning(f"No ID ('{id_key}') or name found at path {name_key_path} in item: {item} from URL {url}")
|
|
|
|
|
|
|
68 |
else:
|
69 |
+
logging.warning(f"Expected list or dict of items at {result_key_path} from URL {url}, got {type(items)}")
|
70 |
+
|
71 |
+
except requests.exceptions.RequestException as e:
|
72 |
+
status_code = getattr(e.response, 'status_code', 'N/A')
|
73 |
+
error_text = getattr(e.response, 'text', str(e)) # Log the raw error text
|
74 |
+
logging.error(f"Error fetching names from {url} (Status: {status_code}): {error_text}")
|
75 |
+
except json.JSONDecodeError as e:
|
76 |
+
logging.error(f"Error decoding JSON for names from {url}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
except Exception as e:
|
78 |
+
logging.error(f"Unexpected error fetching names from {url}: {e}", exc_info=True)
|
|
|
|
|
79 |
return mapping
|
80 |
|
81 |
def get_functions_map(session):
    """Return a ``{id: name}`` map of all LinkedIn job functions.

    Delegates to ``_fetch_linkedin_names`` against the v2 ``/functions``
    endpoint, reading each item's ``name.localized.en_US`` value and its ``id``.
    No query parameters are sent; per the original author's note the request
    relies on the session's Accept-Language header.
    """
    functions_endpoint = f"{API_V2_BASE}/functions"
    logging.info("Fetching all LinkedIn functions.")
    return _fetch_linkedin_names(
        session,
        functions_endpoint,
        {},
        result_key_path=["elements"],
        name_key_path=["name", "localized", "en_US"],
        id_key="id",
    )
|
87 |
|
88 |
def get_seniorities_map(session):
    """Return a ``{id: name}`` map of all LinkedIn seniority levels.

    Delegates to ``_fetch_linkedin_names`` against the v2 ``/seniorities``
    endpoint, reading each item's ``name.localized.en_US`` value and its ``id``.
    No query parameters are sent; per the original author's note the request
    relies on the session's Accept-Language header.
    """
    seniorities_endpoint = f"{API_V2_BASE}/seniorities"
    logging.info("Fetching all LinkedIn seniorities.")
    return _fetch_linkedin_names(
        session,
        seniorities_endpoint,
        {},
        result_key_path=["elements"],
        name_key_path=["name", "localized", "en_US"],
        id_key="id",
    )
|
94 |
|
95 |
def get_industries_map(session, industry_urns, version="DEFAULT"):
    """Fetches names for a list of industry URNs by pulling ALL industries and filtering locally.

    Args:
        session: HTTP session exposing ``get(url, params=...)``.
        industry_urns: Iterable of industry URN strings; ``None`` is treated
            as an empty list.
        version: Taxonomy version segment interpolated into the endpoint path.

    Returns:
        A dict mapping industry ID (as ``str``) to its ``en_US`` name. Empty
        when no usable ID could be parsed or when the request fails.
    """
    # Parse and dedupe the IDs we actually need names for.
    wanted_ids = {
        parsed
        for parsed in (_parse_urn_to_id(urn) for urn in industry_urns or [])
        if parsed
    }
    if not wanted_ids:
        return {}

    url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
    # A single large page is requested; no follow-up pages are fetched, so
    # 'count' has to exceed the total number of industries.
    params = {
        'start': 0,
        'count': 500,
    }

    logging.info(f"Fetching all industries (to filter {len(wanted_ids)} IDs) from {url}")
    try:
        response = session.get(url, params=params)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSONDecodeError is not a RequestException subclass.
        err_response = getattr(e, 'response', None)
        status_code = getattr(err_response, 'status_code', 'N/A')
        logging.error(f"Error fetching all industries: {status_code} – {getattr(err_response, 'text', str(e))}")
        return {}

    mapping = {}
    for el in data.get('elements') or []:
        if not isinstance(el, dict):
            continue
        el_id = el.get('id')
        if not el_id or str(el_id) not in wanted_ids:
            continue

        # Drill into name.localized.en_US, tolerating null / missing levels
        # instead of raising AttributeError on a None intermediate value.
        name_info = el.get('name')
        localized = name_info.get('localized') if isinstance(name_info, dict) else None
        name = localized.get('en_US') if isinstance(localized, dict) else None

        if name:
            mapping[str(el_id)] = name
        else:
            logging.warning(f"Industry {el_id} has no en_US name field")
    return mapping
|
135 |
|
136 |
+
|
137 |
+
|
138 |
def get_geo_map(session, geo_urns):
    """Resolve geo URNs to display names and return them as ``{id: name}``.

    Returns an empty dict when ``geo_urns`` is empty or yields no parsable ID.
    """
    if not geo_urns:
        return {}

    parsed_ids = (_parse_urn_to_id(urn) for urn in geo_urns if urn)
    unique_ids = list({geo_id for geo_id in parsed_ids if geo_id})
    if not unique_ids:
        return {}

    # BATCH_GET query format: ids=List(12345,23456)&locale=(language:en,country:US).
    # The query is assembled into the URL by hand and an empty params dict is
    # handed to the helper, so the parenthesised values are sent as written.
    id_list = ",".join(str(geo_id) for geo_id in unique_ids)
    url = f"{API_V2_BASE}/geo?ids=List({id_list})&locale=(language:en,country:US)"

    logging.info(f"Fetching names for {len(unique_ids)} unique geo IDs using URL: {url}")
    return _fetch_linkedin_names(
        session,
        url,
        {},
        ["results"],
        ["defaultLocalizedName", "value"],
    )
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
|
159 |
def _parse_urn_to_id(urn_string):
|
160 |
+
"""Helper to get the last part (ID) from a URN string."""
|
161 |
if not isinstance(urn_string, str):
|
162 |
logging.debug(f"Invalid URN type: {type(urn_string)}, value: {urn_string}. Cannot parse ID.")
|
163 |
return None
|
164 |
try:
|
165 |
return urn_string.split(':')[-1]
|
166 |
+
except IndexError:
|
167 |
logging.warning(f"Could not parse ID from URN: {urn_string}")
|
168 |
return None
|
169 |
+
except Exception as e:
|
170 |
logging.error(f"Unexpected error parsing URN {urn_string}: {e}")
|
171 |
return None
|
172 |
|
173 |
# --- Follower Data Fetching Functions ---
|
174 |
|
175 |
+
def fetch_monthly_follower_gains(session, org_urn, api_rest_base):
    """
    Fetches monthly follower gains for roughly the last 12 months.

    The queried range starts on the first day of the month 12 months before
    the current month, at midnight UTC, and ends at the current time.

    Args:
        session: HTTP session exposing ``get(url)``.
        org_urn: Organization URN; interpolated into the query string as-is
            and copied into every result row.
        api_rest_base: Base URL of the REST API (no trailing slash).

    Returns:
        A list of dicts, one per returned element that carries a start
        timestamp, with keys ``category_name`` (period start as YYYY-MM-DD,
        UTC), ``follower_count_organic``, ``follower_count_paid``,
        ``follower_count_type`` and ``organization_urn``. Request and parsing
        errors are logged and the rows collected so far are returned.
    """
    now = datetime.now(timezone.utc)
    # Same month one year back; day 1 exists in every month, so replace()
    # cannot fail, and zeroing the time pins the start to midnight UTC.
    start_of_period = now.replace(
        year=now.year - 1, day=1, hour=0, minute=0, second=0, microsecond=0
    )

    start_ms = int(start_of_period.timestamp() * 1000)
    end_ms = int(now.timestamp() * 1000)

    # The interval tuple is written into the URL by hand: passing it through
    # `params=` would percent-encode the parentheses, colons and commas.
    time_intervals = f"(timeRange:(start:{start_ms},end:{end_ms}),timeGranularityType:MONTH)"

    url = (
        f"{api_rest_base}/organizationalEntityFollowerStatistics"
        f"?q=organizationalEntity"
        f"&organizationalEntity={org_urn}"
        f"&timeIntervals={time_intervals}"
    )

    results = []
    try:
        response = session.get(url)
        response.raise_for_status()  # Raises an HTTPError for bad responses (4XX or 5XX)
        data = response.json()

        elements = data.get('elements', [])
        if not elements:
            logging.info(f"No 'elements' found in API response for {org_urn} for start_ms {start_ms}.")

        for item in elements:
            time_range = item.get('timeRange') or {}
            ts = time_range.get('start')
            if ts is None:
                logging.warning(f"Skipping item due to missing 'start' timestamp: {item}")
                continue

            # Convert timestamp (milliseconds) to YYYY-MM-DD date string in UTC
            date_obj = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
            date_str = date_obj.strftime('%Y-%m-%d')

            # 'followerGains' may be missing or null; fall back to an empty
            # dict so the .get() calls below default to 0.
            gains = item.get('followerGains') or {}

            results.append({
                'category_name': date_str,  # This is the start date of the month's data
                'follower_count_organic': gains.get('organicFollowerGain', 0),
                'follower_count_paid': gains.get('paidFollowerGain', 0),
                'follower_count_type': 'follower_gains_monthly',
                'organization_urn': org_urn
            })
        logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn} starting from {start_of_period.strftime('%Y-%m-%d')}.")

    except requests.exceptions.HTTPError as http_err:
        # More specific error for HTTP errors
        code = getattr(http_err.response, 'status_code', 'N/A')
        text = getattr(http_err.response, 'text', str(http_err))
        logging.error(f"HTTP error fetching monthly gains for {org_urn}: {code} - {text}")
        logging.error(f"Request URL: {url}")
    except requests.exceptions.RequestException as e:
        # Catch other request-related errors (e.g., connection issues)
        code = getattr(e.response, 'status_code', 'N/A') if e.response is not None else 'N/A'
        text = getattr(e.response, 'text', str(e)) if e.response is not None else str(e)
        logging.error(f"Error fetching monthly gains for {org_urn}: {code} - {text}")
        logging.error(f"Request URL: {url}")
    except Exception as ex:
        # Catch any other unexpected errors (e.g., JSON parsing if response is not JSON)
        logging.error(f"An unexpected error occurred while fetching monthly gains for {org_urn}: {str(ex)}", exc_info=True)
        logging.error(f"Request URL: {url}")

    return results
|
267 |
|
268 |
|
269 |
def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map):
    """
    Fetches current follower demographics, applying Top-N for specified categories.

    Only the first element of the statistics response is read. Industry and
    geo names are looked up on demand; function and seniority names come from
    the maps supplied by the caller. Categories with a Top-N limit are ranked
    by organic follower count. Errors are logged and whatever was collected
    up to that point is returned.
    """
    final_demographics_results = []
    url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
    # Parameters for the main demographics call
    params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}

    logging.info(f"Fetching follower demographics from: {url} for org URN {org_urn} with params: {json.dumps(params)}")

    try:
        response = session.get(url, params=params)
        response.raise_for_status()
        data = response.json()

        elements = data.get("elements", [])
        if not elements:
            logging.warning(f"No elements found in follower demographics response for {org_urn}.")
            return []

        stat_element = elements[0]

        def _rows_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
            # Turns the raw items of one demographic category into output rows.
            if not raw_items_list:
                logging.debug(f"No raw items for demographic type '{type_name}' for org {org_urn_val}.")
                return []

            rows = []
            for item in raw_items_list:
                if type_name == "follower_association":
                    # Association types are used as the label directly.
                    label = item.get(id_field_name_in_item, f"Unknown {id_field_name_in_item}")
                else:
                    urn_val = item.get(id_field_name_in_item)
                    entity_id = _parse_urn_to_id(urn_val)
                    label = id_map.get(str(entity_id), f"Unknown {type_name.split('_')[-1].capitalize()} (ID: {entity_id if entity_id else urn_val})")

                counts = item.get("followerCounts", {})
                rows.append({
                    "category_name": label,
                    "follower_count_organic": counts.get("organicFollowerCount", 0),
                    "follower_count_paid": counts.get("paidFollowerCount", 0),
                    "follower_count_type": type_name,
                    "organization_urn": org_urn_val
                })
            return rows

        industry_urns_to_map = [
            entry.get("industry")
            for entry in stat_element.get("followerCountsByIndustry", [])
            if entry.get("industry")
        ]
        geo_urns_to_map = [
            entry.get("geo")
            for entry in stat_element.get("followerCountsByGeoCountry", [])
            if entry.get("geo")
        ]

        live_industries_map = get_industries_map(session, industry_urns_to_map)
        live_geo_map = get_geo_map(session, geo_urns_to_map)

        # (items_key, type_name, id_map, id_field, top_n); top_n None keeps every row.
        demographic_configs = [
            ("followerCountsBySeniority", "follower_seniority", seniorities_map, "seniority", 10),
            ("followerCountsByFunction", "follower_function", functions_map, "function", 10),
            ("followerCountsByIndustry", "follower_industry", live_industries_map, "industry", 10),
            ("followerCountsByGeoCountry", "follower_geo", live_geo_map, "geo", 10),
            ("followerCountsByAssociationType", "follower_association", {}, "associationType", None),
        ]

        for items_key, type_name, id_map, id_field, top_n in demographic_configs:
            processed_entries = _rows_for_type(
                stat_element.get(items_key, []), type_name, id_map, id_field, org_urn
            )

            if top_n is None or not processed_entries:
                final_demographics_results.extend(processed_entries)
                logging.debug(f"Added all for {type_name}. Count: {len(processed_entries)}")
                continue

            # Non-numeric organic counts are coerced to 0 so the ranking cannot fail.
            for entry in processed_entries:
                if not isinstance(entry.get("follower_count_organic"), (int, float)):
                    entry["follower_count_organic"] = 0

            ranked = sorted(
                processed_entries,
                key=lambda row: row.get("follower_count_organic", 0),
                reverse=True,
            )
            top_rows = ranked[:top_n]
            final_demographics_results.extend(top_rows)
            logging.debug(f"Added top {top_n} for {type_name}. Count: {len(top_rows)}")

        logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")

    except requests.exceptions.RequestException as e:
        status_code = getattr(e.response, 'status_code', 'N/A')
        error_text = getattr(e.response, 'text', str(e))
        logging.error(f"Error fetching follower demographics for {org_urn} (Status: {status_code}): {error_text}")
    except json.JSONDecodeError as e:
        logging.error(f"Error decoding JSON for follower demographics for {org_urn}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
    except Exception as e:
        logging.error(f"Unexpected error fetching follower demographics for {org_urn}: {e}", exc_info=True)
    return final_demographics_results
|
363 |
|
364 |
# --- Main Orchestration Function ---
|
365 |
|
366 |
def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
    """
    Main function to fetch all follower statistics (monthly gains and demographics)
    and format them for Bubble.

    Returns a single list with the monthly-gain rows followed by the
    demographic rows, or an empty list when an argument is missing or the
    session cannot be set up.
    """
    if not (comm_client_id and community_token and org_urn):
        logging.error("Client ID, token, or Organization URN is missing for get_linkedin_follower_stats.")
        return []

    # A bare token string is wrapped into a bearer-token dict.
    if isinstance(community_token, dict):
        token_dict = community_token
    else:
        token_dict = {'access_token': community_token, 'token_type': 'Bearer'}

    try:
        session = create_session(comm_client_id, token=token_dict)
        extra_headers = {
            "X-Restli-Protocol-Version": "2.0.0",
            "LinkedIn-Version": LINKEDIN_API_VERSION,
            # Explicitly set for v2 name lookups if not default in session
            "Accept-Language": "en_US",
        }
        session.headers.update(extra_headers)
    except Exception as e:
        logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
        return []

    logging.info(f"Starting follower stats retrieval for org: {org_urn}")

    # Name lookups shared by the demographics step.
    functions_map = get_functions_map(session)
    seniorities_map = get_seniorities_map(session)

    if not functions_map:
        logging.warning(f"Functions map is empty for org {org_urn}. Function names might not be resolved.")
    if not seniorities_map:
        logging.warning(f"Seniorities map is empty for org {org_urn}. Seniority names might not be resolved.")

    monthly_gains = fetch_monthly_follower_gains(session, org_urn, API_REST_BASE)
    demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map)
    all_follower_data = [*monthly_gains, *demographics]

    logging.info(f"Successfully compiled {len(all_follower_data)} total follower stat entries for {org_urn}.")
    return all_follower_data
|
|
|
|
|
|
|
|