GuglielmoTor committed on
Commit
159e669
·
verified ·
1 Parent(s): eaea9c5

Delete apis/linkedin_follower_stats.py

Browse files
Files changed (1) hide show
  1. apis/linkedin_follower_stats.py +0 -415
apis/linkedin_follower_stats.py DELETED
@@ -1,415 +0,0 @@
1
- import json
2
- import requests
3
- import logging
4
- from datetime import datetime, timezone, timedelta
5
- from urllib.parse import quote
6
-
7
- # Assuming you have a sessions.py with create_session
8
- # If sessions.py or create_session is not found, it will raise an ImportError,
9
- # which is appropriate for a module that depends on it.
10
- from utils.sessions import create_session
11
-
12
# Configure root logging once, at import time, so every helper in this module
# emits timestamped, level-tagged records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Base URLs of the two LinkedIn API families used by this module.
API_V2_BASE = "https://api.linkedin.com/v2"
API_REST_BASE = "https://api.linkedin.com/rest"

# Value sent in the "LinkedIn-Version" header for versioned REST calls
# (taken from the follower-statistics example this module was built from).
LINKEDIN_API_VERSION = "202502"
17
-
18
- # --- ID to Name Mapping Helper Functions ---
19
-
20
def _resolve_nested_value(container, key_path):
    """Walk *key_path* through nested dicts; return None once a level is not a dict."""
    value = container
    for key in key_path:
        if not isinstance(value, dict):
            return None
        value = value.get(key)
    return value


def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path, id_key="id"):
    """
    Generic helper to fetch and map IDs to names from a LinkedIn API endpoint.

    session: object exposing ``get(url, params=...)`` that returns a response with
        ``raise_for_status()`` and ``json()`` (a requests-style session).
    url: endpoint to query.
    params: query parameters forwarded to ``session.get``.
    result_key_path: list of keys to navigate to the collection of items (e.g., ["elements"]).
    name_key_path: list of keys to navigate to the name within an item
        (e.g., ["name", "localized", "en_US"]).
    id_key: key holding the item's ID when the collection is a list.

    Returns a dict ``{id_as_str: name}``. When the collection is a dict, its keys are used
    as IDs; when it is a list, each item's ``id_key`` value is stringified. Items without a
    usable ID or name are skipped with a warning. Request/JSON/unexpected errors are logged
    and whatever was mapped so far (possibly nothing) is returned; nothing is raised.
    """
    mapping = {}
    response = None  # kept so the JSON error handler can report the raw body
    try:
        logging.debug(f"Fetching names from URL: {url} with params: {json.dumps(params)}")
        response = session.get(url, params=params)
        response.raise_for_status()
        data = response.json()

        # Descend to the collection of items.
        items = data
        for key in result_key_path:
            if not isinstance(items, dict):
                logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {url}. Check result_key_path.")
                return mapping
            items = items.get(key, [])

        if isinstance(items, dict):
            # Batch-style responses: {id: item}.
            for item_id_str, item_data in items.items():
                name = _resolve_nested_value(item_data, name_key_path)
                if name:
                    mapping[item_id_str] = name
                else:
                    logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {url}")
        elif isinstance(items, list):
            # Collection-style responses: [item, ...] with the ID stored inside each item.
            for item in items:
                if not isinstance(item, dict):
                    # A malformed entry must not abort the whole mapping.
                    logging.warning(f"No ID ('{id_key}') or name found at path {name_key_path} in item: {item} from URL {url}")
                    continue
                item_id_val = item.get(id_key)
                name = _resolve_nested_value(item, name_key_path)
                if item_id_val is not None and name:
                    mapping[str(item_id_val)] = name
                else:
                    logging.warning(f"No ID ('{id_key}') or name found at path {name_key_path} in item: {item} from URL {url}")
        else:
            logging.warning(f"Expected list or dict of items at {result_key_path} from URL {url}, got {type(items)}")

    except requests.exceptions.RequestException as e:
        status_code = getattr(e.response, 'status_code', 'N/A')
        error_text = getattr(e.response, 'text', str(e))  # Log the raw error text
        logging.error(f"Error fetching names from {url} (Status: {status_code}): {error_text}")
    except json.JSONDecodeError as e:
        raw_body = response.text if response is not None else 'N/A'
        logging.error(f"Error decoding JSON for names from {url}: {e}. Response: {raw_body}")
    except Exception as e:
        logging.error(f"Unexpected error fetching names from {url}: {e}", exc_info=True)
    return mapping
80
-
81
def get_functions_map(session):
    """Return a ``{function_id: name}`` map covering every LinkedIn job function."""
    logging.info("Fetching all LinkedIn functions.")
    endpoint = API_V2_BASE + "/functions"
    # No query parameters are sent; localisation relies on the session's
    # Accept-Language header.
    return _fetch_linkedin_names(
        session,
        endpoint,
        {},
        ["elements"],
        ["name", "localized", "en_US"],
        "id",
    )
87
-
88
def get_seniorities_map(session):
    """Return a ``{seniority_id: name}`` map covering every LinkedIn seniority level."""
    logging.info("Fetching all LinkedIn seniorities.")
    endpoint = API_V2_BASE + "/seniorities"
    # No query parameters are sent; localisation relies on the session's
    # Accept-Language header.
    return _fetch_linkedin_names(
        session,
        endpoint,
        {},
        ["elements"],
        ["name", "localized", "en_US"],
        "id",
    )
94
-
95
def get_industries_map(session, industry_urns, version="DEFAULT"):
    """
    Fetch names for a list of industry URNs by pulling the industry list and filtering locally.

    session: requests-style session used for the GET call.
    industry_urns: iterable of industry URN strings (``None`` is treated as empty).
    version: taxonomy version segment placed in the endpoint path.

    Returns ``{industry_id_str: en_US name}`` for the requested IDs that were found and
    have an ``en_US`` name. Returns ``{}`` when there is nothing to look up or when the
    request fails / the body is not usable JSON; errors are logged, never raised.
    """
    # Parse and dedupe IDs; falsy results (unparseable URNs, empty IDs) are dropped.
    unique_ids = {
        industry_id
        for industry_id in (_parse_urn_to_id(urn) for urn in industry_urns or [])
        if industry_id
    }
    if not unique_ids:
        return {}

    # A single large page is requested instead of paging; the original author's note
    # says the API defaults to 10 per page and 500 should exceed the total count.
    # NOTE(review): if the taxonomy ever grows past 500 entries this will silently
    # miss some industries — confirm against the API.
    url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
    params = {
        'start': 0,
        'count': 500,
    }

    logging.info(f"Fetching all industries (to filter {len(unique_ids)} IDs) from {url}")
    try:
        response = session.get(url, params=params)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        status_code = getattr(e.response, 'status_code', 'N/A')
        logging.error(f"Error fetching all industries: {status_code} – {getattr(e.response, 'text', str(e))}")
        return {}
    except ValueError as e:
        # Covers json.JSONDecodeError raised for a non-JSON body.
        logging.error(f"Error decoding JSON for industries from {url}: {e}")
        return {}

    elements = data.get('elements', []) if isinstance(data, dict) else None
    if not isinstance(elements, list):
        logging.warning(f"Expected a list of industries under 'elements' from {url}, got {type(elements)}")
        return {}

    mapping = {}
    for el in elements:
        if not isinstance(el, dict):
            continue
        el_id = el.get('id')
        if el_id is None or str(el_id) not in unique_ids:
            continue
        # Drill into name.localized.en_US, tolerating missing / non-dict levels.
        name_block = el.get('name')
        localized = name_block.get('localized') if isinstance(name_block, dict) else None
        name = localized.get('en_US') if isinstance(localized, dict) else None
        if name:
            mapping[str(el_id)] = name
        else:
            logging.warning(f"Industry {el_id} has no en_US name field")
    return mapping
135
-
136
-
137
-
138
def get_geo_map(session, geo_urns):
    """Resolve geo URNs to display names and return them as ``{geo_id: name}``."""
    if not geo_urns:
        return {}

    parsed_ids = (_parse_urn_to_id(urn) for urn in geo_urns if urn)
    unique_ids = list(set(geo_id for geo_id in parsed_ids if geo_id))
    if not unique_ids:
        return {}

    # BATCH_GET syntax from the LinkedIn docs:
    #   ids=List(12345,23456)&locale=(language:en,country:US)
    # Both values are embedded in the URL itself (the params dict handed to the
    # helper stays empty) so the literal format is preserved.
    joined_ids = ",".join(str(geo_id) for geo_id in unique_ids)
    ids_param_string = f"List({joined_ids})"
    locale_param_string = "(language:en,country:US)"
    url = f"{API_V2_BASE}/geo?ids={ids_param_string}&locale={locale_param_string}"

    logging.info(f"Fetching names for {len(unique_ids)} unique geo IDs using URL: {url}")
    return _fetch_linkedin_names(
        session,
        url,
        {},
        ["results"],
        ["defaultLocalizedName", "value"],
    )
157
-
158
-
159
def _parse_urn_to_id(urn_string):
    """
    Helper to get the last part (ID) from a URN string.

    Returns the text after the final ':' (the whole string if it contains no ':').
    Returns None, with a debug log, when *urn_string* is not a ``str``.
    An empty string or a URN ending in ':' yields ``''``.
    """
    if not isinstance(urn_string, str):
        logging.debug(f"Invalid URN type: {type(urn_string)}, value: {urn_string}. Cannot parse ID.")
        return None
    # rpartition never raises on a str, so no exception handling is needed here.
    return urn_string.rpartition(':')[2]
172
-
173
- # --- Follower Data Fetching Functions ---
174
-
175
def fetch_monthly_follower_gains(session, org_urn, api_rest_base):
    """
    Fetches monthly follower gains for the last 12 months.

    The start of the requested range is the first day of the same calendar month one
    year before the current UTC month, at midnight UTC. No end is sent; the original
    author's note says LinkedIn then defaults the end to the current time
    (NOTE(review): confirm against the API docs).

    session: requests-style session; its ``LinkedIn-Version`` header is set to "202502".
    org_urn: organization URN, inserted into the query string as given.
    api_rest_base: base URL of the REST API (a trailing '/' is ignored); falls back to
        "https://api.linkedin.com/rest" when empty.

    Returns a list of dicts with keys ``category_name`` (YYYY-MM-DD, UTC start of the
    period), ``follower_count_organic``, ``follower_count_paid``,
    ``follower_count_type`` ('follower_gains_monthly') and ``organization_urn``.
    Request and unexpected errors are logged; the entries collected so far are returned.
    """
    session.headers.update({'LinkedIn-Version': "202502"})

    # Same month of the previous year, day 1, 00:00 UTC. Setting day=1 in the same
    # replace() call avoids an invalid date when today is Feb 29.
    now_utc = datetime.now(timezone.utc)
    period_start = now_utc.replace(
        year=now_utc.year - 1, day=1, hour=0, minute=0, second=0, microsecond=0
    )
    start_ms = int(period_start.timestamp() * 1000)

    base_url = (api_rest_base or "https://api.linkedin.com/rest").rstrip('/')
    # The query string is built by hand to keep the dotted timeIntervals.* format.
    url = (
        f"{base_url}/organizationalEntityFollowerStatistics"
        f"?q=organizationalEntity"
        f"&organizationalEntity={org_urn}"
        f"&timeIntervals.timeGranularityType=MONTH"
        f"&timeIntervals.timeRange.start={start_ms}"
    )

    results = []
    try:
        response = session.get(url)
        response.raise_for_status()  # Raises an HTTPError for bad responses (4XX or 5XX)
        data = response.json()

        elements = data.get('elements', [])
        if not elements:
            logging.info(f"No 'elements' found in API response for {org_urn} for start_ms {start_ms}.")

        for item in elements:
            time_range = item.get('timeRange') or {}
            ts = time_range.get('start')
            if not isinstance(ts, (int, float)):
                logging.warning(f"Skipping item due to missing 'start' timestamp: {item}")
                continue

            # Convert timestamp (milliseconds) to YYYY-MM-DD date string in UTC
            date_str = datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime('%Y-%m-%d')

            # 'followerGains' may be missing or None; fall back to an empty dict.
            gains = item.get('followerGains') or {}

            results.append({
                'category_name': date_str,  # This is the start date of the month's data
                'follower_count_organic': gains.get('organicFollowerGain', 0),
                'follower_count_paid': gains.get('paidFollowerGain', 0),
                'follower_count_type': 'follower_gains_monthly',
                'organization_urn': org_urn,
            })
        logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn} starting from {period_start.strftime('%Y-%m-%d')}.")

    except requests.exceptions.RequestException as e:
        # HTTPError is a RequestException subclass; only the log prefix differs.
        prefix = "HTTP error" if isinstance(e, requests.exceptions.HTTPError) else "Error"
        code = getattr(e.response, 'status_code', 'N/A') if e.response is not None else 'N/A'
        text = getattr(e.response, 'text', str(e)) if e.response is not None else str(e)
        logging.error(f"{prefix} fetching monthly gains for {org_urn}: {code} - {text}")
        logging.error(f"Request URL: {url}")
    except Exception as ex:
        # Catch any other unexpected errors (e.g., JSON parsing if response is not JSON)
        logging.error(f"An unexpected error occurred while fetching monthly gains for {org_urn}: {str(ex)}")
        logging.error(f"Request URL: {url}")

    return results
273
-
274
-
275
def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map):
    """
    Fetches current follower demographics, applying Top-N for specified categories.

    session: requests-style session used for the statistics call and the name lookups.
    org_urn: organization URN sent as the ``organizationalEntity`` query parameter.
    functions_map / seniorities_map: ``{id_str: name}`` maps used to label the
        function and seniority facets; industry and geo names are looked up live.

    Only the first element of the response is read. For each facet (seniority, function,
    industry, geo country) the entries are sorted by organic follower count, descending,
    and the top 10 are kept. Returns a list of dicts with keys ``category_name``,
    ``follower_count_organic``, ``follower_count_paid``, ``follower_count_type`` and
    ``organization_urn``. Errors are logged and whatever was collected is returned.
    """
    final_demographics_results = []
    # Parameters for the main demographics call
    params = {
        'q': 'organizationalEntity',
        'organizationalEntity': org_urn
    }
    url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"

    def _get_entries_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
        """Turn one facet's raw items into output rows, resolving each URN to a name."""
        current_type_entries = []
        if not raw_items_list:
            logging.debug(f"No raw items for demographic type '{type_name}' for org {org_urn_val}.")
            return current_type_entries

        # Label used when an ID cannot be resolved, e.g. "follower_geo" -> "Geo".
        unknown_label_suffix = type_name.split('_')[-1].capitalize()
        names_by_id = id_map or {}

        for item in raw_items_list:
            if not isinstance(item, dict):
                # A malformed entry must not abort the whole facet.
                logging.warning(f"Skipping malformed '{type_name}' item for org {org_urn_val}: {item}")
                continue

            urn_val = item.get(id_field_name_in_item)
            entity_id = _parse_urn_to_id(urn_val)
            category_name_val = names_by_id.get(
                str(entity_id),
                f"Unknown {unknown_label_suffix} (ID: {entity_id if entity_id else urn_val})",
            )

            # 'followerCounts' may be missing or None.
            counts = item.get("followerCounts") or {}

            current_type_entries.append({
                "category_name": category_name_val,
                "follower_count_organic": counts.get("organicFollowerCount", 0),
                "follower_count_paid": counts.get("paidFollowerCount", 0),
                "follower_count_type": type_name,
                "organization_urn": org_urn_val
            })
        return current_type_entries

    logging.info(f"Fetching follower demographics from: {url} for org URN {org_urn} with params: {json.dumps(params)}")

    response = None  # kept so the JSON error handler can report the raw body
    try:
        response = session.get(url, params=params)
        response.raise_for_status()
        data = response.json()

        elements = data.get("elements", [])
        if not elements:
            logging.warning(f"No elements found in follower demographics response for {org_urn}.")
            return []

        stat_element = elements[0]

        # Collect the URNs whose names must be looked up live.
        industry_urns_to_map = [
            item.get("industry")
            for item in stat_element.get("followerCountsByIndustry") or []
            if isinstance(item, dict) and item.get("industry")
        ]
        geo_urns_to_map = [
            item.get("geo")
            for item in stat_element.get("followerCountsByGeoCountry") or []
            if isinstance(item, dict) and item.get("geo")
        ]

        live_industries_map = get_industries_map(session, industry_urns_to_map)
        live_geo_map = get_geo_map(session, geo_urns_to_map)

        demographic_configs = [
            {"items_key": "followerCountsBySeniority", "type_name": "follower_seniority", "id_map": seniorities_map, "id_field": "seniority", "top_n": 10},
            {"items_key": "followerCountsByFunction", "type_name": "follower_function", "id_map": functions_map, "id_field": "function", "top_n": 10},
            {"items_key": "followerCountsByIndustry", "type_name": "follower_industry", "id_map": live_industries_map, "id_field": "industry", "top_n": 10},
            {"items_key": "followerCountsByGeoCountry", "type_name": "follower_geo", "id_map": live_geo_map, "id_field": "geo", "top_n": 10}
        ]

        for config in demographic_configs:
            raw_items = stat_element.get(config["items_key"], [])
            processed_entries = _get_entries_for_type(raw_items, config["type_name"], config["id_map"], config["id_field"], org_urn)

            if config["top_n"] is not None and processed_entries:
                # Non-numeric organic counts are coerced to 0 so sorting cannot fail.
                for entry in processed_entries:
                    if not isinstance(entry.get("follower_count_organic"), (int, float)):
                        entry["follower_count_organic"] = 0
                sorted_entries = sorted(processed_entries, key=lambda x: x.get("follower_count_organic", 0), reverse=True)
                top_entries = sorted_entries[:config["top_n"]]
                final_demographics_results.extend(top_entries)
                logging.debug(f"Added top {config['top_n']} for {config['type_name']}. Count: {len(top_entries)}")
            else:
                final_demographics_results.extend(processed_entries)
                logging.debug(f"Added all for {config['type_name']}. Count: {len(processed_entries)}")

        logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")

    except requests.exceptions.RequestException as e:
        status_code = getattr(e.response, 'status_code', 'N/A')
        error_text = getattr(e.response, 'text', str(e))
        logging.error(f"Error fetching follower demographics for {org_urn} (Status: {status_code}): {error_text}")
    except json.JSONDecodeError as e:
        raw_body = response.text if response is not None else 'N/A'
        logging.error(f"Error decoding JSON for follower demographics for {org_urn}: {e}. Response: {raw_body}")
    except Exception as e:
        logging.error(f"Unexpected error fetching follower demographics for {org_urn}: {e}", exc_info=True)
    return final_demographics_results
371
-
372
- # --- Main Orchestration Function ---
373
-
374
def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
    """
    Main function to fetch all follower statistics (monthly gains and demographics)
    and format them for Bubble.

    comm_client_id: client ID passed to ``create_session``.
    community_token: either a token dict, or a bare access-token string that is wrapped
        as ``{'access_token': ..., 'token_type': 'Bearer'}``.
    org_urn: organization URN to report on.

    Returns a single list containing the demographic rows followed by the monthly-gain
    rows. Returns ``[]`` when an argument is missing or the first session cannot be
    created. If the second session (used for monthly gains) cannot be created, the
    demographic rows already collected are still returned.
    """
    if not all([comm_client_id, community_token, org_urn]):
        logging.error("Client ID, token, or Organization URN is missing for get_linkedin_follower_stats.")
        return []

    token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}

    try:
        session = create_session(comm_client_id, token=token_dict)
        session.headers.update({
            "X-Restli-Protocol-Version": "2.0.0",
            "LinkedIn-Version": LINKEDIN_API_VERSION,
            "Accept-Language": "en_US"  # Explicitly set for v2 name lookups if not default in session
        })
    except Exception as e:
        logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
        return []

    logging.info(f"Starting follower stats retrieval for org: {org_urn}")

    functions_map = get_functions_map(session)
    seniorities_map = get_seniorities_map(session)

    if not functions_map:
        logging.warning(f"Functions map is empty for org {org_urn}. Function names might not be resolved.")
    if not seniorities_map:
        logging.warning(f"Seniorities map is empty for org {org_urn}. Seniority names might not be resolved.")

    all_follower_data = []

    demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map)
    all_follower_data.extend(demographics)

    # Monthly gains use a fresh session with only the base headers: per the original
    # author's note, the call did not work on the session configured above.
    try:
        gains_session = create_session(comm_client_id, token=token_dict)
    except Exception as e:
        # Keep the demographics already fetched instead of losing everything.
        logging.error(f"Failed to create session for monthly follower gains for org {org_urn}: {e}", exc_info=True)
        gains_session = None

    if gains_session is not None:
        monthly_gains = fetch_monthly_follower_gains(gains_session, org_urn, API_REST_BASE)
        all_follower_data.extend(monthly_gains)

    logging.info(f"Successfully compiled {len(all_follower_data)} total follower stat entries for {org_urn}.")
    return all_follower_data