Update linkedin_follower_stats.py
linkedin_follower_stats.py (CHANGED, +52 -45)
@@ -27,20 +27,20 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,
     """
     mapping = {}
     try:
-        logging.debug(f"Fetching names from URL: {url} with params: {params}")
+        logging.debug(f"Fetching names from URL: {url} with params: {json.dumps(params)}") # Log params for clarity
         response = session.get(url, params=params)
         response.raise_for_status()
         data = response.json()

         items = data
-        for key in result_key_path:
+        for key in result_key_path:
             if isinstance(items, dict):
                 items = items.get(key, [])
             else:
                 logging.warning(f"Expected dict to get key '{key}' but got {type(items)} at path {result_key_path} for URL {url}. Check result_key_path.")
                 return mapping

-        if isinstance(items, dict):
+        if isinstance(items, dict):
             for item_id_str, item_data in items.items():
                 name = item_data
                 for key_nav in name_key_path:
@@ -53,7 +53,7 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,
                     mapping[item_id_str] = name
                 else:
                     logging.warning(f"No name found for ID {item_id_str} at path {name_key_path} in item: {item_data} from URL {url}")
-        elif isinstance(items, list):
+        elif isinstance(items, list):
             for item in items:
                 item_id_val = item.get(id_key)
                 name = item
@@ -72,7 +72,7 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,

     except requests.exceptions.RequestException as e:
         status_code = getattr(e.response, 'status_code', 'N/A')
-        error_text = getattr(e.response, 'text', str(e))
+        error_text = getattr(e.response, 'text', str(e)) # Log the raw error text
         logging.error(f"Error fetching names from {url} (Status: {status_code}): {error_text}")
     except json.JSONDecodeError as e:
         logging.error(f"Error decoding JSON for names from {url}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
@@ -83,16 +83,14 @@ def _fetch_linkedin_names(session, url, params, result_key_path, name_key_path,
 def get_functions_map(session):
     """Fetches all LinkedIn functions and returns a map of {id: name}."""
     url = f"{API_V2_BASE}/functions"
-    #
-    params = {}
+    params = {} # Relies on Accept-Language header from session
     logging.info("Fetching all LinkedIn functions.")
     return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")

 def get_seniorities_map(session):
     """Fetches all LinkedIn seniorities and returns a map of {id: name}."""
     url = f"{API_V2_BASE}/seniorities"
-    #
-    params = {}
+    params = {} # Relies on Accept-Language header from session
     logging.info("Fetching all LinkedIn seniorities.")
     return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")

@@ -104,8 +102,13 @@ def get_industries_map(session, industry_urns, version="DEFAULT"):
     if not unique_ids: return {}

     url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
-
-
+    # As per LinkedIn docs for BATCH_GET: ids={id1}&ids={id2}&locale.language=en&locale.country=US
+    params = {
+        'ids': unique_ids, # requests library will format this as ids=id1&ids=id2...
+        'locale.language': 'en',
+        'locale.country': 'US'
+    }
+    logging.info(f"Fetching names for {len(unique_ids)} unique industry IDs using BATCH_GET.")
     return _fetch_linkedin_names(session, url, params, ["results"], ["name", "localized", "en_US"])


@@ -116,9 +119,14 @@ def get_geo_map(session, geo_urns):
     unique_ids = list(set(filter(None, geo_ids)))
     if not unique_ids: return {}

-
-
-
+    # As per LinkedIn docs for BATCH_GET: ids=List(12345,23456)&locale=(language:en,country:US)
+    ids_param_string = "List(" + ",".join(map(str, unique_ids)) + ")"
+    locale_param_string = "(language:en,country:US)" # Must be exactly this string format
+
+    # Parameters must be passed in the URL string directly for this specific API format
+    # The `params` dict for session.get() will be empty.
+    url = f"{API_V2_BASE}/geo?ids={quote(ids_param_string)}&locale={quote(locale_param_string)}"
+
     logging.info(f"Fetching names for {len(unique_ids)} unique geo IDs using URL: {url}")
     return _fetch_linkedin_names(session, url, {}, ["results"], ["defaultLocalizedName", "value"])

@@ -142,24 +150,29 @@ def _parse_urn_to_id(urn_string):
 def fetch_monthly_follower_gains(session, org_urn):
     """
     Fetches monthly follower gains for the last 12-13 months to ensure full coverage.
+    Uses parameter names as confirmed by user's working script.
     """
     results = []
     now = datetime.now(timezone.utc)
+    # Go back 13 months to ensure we capture at least 12 full previous months
+    # and have a buffer, as LinkedIn might report based on full previous months.
     thirteen_months_ago = now - relativedelta(months=13)
     start_of_period = thirteen_months_ago.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
     start_ms = int(start_of_period.timestamp() * 1000)

-
-
-
-
-
-
-
-
+    # Parameters as per user's working script and common LinkedIn patterns for time-bound stats
+    params = {
+        'q': 'organizationalEntity',
+        'organizationalEntity': org_urn,
+        'timeIntervals.timeGranularityType': 'MONTH',
+        'timeIntervals.timeRange.start': start_ms
+    }
+    url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
+
+    logging.info(f"Fetching monthly follower gains from: {url} with params: {json.dumps(params)}")

     try:
-        response = session.get(url)
+        response = session.get(url, params=params)
         response.raise_for_status()
         data = response.json()

@@ -171,7 +184,7 @@ def fetch_monthly_follower_gains(session, org_urn):
                 continue

             date_obj = datetime.fromtimestamp(start_timestamp_ms / 1000, tz=timezone.utc)
-            date_str = date_obj.strftime('%Y-%m-%d')
+            date_str = date_obj.strftime('%Y-%m-%d') # First day of the month

             follower_gains = item.get("followerGains", {})
             organic_gain = follower_gains.get("organicFollowerGain", 0)
@@ -201,14 +214,17 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
     Fetches current follower demographics, applying Top-N for specified categories.
     """
     final_demographics_results = []
-
-
-
-
-
+    # Parameters for the main demographics call
+    params = {
+        'q': 'organizationalEntity',
+        'organizationalEntity': org_urn
+    }
+    url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
+
+    logging.info(f"Fetching follower demographics from: {url} for org URN {org_urn} with params: {json.dumps(params)}")

     try:
-        response = session.get(url)
+        response = session.get(url, params=params)
         response.raise_for_status()
         data = response.json()

@@ -217,9 +233,8 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
             logging.warning(f"No elements found in follower demographics response for {org_urn}.")
             return []

-        stat_element = elements[0]
+        stat_element = elements[0]

-        # Helper to convert raw API items for a single demographic type into our structured list
         def _get_entries_for_type(raw_items_list, type_name, id_map, id_field_name_in_item, org_urn_val):
             current_type_entries = []
             if not raw_items_list:
@@ -228,13 +243,11 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map

             for item in raw_items_list:
                 category_name_val = "Unknown"
-                # For associationType, the id_field_name_in_item is the direct name
                 if type_name == "follower_association":
                     category_name_val = item.get(id_field_name_in_item, f"Unknown {id_field_name_in_item}")
-                else:
+                else:
                     urn_val = item.get(id_field_name_in_item)
                     entity_id = _parse_urn_to_id(urn_val)
-                    # Use str(entity_id) for map lookup as map keys were stored as strings
                     category_name_val = id_map.get(str(entity_id), f"Unknown {type_name.split('_')[-1].capitalize()} (ID: {entity_id if entity_id else urn_val})")

                 counts = item.get("followerCounts", {})
@@ -250,7 +263,6 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
                 })
             return current_type_entries

-        # Fetch live maps for industries and geo as they depend on URNs from the current API response
         industry_urns_to_map = [item.get("industry") for item in stat_element.get("followerCountsByIndustry", []) if item.get("industry")]
         geo_urns_to_map = [item.get("geo") for item in stat_element.get("followerCountsByGeoCountry", []) if item.get("geo")]

@@ -262,7 +274,7 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
             {"items_key": "followerCountsByFunction", "type_name": "follower_function", "id_map": functions_map, "id_field": "function", "top_n": 10},
             {"items_key": "followerCountsByIndustry", "type_name": "follower_industry", "id_map": live_industries_map, "id_field": "industry", "top_n": 10},
             {"items_key": "followerCountsByGeoCountry", "type_name": "follower_geo", "id_map": live_geo_map, "id_field": "geo", "top_n": 10},
-            {"items_key": "followerCountsByAssociationType", "type_name": "follower_association", "id_map": {}, "id_field": "associationType", "top_n": None}
+            {"items_key": "followerCountsByAssociationType", "type_name": "follower_association", "id_map": {}, "id_field": "associationType", "top_n": None}
         ]

         for config in demographic_configs:
@@ -270,15 +282,14 @@ def fetch_follower_demographics(session, org_urn, functions_map, seniorities_map
             processed_entries = _get_entries_for_type(raw_items, config["type_name"], config["id_map"], config["id_field"], org_urn)

             if config["top_n"] is not None and processed_entries:
-
-                for entry in processed_entries: # Ensure numeric for sorting
+                for entry in processed_entries:
                     if not isinstance(entry.get("follower_count_organic"), (int, float)):
                         entry["follower_count_organic"] = 0
                 sorted_entries = sorted(processed_entries, key=lambda x: x.get("follower_count_organic", 0), reverse=True)
                 final_demographics_results.extend(sorted_entries[:config["top_n"]])
                 logging.debug(f"Added top {config['top_n']} for {config['type_name']}. Count: {len(sorted_entries[:config['top_n']])}")
             else:
-                final_demographics_results.extend(processed_entries)
+                final_demographics_results.extend(processed_entries)
                 logging.debug(f"Added all for {config['type_name']}. Count: {len(processed_entries)}")

         logging.info(f"Processed follower demographics for {org_urn}. Total entries from all types: {len(final_demographics_results)}")
@@ -312,7 +323,7 @@ def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
         session.headers.update({
             "X-Restli-Protocol-Version": "2.0.0",
             "LinkedIn-Version": LINKEDIN_API_VERSION,
-
+            "Accept-Language": "en_US" # Explicitly set for v2 name lookups if not default in session
         })
     except Exception as e:
         logging.error(f"Failed to create session or update headers for org {org_urn}: {e}", exc_info=True)
@@ -320,9 +331,6 @@ def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):

     logging.info(f"Starting follower stats retrieval for org: {org_urn}")

-    # These maps are fetched once per call to get_linkedin_follower_stats
-    # For industries and geo, the maps will be fetched live within fetch_follower_demographics
-    # as they depend on URNs from the API response itself.
     functions_map = get_functions_map(session)
     seniorities_map = get_seniorities_map(session)

@@ -334,7 +342,6 @@ def get_linkedin_follower_stats(comm_client_id, community_token, org_urn):
         monthly_gains = fetch_monthly_follower_gains(session, org_urn)
         all_follower_data.extend(monthly_gains)

-        # Pass pre-fetched function and seniority maps. Industry and Geo maps are fetched inside.
         demographics = fetch_follower_demographics(session, org_urn, functions_map, seniorities_map)
         all_follower_data.extend(demographics)

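Note on the two name-lookup formats introduced above (illustrative sketch only, not part of the commit; the ID values and printed query strings below are made up to show how the parameters serialize):

# Industries BATCH_GET: requests expands a list value into repeated keys,
# equivalent to urlencode(..., doseq=True).
from urllib.parse import quote, urlencode

unique_ids = [4, 6, 96]  # placeholder IDs
industry_params = {'ids': unique_ids, 'locale.language': 'en', 'locale.country': 'US'}
print(urlencode(industry_params, doseq=True))
# ids=4&ids=6&ids=96&locale.language=en&locale.country=US

# Geo lookup: the Restli-style List(...) and locale strings are percent-encoded
# directly into the URL by quote(), as in the get_geo_map hunk above.
ids_param_string = "List(" + ",".join(map(str, unique_ids)) + ")"
locale_param_string = "(language:en,country:US)"
print(f"/v2/geo?ids={quote(ids_param_string)}&locale={quote(locale_param_string)}")
# /v2/geo?ids=List%284%2C6%2C96%29&locale=%28language%3Aen%2Ccountry%3AUS%29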
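For reference, a minimal sketch of calling the updated entry point (hypothetical caller: the credential values are placeholders, the module is assumed to be importable as linkedin_follower_stats, and the return value is assumed to be the combined all_follower_data list built in the hunks above):

import logging
from linkedin_follower_stats import get_linkedin_follower_stats

logging.basicConfig(level=logging.INFO)

# Placeholder credentials and organization URN; real values come from the app's OAuth flow.
rows = get_linkedin_follower_stats(
    comm_client_id="<client-id>",
    community_token="<community-access-token>",
    org_urn="urn:li:organization:12345",
)

# Per the diff, each entry is either a monthly follower-gain record or a Top-N demographic record.
for row in rows or []:
    print(row)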