GuglielmoTor committed on
Commit
c737aea
·
verified ·
1 Parent(s): 7cb2559

Update linkedin_follower_stats.py

Browse files
Files changed (1) hide show
  1. linkedin_follower_stats.py +51 -15
linkedin_follower_stats.py CHANGED
@@ -172,49 +172,85 @@ def _parse_urn_to_id(urn_string):
172
 
173
  # --- Follower Data Fetching Functions ---
174
 
175
def fetch_monthly_follower_gains(session, org_urn):
    """
    Fetch monthly follower gains using a URL-concatenated timeIntervals query.

    The query window starts at 00:00:00 UTC on the first day of the month
    thirteen months before the current date. No end of range is sent.

    Args:
        session: HTTP session exposing ``get(url)``; the returned response must
            provide ``raise_for_status()`` and ``json()``.
        org_urn: Organization URN. It is inserted verbatim into the query
            string and copied into every result row.

    Returns:
        A list of dicts, one per response element that carries a start
        timestamp, with the keys ``category_name`` (UTC start date formatted
        as YYYY-MM-DD), ``follower_count_organic``, ``follower_count_paid``,
        ``follower_count_type`` and ``organization_urn``. Request errors are
        logged, not raised; the rows collected so far (possibly none) are
        returned.
    """
    now = datetime.now(timezone.utc)
    # Reset the time-of-day as well as the day: otherwise the start would carry
    # the current hour/minute/second and change on every call instead of
    # sitting on the month boundary.
    start_of_period = (now - relativedelta(months=13)).replace(
        day=1, hour=0, minute=0, second=0, microsecond=0
    )
    start_ms = int(start_of_period.timestamp() * 1000)

    # Build URL with explicit query string (parameters are deliberately
    # concatenated rather than passed through the session's params encoding).
    url = (
        f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
        f"?q=organizationalEntity"
        f"&organizationalEntity={org_urn}"
        f"&timeIntervals.timeGranularityType=MONTH"
        f"&timeIntervals.timeRange.start={start_ms}"
    )
    logging.info(f"Fetching monthly follower gains from URL: {url}")

    results = []
    try:
        response = session.get(url)
        response.raise_for_status()
        data = response.json()
        # ``or`` fallbacks also cover explicit JSON nulls, which a .get()
        # default does not replace.
        for item in data.get('elements') or []:
            ts = (item.get('timeRange') or {}).get('start')
            if ts is None:
                continue
            date_str = datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime('%Y-%m-%d')
            gains = item.get('followerGains') or {}
            results.append({
                'category_name': date_str,
                'follower_count_organic': gains.get('organicFollowerGain', 0),
                'follower_count_paid': gains.get('paidFollowerGain', 0),
                'follower_count_type': 'follower_gains_monthly',
                'organization_urn': org_urn
            })
        logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn}")
    except requests.exceptions.RequestException as e:
        # e.response may be None (e.g. connection errors); getattr's default
        # covers that case.
        code = getattr(e.response, 'status_code', 'N/A')
        text = getattr(e.response, 'text', str(e))
        logging.error(f"Error fetching monthly gains: {code} - {text}")
    return results
219
 
220
 
 
172
 
173
  # --- Follower Data Fetching Functions ---
174
 
175
def fetch_monthly_follower_gains(session, org_urn, api_rest_base):
    """
    Fetch monthly follower gains for an organization, starting 12 months back.

    The query window starts at 00:00:00 UTC on the first day of the month,
    twelve months before the current month. No end of range is sent.

    Args:
        session: HTTP session exposing ``get(url)``; the returned response must
            provide ``raise_for_status()`` and ``json()``.
        org_urn: Organization URN. It is inserted verbatim into the query
            string and copied into every result row.
        api_rest_base: Base URL to which the
            ``/organizationalEntityFollowerStatistics`` path is appended.

    Returns:
        A list of dicts, one per response element that carries a start
        timestamp, with the keys ``category_name`` (UTC start date formatted
        as YYYY-MM-DD), ``follower_count_organic``, ``follower_count_paid``,
        ``follower_count_type`` and ``organization_urn``. Errors are logged,
        not raised; the rows collected so far (possibly none) are returned.
    """
    now = datetime.now(timezone.utc)

    # Twelve months back lands on the same month of the previous year, and the
    # window always starts on day 1 at midnight UTC.
    # For example, if now is 2025-05-13, start_of_period is 2024-05-01 00:00:00 UTC.
    start_of_period = datetime(now.year - 1, now.month, 1, tzinfo=timezone.utc)
    start_ms = int(start_of_period.timestamp() * 1000)

    # Build URL with explicit query string.
    # Original author's note: LinkedIn defaults the end of the timeRange to the
    # current time if not specified, so only the start is sent.
    url = (
        f"{api_rest_base}/organizationalEntityFollowerStatistics"
        f"?q=organizationalEntity"
        f"&organizationalEntity={org_urn}"
        f"&timeIntervals.timeGranularityType=MONTH"
        f"&timeIntervals.timeRange.start={start_ms}"
    )
    logging.info(f"Fetching monthly follower gains from URL: {url}")

    results = []
    try:
        response = session.get(url)
        response.raise_for_status()  # Raises an HTTPError for bad responses (4XX or 5XX)
        data = response.json()

        # ``or`` fallbacks also cover explicit JSON nulls, which a .get()
        # default does not replace.
        elements = data.get('elements') or []
        if not elements:
            logging.info(f"No 'elements' found in API response for {org_urn} for start_ms {start_ms}.")

        for item in elements:
            # A null timeRange must skip only this item, not abort the loop.
            ts = (item.get('timeRange') or {}).get('start')
            if ts is None:
                logging.warning(f"Skipping item due to missing 'start' timestamp: {item}")
                continue

            # Convert timestamp (milliseconds) to YYYY-MM-DD date string in UTC
            date_str = datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime('%Y-%m-%d')

            gains = item.get('followerGains') or {}
            results.append({
                'category_name': date_str,  # This is the start date of the month's data
                # Null gain values are reported as 0, same as missing ones.
                'follower_count_organic': gains.get('organicFollowerGain') or 0,
                'follower_count_paid': gains.get('paidFollowerGain') or 0,
                'follower_count_type': 'follower_gains_monthly',
                'organization_urn': org_urn
            })
        logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn} starting from {start_of_period.strftime('%Y-%m-%d')}.")

    except requests.exceptions.HTTPError as http_err:
        # Raised by raise_for_status(); kept separate for a more specific message.
        code = getattr(http_err.response, 'status_code', 'N/A')
        text = getattr(http_err.response, 'text', str(http_err))
        logging.error(f"HTTP error fetching monthly gains for {org_urn}: {code} - {text}")
        logging.error(f"Request URL: {url}")
    except requests.exceptions.RequestException as e:
        # Other request-related errors (e.g., connection issues). e.response
        # may be None here; getattr's default covers that case.
        code = getattr(e.response, 'status_code', 'N/A')
        text = getattr(e.response, 'text', str(e))
        logging.error(f"Error fetching monthly gains for {org_urn}: {code} - {text}")
        logging.error(f"Request URL: {url}")
    except Exception as ex:
        # Deliberate best-effort boundary: any other failure (e.g. a non-JSON
        # body) is logged and the partial results are still returned.
        logging.error(f"An unexpected error occurred while fetching monthly gains for {org_urn}: {str(ex)}")
        logging.error(f"Request URL: {url}")

    return results
255
 
256