GuglielmoTor committed on
Commit
b0d28b8
·
verified ·
1 Parent(s): 543fdff

Update linkedin_follower_stats.py

Browse files
Files changed (1) hide show
  1. linkedin_follower_stats.py +69 -58
linkedin_follower_stats.py CHANGED
@@ -95,21 +95,48 @@ def get_seniorities_map(session):
95
  return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
96
 
97
  def get_industries_map(session, industry_urns, version="DEFAULT"):
98
- """Fetches names for a list of industry URNs. Returns a map {id: name}."""
99
- if not industry_urns: return {}
100
- industry_ids = [_parse_urn_to_id(urn) for urn in industry_urns if urn]
101
- unique_ids = list(set(filter(None, industry_ids)))
102
- if not unique_ids: return {}
103
-
 
 
104
  url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
105
- # As per LinkedIn docs for BATCH_GET: ids={id1}&ids={id2}&locale.language=en&locale.country=US
106
  params = {
107
- 'ids': unique_ids, # requests library will format this as ids=id1&ids=id2...
108
- 'locale.language': 'en',
109
- 'locale.country': 'US'
 
110
  }
111
- logging.info(f"Fetching names for {len(unique_ids)} unique industry IDs using BATCH_GET.")
112
- return _fetch_linkedin_names(session, url, params, ["results"], ["name", "localized", "en_US"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
115
  def get_geo_map(session, geo_urns):
@@ -149,63 +176,47 @@ def _parse_urn_to_id(urn_string):
149
 
150
  def fetch_monthly_follower_gains(session, org_urn):
151
  """
152
- Fetches monthly follower gains for the last 12-13 months to ensure full coverage.
153
- Uses parameter names as confirmed by user's working script.
154
  """
155
- results = []
156
  now = datetime.now(timezone.utc)
157
- # Go back 13 months to ensure we capture at least 12 full previous months
158
- # and have a buffer, as LinkedIn might report based on full previous months.
159
- thirteen_months_ago = now - relativedelta(months=13)
160
- start_of_period = thirteen_months_ago.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
161
  start_ms = int(start_of_period.timestamp() * 1000)
162
-
163
- # Parameters as per user's working script and common LinkedIn patterns for time-bound stats
164
- params = {
165
- 'q': 'organizationalEntity',
166
- 'organizationalEntity': org_urn,
167
- 'timeIntervals.timeGranularityType': 'MONTH',
168
- 'timeIntervals.timeRange.start': start_ms
169
- }
170
- url = f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
171
-
172
- logging.info(f"Fetching monthly follower gains from: {url} with params: {json.dumps(params)}")
173
 
 
 
 
 
 
 
 
 
 
 
 
174
  try:
175
- response = session.get(url, params=params)
176
  response.raise_for_status()
177
  data = response.json()
178
-
179
- for item in data.get("elements", []):
180
- time_range = item.get("timeRange", {})
181
- start_timestamp_ms = time_range.get("start")
182
- if start_timestamp_ms is None:
183
- logging.warning("Skipping item due to missing start timestamp in monthly gains.")
184
  continue
185
-
186
- date_obj = datetime.fromtimestamp(start_timestamp_ms / 1000, tz=timezone.utc)
187
- date_str = date_obj.strftime('%Y-%m-%d') # First day of the month
188
-
189
- follower_gains = item.get("followerGains", {})
190
- organic_gain = follower_gains.get("organicFollowerGain", 0)
191
- paid_gain = follower_gains.get("paidFollowerGain", 0)
192
-
193
  results.append({
194
- "category_name": date_str,
195
- "follower_count_organic": organic_gain,
196
- "follower_count_paid": paid_gain,
197
- "follower_count_type": "follower_gains_monthly",
198
- "organization_urn": org_urn
199
  })
200
- logging.info(f"Fetched {len(results)} monthly follower gain entries for org URN {org_urn}.")
201
  except requests.exceptions.RequestException as e:
202
- status_code = getattr(e.response, 'status_code', 'N/A')
203
- error_text = getattr(e.response, 'text', str(e))
204
- logging.error(f"Error fetching monthly follower gains for {org_urn} (Status: {status_code}): {error_text}")
205
- except json.JSONDecodeError as e:
206
- logging.error(f"Error decoding JSON for monthly follower gains for {org_urn}: {e}. Response: {response.text if 'response' in locals() else 'N/A'}")
207
- except Exception as e:
208
- logging.error(f"Unexpected error fetching monthly follower gains for {org_urn}: {e}", exc_info=True)
209
  return results
210
 
211
 
 
95
  return _fetch_linkedin_names(session, url, params, ["elements"], ["name", "localized", "en_US"], "id")
96
 
97
def get_industries_map(session, industry_urns, version="DEFAULT"):
    """Resolve industry URNs to display names.

    Fetches the full industry taxonomy from LinkedIn (GET_ALL style) and
    filters locally to the requested IDs, instead of using BATCH_GET.

    Args:
        session: Authenticated requests.Session with LinkedIn API headers set.
        industry_urns: Iterable of industry URN strings; may be None or empty.
        version: Industry taxonomy version segment of the URL, e.g. "DEFAULT".

    Returns:
        dict mapping industry id (str) -> localized en_US name (str).
        Empty dict when no valid URNs were supplied or on request/parse error.
    """
    # Parse and dedupe IDs; normalize to str up front so the membership
    # test against str(el_id) below is type-safe regardless of what
    # _parse_urn_to_id returns.
    parsed_ids = (_parse_urn_to_id(urn) for urn in industry_urns or [])
    unique_ids = {str(pid) for pid in parsed_ids if pid}
    if not unique_ids:
        return {}

    url = f"{API_V2_BASE}/industryTaxonomyVersions/{version}/industries"
    params = {
        # Single 'locale' param as in the GET_ALL example
        'locale': '(language:en,country:US)',
        'start': 0,
        'count': 500  # should exceed total # of industries
    }

    logging.info(f"Fetching all industries (to filter {len(unique_ids)} IDs) from {url}")
    try:
        response = session.get(url, params=params)
        response.raise_for_status()
        data = response.json()

        mapping = {}
        for el in data.get('elements', []):
            el_id = el.get('id')
            if not el_id or str(el_id) not in unique_ids:
                continue
            # Drill into name.localized.en_US
            name = el.get('name', {}).get('localized', {}).get('en_US')
            if name:
                mapping[str(el_id)] = name
            else:
                logging.warning(f"Industry {el_id} has no en_US name field")
        return mapping

    except requests.exceptions.RequestException as e:
        status_code = getattr(e.response, 'status_code', 'N/A')
        logging.error(f"Error fetching all industries: {status_code} – {getattr(e.response, 'text', str(e))}")
        return {}
    except json.JSONDecodeError as e:
        # response.json() can raise even on an HTTP 200 (malformed payload);
        # keep the "empty dict on error" contract instead of propagating.
        logging.error(f"Error decoding industries JSON: {e}")
        return {}
+
140
 
141
 
142
  def get_geo_map(session, geo_urns):
 
176
 
177
def fetch_monthly_follower_gains(session, org_urn):
    """
    Fetch monthly follower gains for roughly the last 13 months.

    Builds the query string by URL concatenation (matching the previously
    working approach) rather than via requests' params encoding.

    Args:
        session: Authenticated requests.Session with LinkedIn API headers set.
        org_urn: Organization URN string, e.g. "urn:li:organization:12345".

    Returns:
        list of dicts with keys: category_name (YYYY-MM-DD, first of month),
        follower_count_organic, follower_count_paid, follower_count_type,
        organization_urn. Empty (or partial) list on error.
    """
    now = datetime.now(timezone.utc)
    thirteen_months_ago = now - relativedelta(months=13)
    # Truncate to midnight on the first of the month so the window starts
    # on a clean month boundary rather than the current time-of-day.
    start_of_period = thirteen_months_ago.replace(
        day=1, hour=0, minute=0, second=0, microsecond=0
    )
    start_ms = int(start_of_period.timestamp() * 1000)

    # Build URL with explicit query string; the URN is concatenated
    # unencoded on purpose — this mirrors the old working script.
    url = (
        f"{API_REST_BASE}/organizationalEntityFollowerStatistics"
        f"?q=organizationalEntity"
        f"&organizationalEntity={org_urn}"
        f"&timeIntervals.timeGranularityType=MONTH"
        f"&timeIntervals.timeRange.start={start_ms}"
    )
    logging.info(f"Fetching monthly follower gains from URL: {url}")

    results = []
    try:
        response = session.get(url)
        response.raise_for_status()
        data = response.json()
        for item in data.get('elements', []):
            ts = item.get('timeRange', {}).get('start')
            if ts is None:
                logging.warning("Skipping item with missing timeRange.start in monthly gains.")
                continue
            # Epoch ms -> first-of-month date string in UTC.
            date_str = datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime('%Y-%m-%d')
            gains = item.get('followerGains', {})
            results.append({
                'category_name': date_str,
                'follower_count_organic': gains.get('organicFollowerGain', 0),
                'follower_count_paid': gains.get('paidFollowerGain', 0),
                'follower_count_type': 'follower_gains_monthly',
                'organization_urn': org_urn
            })
        logging.info(f"Fetched {len(results)} monthly follower entries for {org_urn}")
    except requests.exceptions.RequestException as e:
        code = getattr(e.response, 'status_code', 'N/A')
        text = getattr(e.response, 'text', str(e))
        logging.error(f"Error fetching monthly gains: {code} - {text}")
    except json.JSONDecodeError as e:
        # A malformed payload on an HTTP 200 would otherwise propagate;
        # log it and fall through to return whatever was accumulated.
        logging.error(f"Error decoding monthly gains JSON for {org_urn}: {e}")
    return results
221
 
222