GuglielmoTor committed on
Commit f023388 · verified · 1 Parent(s): 3431b44

Update Linkedin_Data_API_Calls.py

Files changed (1)
  1. Linkedin_Data_API_Calls.py +283 -115
Linkedin_Data_API_Calls.py CHANGED
@@ -4,180 +4,348 @@ import html
  from datetime import datetime
  from collections import defaultdict
  from transformers import pipeline
  from sessions import create_session
  from error_handling import display_error
  from posts_categorization import batch_summarize_and_classify
  import logging

- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

  API_V2_BASE = 'https://api.linkedin.com/v2'
- API_REST_BASE = "https://api.linkedin.com/rest"

  sentiment_pipeline = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")

  def fetch_comments(comm_client_id, token_dict, post_urns, stats_map):
-     from requests_oauthlib import OAuth2Session
-     linkedin = OAuth2Session(comm_client_id, token=token_dict)
-     linkedin.headers.update({'LinkedIn-Version': "202502"})
-     all_comments = {}
      for post_urn in post_urns:
          if stats_map.get(post_urn, {}).get('commentCount', 0) == 0:
              continue
          try:
              url = f"{API_REST_BASE}/socialActions/{post_urn}/comments"
-             response = linkedin.get(url)
              if response.status_code == 200:
                  elements = response.json().get('elements', [])
-                 all_comments[post_urn] = [c.get('message', {}).get('text') for c in elements if c.get('message')]
              else:
-                 all_comments[post_urn] = []
-         except Exception:
-             all_comments[post_urn] = []
-     return all_comments
-
- def analyze_sentiment(comments_data):
-     results = {}
-     for post_urn, comments in comments_data.items():
          sentiment_counts = defaultdict(int)
-         total = 0
-         for comment in comments:
-             if not comment:
                  continue
              try:
-                 result = sentiment_pipeline(comment)
-                 label = result[0]['label'].upper()
                  if label in ['POSITIVE', 'VERY POSITIVE']:
                      sentiment_counts['Positive 👍'] += 1
                  elif label in ['NEGATIVE', 'VERY NEGATIVE']:
                      sentiment_counts['Negative 👎'] += 1
                  elif label == 'NEUTRAL':
                      sentiment_counts['Neutral 😐'] += 1
-                 else:
                      sentiment_counts['Unknown'] += 1
-                 total += 1
-             except:
                  sentiment_counts['Error'] += 1
-         dominant = max(sentiment_counts, key=sentiment_counts.get, default='Neutral 😐')
-         percentage = round((sentiment_counts[dominant] / total) * 100, 1) if total else 0.0
-         results[post_urn] = {"sentiment": dominant, "percentage": percentage}
-     return results

- def fetch_posts_and_stats(comm_client_id, community_token, org_urn, count=100):
-     token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}
-     session = create_session(comm_client_id, token=token_dict)
-     #org_urn, org_name = fetch_org_urn(comm_client_id, token_dict)
-     org_name = "GRLS"

-     posts_url = f"{API_REST_BASE}/posts?author={org_urn}&q=author&count={count}&sortBy=LAST_MODIFIED"
-     try:
-         resp = session.get(posts_url)
-         resp.raise_for_status()
-         raw_posts = resp.json().get("elements", [])
-     except requests.exceptions.RequestException as e:
-         status = getattr(e.response, 'status_code', 'N/A')
-         raise ValueError(f"Failed to fetch posts (Status: {status})") from e
-
-     if not raw_posts:
-         return [], org_name, {}
-
-     post_urns = [p["id"] for p in raw_posts if ":share:" in p["id"] or ":ugcPost:" in p["id"]]
-     stats_map = {}
-     post_texts = [{"text": p.get("commentary") or p.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text", "")} for p in raw_posts]
-     structured_results = batch_summarize_and_classify(post_texts)
-
-     for i in range(0, len(post_urns), 20):
-         batch = post_urns[i:i+20]
-         params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
-         for idx, urn in enumerate(batch):
-             key = f"shares[{idx}]" if ":share:" in urn else f"ugcPosts[{idx}]"
-             params[key] = urn
-         try:
-             stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
-             stat_resp.raise_for_status()
-             for stat in stat_resp.json().get("elements", []):
-                 urn = stat.get("share") or stat.get("ugcPost")
-                 if urn:
-                     stats_map[urn] = stat.get("totalShareStatistics", {})
-         except:
-             continue

-     comments = fetch_comments(comm_client_id, token_dict, post_urns, stats_map)
-     sentiments = analyze_sentiment(comments)
-     posts = []
-
-     for post in raw_posts:
-         post_id = post.get("id")
          stats = stats_map.get(post_id, {})
-         timestamp = post.get("publishedAt") or post.get("createdAt")
-         when = datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M") if timestamp else "Unknown"
-         text = post.get("commentary") or post.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text") or "[No text]"
-         text = html.escape(text[:250]).replace("\n", "<br>") + ("..." if len(text) > 250 else "")

          likes = stats.get("likeCount", 0)
-         comments_count = stats.get("commentCount", 0)
          clicks = stats.get("clickCount", 0)
          shares = stats.get("shareCount", 0)
          impressions = stats.get("impressionCount", 0)
-         engagement = stats.get("engagement", likes + comments_count + clicks + shares) / impressions * 100 if impressions else 0.0

-         sentiment_info = sentiments.get(post_id, {"sentiment": "Neutral 😐", "percentage": 0.0})

-         posts.append({
              "id": post_id,
-             "when": when,
-             "text": text,
              "likes": likes,
-             "comments": comments_count,
              "clicks": clicks,
              "shares": shares,
              "impressions": impressions,
-             "engagement": f"{engagement:.2f}%",
              "sentiment": sentiment_info["sentiment"],
-             "sentiment_percent": sentiment_info["percentage"]
          })
-         logging.info(f"Appended post data for {post_id}: Likes={likes}, Comments={comments_count}, Shares={shares}, Clicks={clicks}")
-
-     for post, structured in zip(posts, structured_results):
-         post["summary"] = structured["summary"]
-         post["category"] = structured["category"]

-     return posts, org_name, sentiments

- def prepare_data_for_bubble(posts, sentiments):
      li_posts = []
      li_post_stats = []
-     li_post_comments = []

-     for post in posts:
          li_posts.append({
-             "author_urn": post["author_urn"],
-             "id": post["id"],
-             "is_ad": post["is_ad"],
-             "media_type": post["media_type"],
-             "published_at": post["published_at"],
-             "sentiment": sentiments.get(post["id"], {}).get("sentiment", "Neutral"),
-             "text": post["text"]
          })

          li_post_stats.append({
-             "clickCount": post["clicks"],
-             "commentCount": post["comments"],
-             "engagement": post["engagement"],
-             "impressionCount": post["impressions"],
-             "likeCount": post["likes"],
-             "shareCount": post["shares"],
-             "uniqueImpressionsCount": post.get("uniqueImpressionsCount", 0),
-             "post_id": post["id"]
          })

-         for comment in post.get("comments_data", []):
-             message = comment.get('message', {}).get('text')
-             if message:
                  li_post_comments.append({
-                     "comment_text": message,
-                     "post_id": post["id"]
                  })
-
-     return li_posts, li_post_stats, li_post_comments

  from datetime import datetime
  from collections import defaultdict
  from transformers import pipeline
+
  from sessions import create_session
  from error_handling import display_error
  from posts_categorization import batch_summarize_and_classify
  import logging
+ import requests  # needed for requests.exceptions.RequestException below
+ import json      # needed for json.JSONDecodeError below

+
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

  API_V2_BASE = 'https://api.linkedin.com/v2'
+ API_REST_BASE = "https://api.linkedin.com/rest"

+ # Initialize sentiment pipeline (consider loading it once globally if this module is imported multiple times)
  sentiment_pipeline = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")

+ def fetch_linkedin_posts_core(comm_client_id, community_token, org_urn, count=100):
+     """
+     Fetches raw posts, their basic statistics, and performs summarization/categorization.
+     Does NOT fetch comments or analyze sentiment.
+     """
+     token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}
+     session = create_session(comm_client_id, token=token_dict)
+     org_name = "GRLS"  # Placeholder or fetch if necessary
+
+     posts_url = f"{API_REST_BASE}/posts?author={org_urn}&q=author&count={count}&sortBy=LAST_MODIFIED"
+     logging.info(f"Fetching posts from URL: {posts_url}")
+     try:
+         resp = session.get(posts_url)
+         resp.raise_for_status()
+         raw_posts_api = resp.json().get("elements", [])
+         logging.info(f"Fetched {len(raw_posts_api)} raw posts from API.")
+     except requests.exceptions.RequestException as e:
+         status = getattr(e.response, 'status_code', 'N/A')
+         logging.error(f"Failed to fetch posts (Status: {status}): {e}")
+         raise ValueError(f"Failed to fetch posts (Status: {status})") from e
+
+     if not raw_posts_api:
+         logging.info("No raw posts found.")
+         return [], {}, org_name
+
+     # Filter for valid post types if necessary, e.g., shares or ugcPosts
+     # post_urns_for_stats = [p["id"] for p in raw_posts_api if ":share:" in p["id"] or ":ugcPost:" in p["id"]]
+     post_urns_for_stats = [p["id"] for p in raw_posts_api if p.get("id")]
+
+     # Prepare texts for summarization/classification
+     post_texts_for_nlp = []
+     for p in raw_posts_api:
+         text_content = p.get("commentary") or \
+             p.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text", "") or \
+             "[No text content]"
+         post_texts_for_nlp.append({"text": text_content, "id": p.get("id")})
+
+     logging.info(f"Prepared {len(post_texts_for_nlp)} posts for NLP.")
+     structured_results_list = batch_summarize_and_classify(post_texts_for_nlp)
+     # Create a dictionary for easy lookup of structured results by post ID
+     structured_results_map = {res["id"]: res for res in structured_results_list if "id" in res}
+
+     # Fetch statistics
+     stats_map = {}
+     if post_urns_for_stats:
+         for i in range(0, len(post_urns_for_stats), 20):  # LinkedIn API often has batch limits
+             batch_urns = post_urns_for_stats[i:i+20]
+             params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
+             for idx, urn_str in enumerate(batch_urns):
+                 # Determine if it's a share or ugcPost based on URN structure (simplified)
+                 key_prefix = "shares" if ":share:" in urn_str else "ugcPosts"
+                 params[f"{key_prefix}[{idx}]"] = urn_str
+
+             try:
+                 logging.info(f"Fetching stats for batch starting with URN: {batch_urns[0]}")
+                 stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
+                 stat_resp.raise_for_status()
+                 for stat_element in stat_resp.json().get("elements", []):
+                     urn = stat_element.get("share") or stat_element.get("ugcPost")
+                     if urn:
+                         stats_map[urn] = stat_element.get("totalShareStatistics", {})
+                 logging.info(f"Successfully fetched stats for {len(batch_urns)} URNs. Current stats_map size: {len(stats_map)}")
+             except requests.exceptions.RequestException as e:
+                 logging.warning(f"Failed to fetch stats for a batch: {e}. Response: {e.response.text if e.response else 'No response'}")
+                 # Continue to next batch; some stats might be missing
+             except json.JSONDecodeError as e:
+                 logging.warning(f"Failed to decode JSON from stats response: {e}. Response: {stat_resp.text if stat_resp else 'No response text'}")
+
+     processed_raw_posts = []
+     for p in raw_posts_api:
+         post_id = p.get("id")
+         if not post_id:
+             logging.warning("Skipping raw post due to missing ID.")
+             continue
+
+         text_content = p.get("commentary") or \
+             p.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text", "") or \
+             "[No text content]"
+
+         timestamp = p.get("publishedAt") or p.get("createdAt")
+         published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
+
+         structured_res = structured_results_map.get(post_id, {"summary": "N/A", "category": "N/A"})
+
+         processed_raw_posts.append({
+             "id": post_id,
+             "raw_text": text_content,
+             "summary": structured_res["summary"],
+             "category": structured_res["category"],
+             "published_at_timestamp": timestamp,
+             "published_at_iso": published_at_iso,
+             # These are placeholders for actual fields from the LinkedIn API response. Verify field names.
+             "author_urn": p.get("author", "urn:li:unknown"),  # e.g., "urn:li:person:xxxx" or "urn:li:organization:xxxx"
+             "is_ad": p.get("isSponsored", False),  # LinkedIn might use a different field like 'sponsored' or 'promoted'
+             "media_type": p.get("mediaCategory", "NONE")  # e.g., ARTICLE, IMAGE, VIDEO, NONE
+         })
+     logging.info(f"Processed {len(processed_raw_posts)} posts with core data.")
+     return processed_raw_posts, stats_map, org_name
+

  def fetch_comments(comm_client_id, token_dict, post_urns, stats_map):
+     """
+     Fetches comments for a list of post URNs.
+     Uses stats_map to potentially skip posts with 0 comments.
+     """
+     from requests_oauthlib import OAuth2Session  # Keep import here if OAuth2Session is specific to this function
+
+     linkedin_session = OAuth2Session(comm_client_id, token=token_dict)
+     # LinkedIn API versions can change; ensure this is up to date.
+     # Using a recent version like "202402" or as per current LinkedIn docs.
+     # The user had "202502", which might be a future version. Using a slightly older one for safety.
+     linkedin_session.headers.update({'LinkedIn-Version': "202405", 'X-Restli-Protocol-Version': '2.0.0'})
+
+     all_comments_by_post = {}
+     logging.info(f"Fetching comments for {len(post_urns)} posts.")
+
      for post_urn in post_urns:
+         # Optimization: if stats show 0 comments, skip the API call for this post's comments
          if stats_map.get(post_urn, {}).get('commentCount', 0) == 0:
+             logging.info(f"Skipping comment fetch for {post_urn} as commentCount is 0 in stats_map.")
+             all_comments_by_post[post_urn] = []
              continue
+
          try:
+             # According to LinkedIn docs, comments are often under /socialActions/{activityUrn}/comments
+             # or /commentsV2?q=entity&entity={activityUrn}
+             # The user's URL was /socialActions/{post_urn}/comments - this seems plausible for URNs like ugcPost URNs.
              url = f"{API_REST_BASE}/socialActions/{post_urn}/comments"
+             logging.debug(f"Fetching comments from URL: {url} for post URN: {post_urn}")
+             response = linkedin_session.get(url)
+
              if response.status_code == 200:
                  elements = response.json().get('elements', [])
+                 comments_texts = [
+                     c.get('message', {}).get('text')
+                     for c in elements
+                     if c.get('message') and c.get('message', {}).get('text')
+                 ]
+                 all_comments_by_post[post_urn] = comments_texts
+                 logging.info(f"Fetched {len(comments_texts)} comments for {post_urn}.")
+             elif response.status_code == 403:  # Forbidden, often permissions or versioning
+                 logging.warning(f"Forbidden (403) to fetch comments for {post_urn}. URL: {url}. Response: {response.text}")
+                 all_comments_by_post[post_urn] = []
+             elif response.status_code == 404:  # Not found
+                 logging.warning(f"Comments not found (404) for {post_urn}. URL: {url}. Response: {response.text}")
+                 all_comments_by_post[post_urn] = []
              else:
+                 logging.error(f"Error fetching comments for {post_urn}. Status: {response.status_code}. Response: {response.text}")
+                 all_comments_by_post[post_urn] = []
+         except requests.exceptions.RequestException as e:
+             logging.error(f"RequestException fetching comments for {post_urn}: {e}")
+             all_comments_by_post[post_urn] = []
+         except Exception as e:  # Catch any other unexpected errors
+             logging.error(f"Unexpected error fetching comments for {post_urn}: {e}")
+             all_comments_by_post[post_urn] = []
+
+     return all_comments_by_post
+
+ def analyze_sentiment(all_comments_data):
+     """
+     Analyzes sentiment for comments grouped by post_urn.
+     all_comments_data is a dict: {post_urn: [comment_text_1, comment_text_2, ...]}
+     Returns a dict: {post_urn: {"sentiment": "DominantSentiment", "percentage": X.X}}
+     """
+     results_by_post = {}
+     logging.info(f"Analyzing sentiment for comments from {len(all_comments_data)} posts.")
+     for post_urn, comments_list in all_comments_data.items():
          sentiment_counts = defaultdict(int)
+         total_valid_comments_for_post = 0
+
+         if not comments_list:
+             results_by_post[post_urn] = {"sentiment": "Neutral 😐", "percentage": 0.0, "details": dict(sentiment_counts)}
+             continue
+
+         for comment_text in comments_list:
+             if not comment_text or not comment_text.strip():  # Skip empty comments
                  continue
              try:
+                 # The pipeline expects a string or list of strings.
+                 # Ensure comment_text is a string.
+                 analysis_result = sentiment_pipeline(str(comment_text))
+                 label = analysis_result[0]['label'].upper()
+
                  if label in ['POSITIVE', 'VERY POSITIVE']:
                      sentiment_counts['Positive 👍'] += 1
                  elif label in ['NEGATIVE', 'VERY NEGATIVE']:
                      sentiment_counts['Negative 👎'] += 1
                  elif label == 'NEUTRAL':
                      sentiment_counts['Neutral 😐'] += 1
+                 else:  # Other labels from the model
                      sentiment_counts['Unknown'] += 1
+                 total_valid_comments_for_post += 1
+             except Exception as e:
+                 logging.error(f"Sentiment analysis failed for comment under {post_urn}: '{comment_text[:50]}...'. Error: {e}")
                  sentiment_counts['Error'] += 1
+
+         if total_valid_comments_for_post > 0:
+             dominant_sentiment = max(sentiment_counts, key=sentiment_counts.get, default='Neutral 😐')
+             percentage = round((sentiment_counts[dominant_sentiment] / total_valid_comments_for_post) * 100, 1)
+         else:  # No valid comments to analyze
+             dominant_sentiment = 'Neutral 😐'
+             percentage = 0.0
+             if sentiment_counts['Error'] > 0:  # If there were only errors
+                 dominant_sentiment = 'Error'

+         results_by_post[post_urn] = {
+             "sentiment": dominant_sentiment,
+             "percentage": percentage,
+             "details": dict(sentiment_counts)  # Store counts for more detailed reporting if needed
+         }
+         logging.debug(f"Sentiment for {post_urn}: {results_by_post[post_urn]}")
+
+     return results_by_post


+ def compile_detailed_posts(processed_raw_posts, stats_map, sentiments_per_post):
+     """
+     Combines processed raw post data with their statistics and overall sentiment.
+     """
+     detailed_post_list = []
+     logging.info(f"Compiling detailed data for {len(processed_raw_posts)} posts.")
+     for proc_post in processed_raw_posts:
+         post_id = proc_post["id"]
          stats = stats_map.get(post_id, {})

          likes = stats.get("likeCount", 0)
+         # Use 'commentSummary' from stats for the comment count if available, else 'commentCount'.
+         # LinkedIn sometimes has commentSummary.totalComments
+         comments_stat_count = stats.get("commentSummary", {}).get("totalComments") if "commentSummary" in stats else stats.get("commentCount", 0)
+
          clicks = stats.get("clickCount", 0)
          shares = stats.get("shareCount", 0)
          impressions = stats.get("impressionCount", 0)
+         unique_impressions = stats.get("uniqueImpressionsCount", 0)  # Ensure this field is in the API response

+         # Calculate engagement: (likes + comments + clicks + shares) / impressions
+         # Guard against impressions == 0 to avoid a ZeroDivisionError
+         engagement_numerator = likes + comments_stat_count + clicks + shares
+         engagement_rate = (engagement_numerator / impressions * 100) if impressions else 0.0
+
+         sentiment_info = sentiments_per_post.get(post_id, {"sentiment": "Neutral 😐", "percentage": 0.0})
+
+         # Format text for display (escaped and truncated)
+         display_text = html.escape(proc_post["raw_text"][:250]).replace("\n", "<br>") + \
+             ("..." if len(proc_post["raw_text"]) > 250 else "")
+
+         when_formatted = datetime.fromtimestamp(proc_post["published_at_timestamp"] / 1000).strftime("%Y-%m-%d %H:%M") \
+             if proc_post["published_at_timestamp"] else "Unknown"

+         detailed_post_list.append({
              "id": post_id,
+             "when": when_formatted,
+             "text_for_display": display_text,  # Shortened, escaped text
+             "raw_text": proc_post["raw_text"],  # Full original text
              "likes": likes,
+             "comments_stat_count": comments_stat_count,  # Count from post statistics
              "clicks": clicks,
              "shares": shares,
              "impressions": impressions,
+             "uniqueImpressionsCount": unique_impressions,
+             "engagement": f"{engagement_rate:.2f}%",  # Formatted string
+             "engagement_raw": engagement_rate,  # Raw float for potential calculations
              "sentiment": sentiment_info["sentiment"],
+             "sentiment_percent": sentiment_info["percentage"],
+             "sentiment_details": sentiment_info.get("details", {}),  # Detailed counts
+             "summary": proc_post["summary"],
+             "category": proc_post["category"],
+             "author_urn": proc_post["author_urn"],
+             "is_ad": proc_post["is_ad"],
+             "media_type": proc_post["media_type"],
+             "published_at": proc_post["published_at_iso"]  # ISO-format datetime string
          })
+     logging.info(f"Compiled {len(detailed_post_list)} detailed posts.")
+     return detailed_post_list


+ def prepare_data_for_bubble(detailed_posts, all_actual_comments_data):
+     """
+     Prepares data lists for uploading to Bubble.
+     - detailed_posts: List of comprehensively compiled post objects.
+     - all_actual_comments_data: Dict of {post_urn: [comment_texts]} from fetch_comments.
+     """
      li_posts = []
      li_post_stats = []
+     li_post_comments = []  # For individual comments
+     logging.info("Preparing data for Bubble.")

+     for post_data in detailed_posts:
+         # Data for LI_post table in Bubble
          li_posts.append({
+             "author_urn": post_data["author_urn"],
+             "id": post_data["id"],  # Post URN
+             "is_ad": post_data["is_ad"],
+             "media_type": post_data["media_type"],
+             "published_at": post_data["published_at"],  # ISO datetime string
+             "sentiment_overall": post_data["sentiment"],  # Overall sentiment of the post based on its comments
+             "text_content": post_data["raw_text"],  # Storing the full raw text
+             "summary_text": post_data["summary"],
+             "category_assigned": post_data["category"],
+             # Add any other fields from post_data needed for LI_post table
          })

+         # Data for LI_post_stats table in Bubble
          li_post_stats.append({
+             "clickCount": post_data["clicks"],
+             "commentCount_from_stats": post_data["comments_stat_count"],  # From post's own stats
+             "engagement_rate": post_data["engagement"],  # Formatted string e.g., "12.34%"
+             "impressionCount": post_data["impressions"],
+             "likeCount": post_data["likes"],
+             "shareCount": post_data["shares"],
+             "uniqueImpressionsCount": post_data["uniqueImpressionsCount"],
+             "post_id": post_data["id"]  # Foreign key to LI_post
          })

+     # Data for LI_post_comments table in Bubble (individual comments)
+     # This iterates through the actual comments fetched, not just the count.
+     for post_urn, comments_text_list in all_actual_comments_data.items():
+         for single_comment_text in comments_text_list:
+             if single_comment_text and single_comment_text.strip():  # Ensure comment text is not empty
                  li_post_comments.append({
+                     "comment_text": single_comment_text,
+                     "post_id": post_urn  # Foreign key to LI_post
+                     # Could add sentiment per comment here if analyzed at that granularity
                  })
+
+     logging.info(f"Prepared {len(li_posts)} posts, {len(li_post_stats)} stats entries, and {len(li_post_comments)} comments for Bubble.")
+     return li_posts, li_post_stats, li_post_comments
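
After this commit, fetching posts, fetching comments, sentiment analysis, compilation, and Bubble preparation are separate steps rather than one monolithic fetch_posts_and_stats. A minimal usage sketch of how the refactored functions might be chained together, assuming the module is importable as Linkedin_Data_API_Calls; the credentials and organization URN below are hypothetical placeholders, not values from this commit:

# Minimal sketch of the refactored pipeline (placeholder credentials).
from Linkedin_Data_API_Calls import (
    fetch_linkedin_posts_core,
    fetch_comments,
    analyze_sentiment,
    compile_detailed_posts,
    prepare_data_for_bubble,
)

COMM_CLIENT_ID = "your-client-id"        # hypothetical
COMMUNITY_TOKEN = "your-oauth-token"     # hypothetical
ORG_URN = "urn:li:organization:12345"    # hypothetical

# Step 1: posts, statistics, and NLP summaries/categories (no comments yet).
posts, stats_map, org_name = fetch_linkedin_posts_core(
    COMM_CLIENT_ID, COMMUNITY_TOKEN, ORG_URN, count=50
)

# Step 2: comments and per-post sentiment are now explicit, separate calls.
token_dict = {"access_token": COMMUNITY_TOKEN, "token_type": "Bearer"}
post_urns = [p["id"] for p in posts]
comments = fetch_comments(COMM_CLIENT_ID, token_dict, post_urns, stats_map)
sentiments = analyze_sentiment(comments)

# Step 3: merge everything and shape the three lists for the Bubble tables.
detailed = compile_detailed_posts(posts, stats_map, sentiments)
li_posts, li_post_stats, li_post_comments = prepare_data_for_bubble(detailed, comments)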