GuglielmoTor committed on
Commit
eaea9c5
·
verified ·
1 Parent(s): 762599c

Delete apis/Linkedin_Data_API_Calls.py

Files changed (1)
  1. apis/Linkedin_Data_API_Calls.py +0 -864
apis/Linkedin_Data_API_Calls.py DELETED
@@ -1,864 +0,0 @@
1
- import json
2
- import requests
3
- import html
4
- import time # Added for potential rate limiting if needed
5
- from datetime import datetime
6
- from collections import defaultdict
7
- from urllib.parse import quote # Added for URL encoding
8
- from transformers import pipeline
9
-
10
- from utils.sessions import create_session
11
- from utils.error_handling import display_error
12
- from data_processing.posts_categorization import batch_summarize_and_classify
13
- import logging
14
-
15
-
16
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
17
-
18
- API_V2_BASE = 'https://api.linkedin.com/v2'
19
- API_REST_BASE = "https://api.linkedin.com/rest"
20
-
21
- # Initialize sentiment pipeline (loaded once globally)
22
- sentiment_pipeline = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
23
-
24
- # --- Utility Function ---
25
- def extract_text_from_mention_commentary(commentary):
26
- """
27
- Extracts clean text from a commentary string, removing potential placeholders like {mention}.
28
- """
29
- import re
30
- if not commentary:
31
- return ""
32
- return re.sub(r"{.*?}", "", commentary).strip()
33
-
34
- # --- Core Sentiment Analysis Helper ---
35
- def _get_sentiment_from_text(text_to_analyze):
36
- """
37
- Analyzes a single piece of text and returns its sentiment label and raw counts.
38
- Returns a dict: {"label": "Sentiment Label", "counts": defaultdict(int)}
39
- """
40
- sentiment_counts = defaultdict(int)
41
- dominant_sentiment_label = "Neutral 😐" # Default
42
-
43
- if not text_to_analyze or not text_to_analyze.strip():
44
- return {"label": dominant_sentiment_label, "counts": sentiment_counts}
45
-
46
- try:
47
- # Truncate to avoid issues with very long texts for the model
48
- analysis_result = sentiment_pipeline(str(text_to_analyze)[:512])
49
- label = analysis_result[0]['label'].upper()
50
-
51
- if label in ['POSITIVE', 'VERY POSITIVE']:
52
- dominant_sentiment_label = 'Positive 👍'
53
- sentiment_counts['Positive 👍'] += 1
54
- elif label in ['NEGATIVE', 'VERY NEGATIVE']:
55
- dominant_sentiment_label = 'Negative 👎'
56
- sentiment_counts['Negative 👎'] += 1
57
- elif label == 'NEUTRAL':
58
- dominant_sentiment_label = 'Neutral 😐' # Already default, but for clarity
59
- sentiment_counts['Neutral 😐'] += 1
60
- else:
61
- dominant_sentiment_label = 'Unknown' # Catch any other labels from the model
62
- sentiment_counts['Unknown'] += 1
63
-
64
- except Exception as e:
65
- # Log the error with more context if possible
66
- logging.error(f"Sentiment analysis failed for text snippet '{str(text_to_analyze)[:50]}...'. Error: {e}")
67
- sentiment_counts['Error'] += 1
68
- dominant_sentiment_label = "Error" # Indicate error in sentiment
69
-
70
- return {"label": dominant_sentiment_label, "counts": sentiment_counts}
71
-
72
- def get_post_media_category(post_content):
73
- """
74
- Determines the media category from the post's content object.
75
- Args:
76
- post_content (dict or None): The content dictionary of the post.
77
- Returns:
78
- str: The determined media category (e.g., "Video", "Article", "Document", "Image", "Multi-Image", "NONE").
79
- """
80
- if not post_content:
81
- return "NONE"
82
-
83
- # 1. Check for specific LinkedIn Video Component (from your original logic)
84
- # You might want to refine this if 'mediaCategory' within the video component is more specific
85
- if "com.linkedin.voyager.feed.render.LinkedInVideoComponent" in post_content:
86
- # video_component_data = post_content.get("com.linkedin.voyager.feed.render.LinkedInVideoComponent", {})
87
- # return video_component_data.get("mediaCategory", "Video") # Example if you want to use its specific category
88
- return "Video"
89
-
90
- # 2. Check for Article (based on your "old code" and examples)
91
- if 'article' in post_content:
92
- return "Article"
93
-
94
- # 3. Check for Multi-Image (based on your "old code")
95
- if 'multiImage' in post_content:
96
- return "Multi-Image"
97
-
98
- # 4. Check for Media (Document or Image - based on your "old code" and examples)
99
- if 'media' in post_content:
100
- media_item = post_content['media']
101
- # Heuristic from your "old code": if 'title' is present, it's likely a Document.
102
- if 'title' in media_item:
103
- # Example: "content": {"media": {"title": "...", "id": "urn:li:document:..."}}
104
- return "Document"
105
- # Else, if 'id' is present (and no title was found for Document), assume Image.
106
- elif 'id' in media_item:
107
- # Example: "content": {"media": {"altText": "", "id": "urn:li:image:..."}}
108
- return "Image"
109
-
110
- return "NONE"
111
- # --- Post Retrieval Functions ---
112
- def fetch_linkedin_posts_core(comm_client_id, community_token, org_urn, count):
113
- """
114
- Fetches raw posts, their basic statistics, and performs summarization/categorization.
115
- Does NOT fetch comments or analyze sentiment of comments here.
116
- """
117
- token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}
118
- session = create_session(comm_client_id, token=token_dict)
119
- session.headers.update({
120
- "LinkedIn-Version": "202502"
121
- })
122
-
123
- posts_url = f"{API_REST_BASE}/posts?author={org_urn}&q=author&count={count}&sortBy=LAST_MODIFIED"
124
- logging.info(f"Fetching posts from URL: {posts_url}")
125
- try:
126
- resp = session.get(posts_url)
127
- resp.raise_for_status()
128
- raw_posts_api = resp.json().get("elements", [])
129
- logging.info(f"Fetched {len(raw_posts_api)} raw posts from API.")
130
- except requests.exceptions.RequestException as e:
131
- status = getattr(e.response, 'status_code', 'N/A')
132
- text = getattr(e.response, 'text', 'No response text')
133
- logging.error(f"Failed to fetch posts (Status: {status}): {e}. Response: {text}")
134
- raise ValueError(f"Failed to fetch posts (Status: {status})") from e
135
- except json.JSONDecodeError as e:
136
- logging.error(f"Failed to decode JSON from posts response: {e}. Response text: {resp.text if resp else 'No response object'}")
137
- raise ValueError("Failed to decode JSON from posts response") from e
138
-
139
- if not raw_posts_api:
140
- logging.info("No raw posts found.")
141
- return [], {}, "DefaultOrgName"
142
-
143
- post_urns_for_stats = [p["id"] for p in raw_posts_api if p.get("id")]
144
-
145
- post_texts_for_nlp = []
146
- for p in raw_posts_api:
147
- text_content = p.get("commentary") or \
148
- p.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text", "") or \
149
- "[No text content]"
150
- post_texts_for_nlp.append({"text": text_content, "id": p.get("id")})
151
-
152
- logging.info(f"Prepared {len(post_texts_for_nlp)} posts for NLP (summarization/classification).")
153
- if 'batch_summarize_and_classify' in globals():
154
- structured_results_list = batch_summarize_and_classify(post_texts_for_nlp)
155
- else:
156
- logging.warning("batch_summarize_and_classify not found, using fallback.")
157
- structured_results_list = [{"id": p["id"], "summary": "N/A", "category": "N/A"} for p in post_texts_for_nlp]
158
-
159
- structured_results_map = {res["id"]: res for res in structured_results_list if "id" in res}
160
-
161
- stats_map = {}
162
- if post_urns_for_stats:
163
- batch_size_stats = 20
164
- for i in range(0, len(post_urns_for_stats), batch_size_stats):
165
- batch_urns = post_urns_for_stats[i:i+batch_size_stats]
166
- params = {'q': 'organizationalEntity', 'organizationalEntity': org_urn}
167
-
168
- share_idx = 0 # Index for share URNs in the current batch's params
169
- ugc_idx = 0 # Index for ugcPost URNs in the current batch's params
170
-
171
- # Keep track of URNs actually added to this batch's parameters for logging
172
- urns_in_current_api_call = []
173
-
174
- for urn_str in batch_urns:
175
- if ":share:" in urn_str:
176
- params[f"shares[{share_idx}]"] = urn_str
177
- share_idx += 1
178
- urns_in_current_api_call.append(urn_str)
179
- elif ":ugcPost:" in urn_str:
180
- params[f"ugcPosts[{ugc_idx}]"] = urn_str
181
- ugc_idx += 1
182
- urns_in_current_api_call.append(urn_str)
183
- else:
184
- logging.warning(f"URN {urn_str} is not a recognized share or ugcPost type for stats. Skipping.")
185
- continue
186
-
187
- # If no valid URNs were prepared for this batch, skip the API call
188
- if not share_idx and not ugc_idx: # or check 'if not urns_in_current_api_call:'
189
- logging.info(f"Skipping API call for an empty or invalid batch of URNs (original batch segment size: {len(batch_urns)}).")
190
- continue
191
-
192
- try:
193
- # Log the URNs being sent in this specific API call
194
- logging.info(f"Fetching stats for batch of {len(urns_in_current_api_call)} URNs. First URN in call: {urns_in_current_api_call[0] if urns_in_current_api_call else 'N/A'}")
195
-
196
- # Actual API call
197
- stat_resp = session.get(f"{API_REST_BASE}/organizationalEntityShareStatistics", params=params)
198
- stat_resp.raise_for_status() # Raises an HTTPError for bad responses (4XX or 5XX)
199
- stats_data = stat_resp.json()
200
-
201
- # --- Corrected Parsing Logic ---
202
- # LinkedIn API for batch stats often returns an "elements" list.
203
- elements_from_api = stats_data.get("elements")
204
-
205
- if isinstance(elements_from_api, list):
206
- if not elements_from_api:
207
- logging.info(f"API returned 'elements' but it's an empty list for the URNs in this call.")
208
-
209
- processed_urns_in_batch = 0
210
- for item in elements_from_api:
211
- urn_in_item = None
212
- # Determine the URN key (e.g., 'share' or 'ugcPost')
213
- if "share" in item:
214
- urn_in_item = item.get("share")
215
- elif "ugcPost" in item:
216
- urn_in_item = item.get("ugcPost")
217
- # Add other URN types if necessary, e.g., elif "article" in item: ...
218
-
219
- if urn_in_item:
220
- stats_values = item.get("totalShareStatistics", {})
221
- if stats_values: # Only add if there are actual stats
222
- stats_map[urn_in_item] = stats_values
223
- processed_urns_in_batch +=1
224
- else:
225
- # It's possible an URN is returned without stats, or with empty stats
226
- logging.debug(f"No 'totalShareStatistics' data found for URN: {urn_in_item} in API item: {item}")
227
- stats_map[urn_in_item] = {} # Store empty stats if URN was processed but had no data
228
- else:
229
- logging.warning(f"Could not extract a recognized URN key from API element: {item}")
230
- logging.info(f"Successfully processed {processed_urns_in_batch} URNs with stats from the API response for this batch. Current total stats_map size: {len(stats_map)}")
231
-
232
- elif elements_from_api is None and "results" in stats_data:
233
- # Fallback or alternative check if your API version *does* use "results"
234
- # This was your original attempt. If "elements" is consistently missing,
235
- # you might need to debug the exact structure of "results".
236
- logging.warning(f"API response does not contain 'elements' key, but 'results' key is present. Attempting to parse 'results'. Response keys: {stats_data.keys()}")
237
- results_dict = stats_data.get("results", {})
238
- if isinstance(results_dict, dict):
239
- for urn_key, stat_element_values in results_dict.items():
240
- stats_map[urn_key] = stat_element_values.get("totalShareStatistics", {})
241
- logging.info(f"Processed stats from 'results' dictionary. Current stats_map size: {len(stats_map)}")
242
- else:
243
- logging.error(f"'results' key found but is not a dictionary. Type: {type(results_dict)}")
244
-
245
- else:
246
- # Neither "elements" (as list) nor "results" (as dict) found as expected
247
- logging.error(f"API response structure not recognized. Expected 'elements' (list) or 'results' (dict). Got keys: {stats_data.keys()}. Full response sample: {str(stats_data)[:500]}")
248
-
249
- # --- End Corrected Parsing Logic ---
250
-
251
- # Check for specific errors reported by the API within the JSON response
252
- if stats_data.get("errors"):
253
- for urn_errored, error_detail in stats_data.get("errors", {}).items():
254
- logging.warning(f"API reported error for URN {urn_errored}: {error_detail.get('message', 'Unknown API error message')}")
255
-
256
- # This log might be slightly misleading if parsing failed but no exception occurred.
257
- # The more specific log after parsing 'elements' is better.
258
- # logging.info(f"Successfully processed stats response for {len(urns_in_current_api_call)} URNs. Current stats_map size: {len(stats_map)}")
259
-
260
-
261
- except requests.exceptions.HTTPError as e:
262
- # Specific handling for HTTP errors (4xx, 5xx)
263
- status_code = e.response.status_code
264
- response_text = e.response.text
265
- logging.warning(f"HTTP error fetching stats for a batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}") # Log first 500 chars of response
266
- except requests.exceptions.RequestException as e:
267
- # Catch other requests-related errors (e.g., connection issues)
268
- status_code = getattr(e.response, 'status_code', 'N/A')
269
- response_text = getattr(e.response, 'text', 'No response text')
270
- logging.warning(f"Request failed for stats batch (Status: {status_code}): {e}. Params: {params}. Response: {response_text[:500]}")
271
- except json.JSONDecodeError as e:
272
- # Handle cases where the response is not valid JSON
273
- response_text_for_json_error = stat_resp.text if 'stat_resp' in locals() and hasattr(stat_resp, 'text') else 'Response object not available or no text attribute'
274
- logging.warning(f"Failed to decode JSON from stats response: {e}. Response text: {response_text_for_json_error[:500]}") # Log first 500 chars
275
- except Exception as e:
276
- # Catch any other unexpected errors during the batch processing
277
- logging.error(f"An unexpected error occurred processing stats batch: {e}", exc_info=True)
278
-
279
-
280
- logging.info(f"Finished processing all URN batches. Final stats_map size: {len(stats_map)}")
281
-
282
- processed_raw_posts = []
283
- for p in raw_posts_api:
284
- post_id = p.get("id")
285
- if not post_id:
286
- logging.warning("Skipping raw post due to missing ID.")
287
- continue
288
-
289
- text_content = p.get("commentary") or \
290
- p.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {}).get("shareCommentaryV2", {}).get("text", "") or \
291
- "[No text content]"
292
-
293
- timestamp = p.get("publishedAt") or p.get("createdAt") or p.get("firstPublishedAt")
294
- published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
295
-
296
- structured_res = structured_results_map.get(post_id, {"summary": "N/A", "category": "N/A"})
297
-
298
- processed_raw_posts.append({
299
- "id": post_id,
300
- "raw_text": text_content,
301
- "summary": structured_res["summary"],
302
- "category": structured_res["category"],
303
- "published_at_timestamp": timestamp,
304
- "published_at_iso": published_at_iso,
305
- "organization_urn": p.get("author", f"urn:li:organization:{org_urn.split(':')[-1]}"),
306
- "is_ad": 'adContext' in p,
307
- "media_category": get_post_media_category(p.get("content")),
308
- })
309
- logging.info(f"Processed {len(processed_raw_posts)} posts with core data.")
310
- return processed_raw_posts, stats_map, "DefaultOrgName"
311
-
312
-
313
- def fetch_comments(comm_client_id, community_token, post_urns, stats_map):
314
- """
315
- Fetches comments for a list of post URNs using the socialActions endpoint.
316
- Uses stats_map to potentially skip posts with 0 comments.
317
- """
318
- # Ensure community_token is in the expected dictionary format for create_session
319
- if isinstance(community_token, str):
320
- token_dict = {'access_token': community_token, 'token_type': 'Bearer'}
321
- elif isinstance(community_token, dict) and 'access_token' in community_token:
322
- token_dict = community_token
323
- else:
324
- logging.error("Invalid community_token format. Expected a string or a dict with 'access_token'.")
325
- return {urn: [] for urn in post_urns} # Return empty for all if token is bad
326
-
327
- linkedin_session = create_session(comm_client_id, token=token_dict)
328
-
329
- # Set the LinkedIn API version header
330
- # This is crucial for API compatibility.
331
- linkedin_session.headers.update({
332
- 'LinkedIn-Version': "202502" # Or your target version
333
- })
334
-
335
- all_comments_by_post = {}
336
- logging.info(f"Fetching comments for {len(post_urns)} posts.")
337
-
338
- for post_urn in post_urns:
339
- post_stats = stats_map.get(post_urn, {})
340
- # Try to get comment count from "commentSummary" first, then fallback to "commentCount"
341
- comment_summary = post_stats.get("commentSummary", {})
342
- comment_count_from_stats = comment_summary.get("totalComments", post_stats.get('commentCount', 0))
343
-
344
- if comment_count_from_stats == 0:
345
- logging.info(f"Skipping comment fetch for {post_urn} as commentCount is 0 in stats_map.")
346
- all_comments_by_post[post_urn] = []
347
- continue
348
-
349
- try:
350
- # IMPORTANT: Use the correct endpoint structure from your working code.
351
- # The post_urn goes directly into the path and should NOT be URL-encoded here.
352
- url = f"{API_REST_BASE}/socialActions/{post_urn}/comments"
353
- # If you want to add other parameters like 'count' or 'start', append them, e.g.,
354
- # url = f"{API_REST_BASE}/socialActions/{post_urn}/comments?sortOrder=CHRONOLOGICAL&count=10"
355
-
356
- logging.debug(f"Fetching comments from URL: {url} for post URN: {post_urn}")
357
- response = linkedin_session.get(url)
358
-
359
- if response.status_code == 200:
360
- elements = response.json().get('elements', [])
361
- comments_texts = []
362
- for c in elements:
363
- # Extracting comment text. Adjust if the structure is different.
364
- # The original working code stored `data.get('elements', [])`
365
- # If you need the full comment object, store 'c' instead of 'comment_text'.
366
- message_obj = c.get('message', {})
367
- if isinstance(message_obj, dict): # Ensure message is a dict before .get('text')
368
- comment_text = message_obj.get('text')
369
- if comment_text:
370
- comments_texts.append(comment_text)
371
- elif isinstance(message_obj, str): # Sometimes message might be just a string
372
- comments_texts.append(message_obj)
373
-
374
- all_comments_by_post[post_urn] = comments_texts
375
- logging.info(f"Fetched {len(comments_texts)} comments for {post_urn}.")
376
- elif response.status_code == 403:
377
- logging.warning(f"Forbidden (403) to fetch comments for {post_urn}. URL: {url}. Response: {response.text}. Check permissions or API version.")
378
- all_comments_by_post[post_urn] = [] # Or some error indicator
379
- elif response.status_code == 404:
380
- logging.warning(f"Comments not found (404) for {post_urn}. URL: {url}. Response: {response.text}")
381
- all_comments_by_post[post_urn] = []
382
- else:
383
- logging.error(f"Error fetching comments for {post_urn}. Status: {response.status_code}. URL: {url}. Response: {response.text}")
384
- all_comments_by_post[post_urn] = [] # Or some error indicator
385
- except requests.exceptions.RequestException as e:
386
- logging.error(f"RequestException fetching comments for {post_urn}: {e}")
387
- all_comments_by_post[post_urn] = []
388
- except json.JSONDecodeError as e:
389
- # Log the response text if it's available and JSON decoding fails
390
- response_text_for_log = 'N/A'
391
- if 'response' in locals() and hasattr(response, 'text'):
392
- response_text_for_log = response.text
393
- logging.error(f"JSONDecodeError fetching comments for {post_urn}. Response: {response_text_for_log}. Error: {e}")
394
- all_comments_by_post[post_urn] = []
395
- except Exception as e:
396
- # Catch any other unexpected errors
397
- logging.error(f"Unexpected error fetching comments for {post_urn}: {e}", exc_info=True) # exc_info=True for traceback
398
- all_comments_by_post[post_urn] = []
399
-
400
- return all_comments_by_post
401
-
402
- def analyze_sentiment(all_comments_data):
403
- """
404
- Analyzes sentiment for comments grouped by post_urn using the helper function.
405
- all_comments_data is a dict: {post_urn: [comment_text_1, comment_text_2,...]}
406
- Returns a dict: {post_urn: {"sentiment": "DominantOverallSentiment", "percentage": X.X, "details": {aggregated_counts}}}
407
- """
408
- results_by_post = {}
409
- logging.info(f"Analyzing aggregated sentiment for comments from {len(all_comments_data)} posts.")
410
- for post_urn, comments_list in all_comments_data.items():
411
- aggregated_sentiment_counts = defaultdict(int)
412
- total_valid_comments_for_post = 0
413
-
414
- if not comments_list:
415
- results_by_post[post_urn] = {"sentiment": "Neutral 😐", "percentage": 0.0, "details": dict(aggregated_sentiment_counts)}
416
- continue
417
-
418
- for comment_text in comments_list:
419
- if not comment_text or not comment_text.strip():
420
- continue
421
-
422
- # Use the helper for individual comment sentiment
423
- single_comment_sentiment = _get_sentiment_from_text(comment_text)
424
-
425
- # Aggregate counts
426
- for label, count in single_comment_sentiment["counts"].items():
427
- aggregated_sentiment_counts[label] += count
428
-
429
- if single_comment_sentiment["label"] != "Error": # Count valid analyses
430
- total_valid_comments_for_post +=1
431
-
432
- dominant_overall_sentiment = "Neutral 😐" # Default
433
- percentage = 0.0
434
-
435
- if total_valid_comments_for_post > 0:
436
- # Determine dominant sentiment from aggregated_sentiment_counts
437
- # Exclude 'Error' from being a dominant sentiment unless it's the only category with counts
438
- valid_sentiments = {k: v for k, v in aggregated_sentiment_counts.items() if k != 'Error' and v > 0}
439
- if not valid_sentiments:
440
- dominant_overall_sentiment = 'Error' if aggregated_sentiment_counts['Error'] > 0 else 'Neutral 😐'
441
- else:
442
- # Simple max count logic for dominance
443
- dominant_overall_sentiment = max(valid_sentiments, key=valid_sentiments.get)
444
-
445
- if dominant_overall_sentiment != 'Error':
446
- percentage = round((aggregated_sentiment_counts[dominant_overall_sentiment] / total_valid_comments_for_post) * 100, 1)
447
- else: # if dominant is 'Error' or only errors were found
448
- percentage = 0.0
449
- elif aggregated_sentiment_counts['Error'] > 0 : # No valid comments, but errors occurred
450
- dominant_overall_sentiment = 'Error'
451
-
452
-
453
- results_by_post[post_urn] = {
454
- "sentiment": dominant_overall_sentiment,
455
- "percentage": percentage,
456
- "details": dict(aggregated_sentiment_counts) # Store aggregated counts
457
- }
458
- logging.debug(f"Aggregated sentiment for post {post_urn}: {results_by_post[post_urn]}")
459
-
460
- return results_by_post
461
-
462
-
463
- def compile_detailed_posts(processed_raw_posts, stats_map, sentiments_per_post):
464
- """
465
- Combines processed raw post data with their statistics and overall comment sentiment.
466
- """
467
- detailed_post_list = []
468
- logging.info(f"Compiling detailed data for {len(processed_raw_posts)} posts.")
469
- for proc_post in processed_raw_posts:
470
- post_id = proc_post["id"]
471
- stats = stats_map.get(post_id, {})
472
-
473
- likes = stats.get("likeCount", 0)
474
- comments_stat_count = stats.get("commentSummary", {}).get("totalComments", stats.get("commentCount", 0))
475
-
476
- clicks = stats.get("clickCount", 0)
477
- shares = stats.get("shareCount", 0)
478
- impressions = stats.get("impressionCount", 0)
479
- unique_impressions = stats.get("uniqueImpressionsCount", stats.get("impressionCount", 0))
480
-
481
- engagement_numerator = likes + comments_stat_count + clicks + shares
482
- engagement_rate = (engagement_numerator / impressions * 100) if impressions and impressions > 0 else 0.0
483
-
484
- sentiment_info = sentiments_per_post.get(post_id, {"sentiment": "Neutral 😐", "percentage": 0.0, "details": {}})
485
-
486
- display_text = html.escape(proc_post["raw_text"][:250]).replace("\n", "<br>") + \
487
- ("..." if len(proc_post["raw_text"]) > 250 else "")
488
-
489
- when_formatted = datetime.fromtimestamp(proc_post["published_at_timestamp"] / 1000).strftime("%Y-%m-%d %H:%M") \
490
- if proc_post["published_at_timestamp"] else "Unknown"
491
-
492
- detailed_post_list.append({
493
- "id": post_id,
494
- "when": when_formatted,
495
- "text_for_display": display_text,
496
- "raw_text": proc_post["raw_text"],
497
- "likes": likes,
498
- "comments_stat_count": comments_stat_count,
499
- "clicks": clicks,
500
- "shares": shares,
501
- "impressions": impressions,
502
- "uniqueImpressionsCount": unique_impressions,
503
- "engagement": f"{engagement_rate:.2f}%",
504
- "engagement_raw": engagement_rate,
505
- "sentiment": sentiment_info["sentiment"],
506
- "sentiment_percent": sentiment_info["percentage"],
507
- "sentiment_details": sentiment_info.get("details", {}),
508
- "summary": proc_post["summary"],
509
- "category": proc_post["category"],
510
- "organization_urn": proc_post["organization_urn"],
511
- "is_ad": proc_post["is_ad"],
512
- "media_category": proc_post.get("media_category", "NONE"),
513
- "published_at": proc_post["published_at_iso"]
514
- })
515
- logging.info(f"Compiled {len(detailed_post_list)} detailed posts.")
516
- return detailed_post_list
517
-
518
-
519
- def prepare_data_for_bubble(detailed_posts, all_actual_comments_data):
520
- """
521
- Prepares data lists for uploading to Bubble.
522
- - detailed_posts: List of comprehensively compiled post objects.
523
- - all_actual_comments_data: Dict of {post_urn: [comment_texts]} from fetch_comments.
524
- """
525
- li_posts = []
526
- li_post_stats = []
527
- li_post_comments = []
528
- logging.info("Preparing posts data for Bubble.")
529
-
530
- if not detailed_posts:
531
- logging.warning("No detailed posts to prepare for Bubble.")
532
- return [], [], []
533
-
534
- org_urn_default = detailed_posts[0]["organization_urn"] if detailed_posts else "urn:li:organization:UNKNOWN"
535
-
536
- for post_data in detailed_posts:
537
- li_posts.append({
538
- "organization_urn": post_data["organization_urn"],
539
- "id": post_data["id"],
540
- "is_ad": post_data["is_ad"],
541
- "media_type": post_data.get("media_category", "NONE"),
542
- "published_at": post_data["published_at"],
543
- "sentiment": post_data["sentiment"],
544
- "text": post_data["raw_text"],
545
- #"summary_text": post_data["summary"],
546
- "li_eb_label": post_data["category"]
547
- })
548
-
549
- li_post_stats.append({
550
- "clickCount": post_data["clicks"],
551
- "commentCount": post_data["comments_stat_count"],
552
- "engagement": post_data["engagement_raw"],
553
- "impressionCount": post_data["impressions"],
554
- "likeCount": post_data["likes"],
555
- "shareCount": post_data["shares"],
556
- "uniqueImpressionsCount": post_data["uniqueImpressionsCount"],
557
- "post_id": post_data["id"],
558
- "organization_urn": post_data["organization_urn"]
559
- })
560
-
561
- for post_urn, comments_text_list in all_actual_comments_data.items():
562
- current_post_org_urn = org_urn_default
563
- for p in detailed_posts:
564
- if p["id"] == post_urn:
565
- current_post_org_urn = p["organization_urn"]
566
- break
567
-
568
- for single_comment_text in comments_text_list:
569
- if single_comment_text and single_comment_text.strip():
570
- li_post_comments.append({
571
- "comment_text": single_comment_text,
572
- "post_id": post_urn,
573
- "organization_urn": current_post_org_urn
574
- })
575
-
576
- logging.info(f"Prepared {len(li_posts)} posts, {len(li_post_stats)} stats entries, and {len(li_post_comments)} comments for Bubble.")
577
- return li_posts, li_post_stats, li_post_comments
578
-
579
- # --- Mentions Retrieval Functions ---
580
-
581
- def fetch_linkedin_mentions_core(comm_client_id, community_token, org_urn, count=20):
582
- """
583
- Fetches raw mention notifications and the details of the posts where the organization was mentioned.
584
- Returns a list of processed mention data (internal structure).
585
- """
586
- token_dict = community_token if isinstance(community_token, dict) else {'access_token': community_token, 'token_type': 'Bearer'}
587
- session = create_session(comm_client_id, token=token_dict)
588
- session.headers.update({
589
- "X-Restli-Protocol-Version": "2.0.0",
590
- "LinkedIn-Version": "202502"
591
- })
592
-
593
- encoded_org_urn = quote(org_urn, safe='')
594
-
595
- notifications_url_base = (
596
- f"{API_REST_BASE}/organizationalEntityNotifications"
597
- f"?q=criteria"
598
- f"&actions=List(SHARE_MENTION)"
599
- f"&organizationalEntity={encoded_org_urn}"
600
- f"&count={count}"
601
- )
602
-
603
- all_notifications = []
604
- start_index = 0
605
- processed_mentions_internal = []
606
- page_count = 0
607
- max_pages = 10
608
-
609
- while page_count < max_pages:
610
- current_url = f"{notifications_url_base}&start={start_index}"
611
- logging.info(f"Fetching notifications page {page_count + 1} from URL: {current_url}")
612
- try:
613
- resp = session.get(current_url)
614
- resp.raise_for_status()
615
- data = resp.json()
616
- elements = data.get("elements", [])
617
-
618
- if not elements:
619
- logging.info(f"No more notifications found on page {page_count + 1}. Total notifications fetched: {len(all_notifications)}.")
620
- break
621
-
622
- all_notifications.extend(elements)
623
-
624
- paging = data.get("paging", {})
625
- if 'start' not in paging or 'count' not in paging or len(elements) < paging.get('count', count):
626
- logging.info(f"Last page of notifications fetched. Total notifications: {len(all_notifications)}.")
627
- break
628
-
629
- start_index = paging['start'] + paging['count']
630
- page_count += 1
631
-
632
- except requests.exceptions.RequestException as e:
633
- status = getattr(e.response, 'status_code', 'N/A')
634
- text = getattr(e.response, 'text', 'No response text')
635
- logging.error(f"Failed to fetch notifications (Status: {status}): {e}. Response: {text}")
636
- break
637
- except json.JSONDecodeError as e:
638
- logging.error(f"Failed to decode JSON from notifications response: {e}. Response: {resp.text if resp else 'No resp obj'}")
639
- break
640
- if page_count >= max_pages:
641
- logging.info(f"Reached max_pages ({max_pages}) for fetching notifications.")
642
- break
643
-
644
- if not all_notifications:
645
- logging.info("No mention notifications found after fetching.")
646
- return []
647
-
648
- mention_share_urns = list(set([
649
- n.get("generatedActivity") for n in all_notifications
650
- if n.get("action") == "SHARE_MENTION" and n.get("generatedActivity")
651
- ]))
652
-
653
- logging.info(f"Found {len(mention_share_urns)} unique share URNs from SHARE_MENTION notifications.")
654
-
655
- # for share_urn in mention_share_urns:
656
- # encoded_share_urn = quote(share_urn, safe='')
657
- # post_detail_url = f"{API_REST_BASE}/posts/{encoded_share_urn}"
658
- # logging.info(f"Fetching details for mentioned post: {post_detail_url}")
659
- # try:
660
- # post_resp = session.get(post_detail_url)
661
- # post_resp.raise_for_status()
662
- # post_data = post_resp.json()
663
-
664
- # commentary_raw = post_data.get("commentary")
665
- # if not commentary_raw and "specificContent" in post_data:
666
- # share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
667
- # commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
668
-
669
- # if not commentary_raw:
670
- # logging.warning(f"No commentary found for share URN {share_urn}. Skipping.")
671
- # continue
672
-
673
- # mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
674
- # timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
675
- # published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
676
- # author_urn = post_data.get("author", "urn:li:unknown")
677
-
678
- # processed_mentions_internal.append({
679
- # "mention_id": f"mention_{share_urn}",
680
- # "share_urn": share_urn,
681
- # "mention_text_raw": commentary_raw,
682
- # "mention_text_cleaned": mention_text_cleaned,
683
- # "published_at_timestamp": timestamp,
684
- # "published_at_iso": published_at_iso,
685
- # "mentioned_by_author_urn": author_urn,
686
- # "organization_urn_mentioned": org_urn
687
- # })
688
- # except requests.exceptions.RequestException as e:
689
- # status = getattr(e.response, 'status_code', 'N/A')
690
- # text = getattr(e.response, 'text', 'No response text')
691
- # logging.warning(f"Failed to fetch post details for share URN {share_urn} (Status: {status}): {e}. Response: {text}")
692
- # except json.JSONDecodeError as e:
693
- # logging.warning(f"Failed to decode JSON for post details {share_urn}: {e}. Response: {post_resp.text if post_resp else 'No resp obj'}")
694
-
695
- if mention_share_urns:
696
- # Encode URNs for the batch request URL
697
- encoded_urns = [quote(urn, safe='') for urn in mention_share_urns]
698
- formatted_urns = ",".join(encoded_urns)
699
-
700
- # Construct the URL for batch fetching post details
701
- # API_REST_BASE should be the base URL like "https://api.linkedin.com/rest"
702
- batch_posts_url = f"{API_REST_BASE}/posts?ids=List({formatted_urns})"
703
- logging.info(f"Fetching details for {len(mention_share_urns)} posts in a batch: {batch_posts_url}")
704
-
705
- try:
706
- batch_resp = session.get(batch_posts_url)
707
- batch_resp.raise_for_status() # Raise an exception for HTTP errors
708
- batch_data = batch_resp.json()
709
-
710
- results = batch_data.get("results", {}) # Contains post details keyed by URN
711
- errors = batch_data.get("errors", {}) # Contains errors for specific URNs
712
- statuses = batch_data.get("statuses", {}) # Contains HTTP statuses for specific URNs
713
-
714
- # Process each share URN using the data from the batch response
715
- for share_urn in mention_share_urns:
716
- if share_urn not in results:
717
- # Log if a URN was requested but not found in the results
718
- logging.warning(
719
- f"Post details for share URN {share_urn} not found in batch response. "
720
- f"Status: {statuses.get(share_urn)}, Error: {errors.get(share_urn)}"
721
- )
722
- continue
723
-
724
- post_data = results[share_urn]
725
-
726
- # Extract commentary - try direct 'commentary' field first, then fallback
727
- commentary_raw = post_data.get("commentary")
728
- if not commentary_raw and "specificContent" in post_data:
729
- # Fallback for older structures or specific share types if 'commentary' is not top-level
730
- share_content = post_data.get("specificContent", {}).get("com.linkedin.ugc.ShareContent", {})
731
- commentary_raw = share_content.get("shareCommentaryV2", {}).get("text", "")
732
-
733
- if not commentary_raw:
734
- logging.warning(f"No commentary found for share URN {share_urn} in batch data. Skipping.")
735
- continue
736
-
737
- # Clean the commentary text (assuming this function is defined)
738
- mention_text_cleaned = extract_text_from_mention_commentary(commentary_raw)
739
-
740
- # Extract timestamp and convert to ISO format
741
- timestamp = post_data.get("publishedAt") or post_data.get("createdAt") or post_data.get("firstPublishedAt")
742
- published_at_iso = datetime.fromtimestamp(timestamp / 1000).isoformat() if timestamp else None
743
-
744
- # Extract author URN
745
- author_urn = post_data.get("author", "urn:li:unknown") # Default if author is not found
746
-
747
- # Append processed mention data
748
- processed_mentions_internal.append({
749
- "mention_id": f"mention_{share_urn}", # Create a unique ID for the mention
750
- "share_urn": share_urn,
751
- "mention_text_raw": commentary_raw,
752
- "mention_text_cleaned": mention_text_cleaned,
753
- "published_at_timestamp": timestamp,
754
- "published_at_iso": published_at_iso,
755
- "mentioned_by_author_urn": author_urn,
756
- "organization_urn_mentioned": org_urn # The URN of the organization that was mentioned
757
- })
758
-
759
- except requests.exceptions.RequestException as e:
760
- status = getattr(e.response, 'status_code', 'N/A')
761
- text = getattr(e.response, 'text', 'No response text')
762
- logging.error(f"Failed to fetch batch post details (Status: {status}): {e}. Response: {text}")
763
- except json.JSONDecodeError as e:
764
- # Log error if JSON decoding fails for the batch response
765
- logging.error(f"Failed to decode JSON from batch posts response: {e}. Response: {batch_resp.text if batch_resp else 'No resp obj'}")
766
-
767
-
768
- logging.info(f"Processed {len(processed_mentions_internal)} mentions with their post details.")
769
- return processed_mentions_internal
770
-
771
-
772
- def analyze_mentions_sentiment(processed_mentions_list):
773
- """
774
- Analyzes sentiment for the text of each processed mention using the helper function.
775
- Input: list of processed_mention dicts (internal structure from fetch_linkedin_mentions_core).
776
- Returns: a dict {mention_id: {"sentiment_label": "DominantSentiment", "percentage": 100.0, "details": {counts}}}
777
- """
778
- mention_sentiments_map = {}
779
- logging.info(f"Analyzing individual sentiment for {len(processed_mentions_list)} mentions.")
780
-
781
- for mention_data in processed_mentions_list:
782
- mention_internal_id = mention_data["mention_id"] # Internal ID from fetch_linkedin_mentions_core
783
- text_to_analyze = mention_data.get("mention_text_cleaned", "")
784
-
785
- sentiment_result = _get_sentiment_from_text(text_to_analyze)
786
-
787
- # For single text, percentage is 100% for the dominant label if not error
788
- percentage = 0.0
789
- if sentiment_result["label"] != "Error" and any(sentiment_result["counts"].values()):
790
- percentage = 100.0
791
-
792
- mention_sentiments_map[mention_internal_id] = {
793
- "sentiment_label": sentiment_result["label"], # The dominant sentiment label
794
- "percentage": percentage,
795
- "details": dict(sentiment_result["counts"]) # Raw counts for this specific mention
796
- }
797
- logging.debug(f"Individual sentiment for mention {mention_internal_id}: {mention_sentiments_map[mention_internal_id]}")
798
-
799
- return mention_sentiments_map
800
-
801
-
802
- def compile_detailed_mentions(processed_mentions_list, mention_sentiments_map):
803
- """
804
- Combines processed mention data (internal structure) with their sentiment analysis
805
- into the user-specified output format.
806
- processed_mentions_list: list of dicts from fetch_linkedin_mentions_core
807
- mention_sentiments_map: dict from analyze_mentions_sentiment, keyed by "mention_id" (internal)
808
- and contains "sentiment_label".
809
- """
810
- detailed_mentions_output = []
811
- logging.info(f"Compiling detailed data for {len(processed_mentions_list)} mentions into specified format.")
812
-
813
- for mention_core_data in processed_mentions_list:
814
- mention_internal_id = mention_core_data["mention_id"]
815
- sentiment_info = mention_sentiments_map.get(mention_internal_id, {"sentiment_label": "Neutral 😐"})
816
-
817
- date_formatted = "Unknown"
818
- if mention_core_data["published_at_timestamp"]:
819
- try:
820
- date_formatted = datetime.fromtimestamp(mention_core_data["published_at_timestamp"] / 1000).strftime("%Y-%m-%d %H:%M")
821
- except TypeError:
822
- logging.warning(f"Could not format timestamp for mention_id {mention_internal_id}")
823
-
824
- detailed_mentions_output.append({
825
- "date": date_formatted, # User-specified field name
826
- "id": mention_core_data["share_urn"], # User-specified field name (URN of the post with mention)
827
- "mention_text": mention_core_data["mention_text_cleaned"], # User-specified field name
828
- "organization_urn": mention_core_data["organization_urn_mentioned"], # User-specified field name
829
- "sentiment_label": sentiment_info["sentiment_label"] # User-specified field name
830
- })
831
- logging.info(f"Compiled {len(detailed_mentions_output)} detailed mentions with specified fields.")
832
- return detailed_mentions_output
833
-
834
-
835
- def prepare_mentions_for_bubble(compiled_detailed_mentions_list):
836
- """
837
- Prepares mention data for uploading to a Bubble table.
838
- The input `compiled_detailed_mentions_list` is already in the user-specified format:
839
- [{"date": ..., "id": ..., "mention_text": ..., "organization_urn": ..., "sentiment_label": ...}, ...]
840
- This function directly uses these fields as per user's selection for Bubble upload.
841
- """
842
- li_mentions_bubble = []
843
- logging.info(f"Preparing {len(compiled_detailed_mentions_list)} compiled mentions for Bubble upload.")
844
-
845
- if not compiled_detailed_mentions_list:
846
- return []
847
-
848
- for mention_data in compiled_detailed_mentions_list:
849
- # The mention_data dictionary already has the keys:
850
- # "date", "id", "mention_text", "organization_urn", "sentiment_label"
851
- # These are used directly for the Bubble upload list.
852
- li_mentions_bubble.append({
853
- "date": mention_data["date"],
854
- "id": mention_data["id"],
855
- "mention_text": mention_data["mention_text"],
856
- "organization_urn": mention_data["organization_urn"],
857
- "sentiment_label": mention_data["sentiment_label"]
858
- # If Bubble table has different field names, mapping would be done here.
859
- # Example: "bubble_mention_date": mention_data["date"],
860
- # For now, using direct mapping as per user's selected code for the append.
861
- })
862
-
863
- logging.info(f"Prepared {len(li_mentions_bubble)} mention entries for Bubble, using direct field names from compiled data.")
864
- return li_mentions_bubble