Update analytics_plot_generator.py
analytics_plot_generator.py  (+275 −202)
CHANGED
@@ -5,6 +5,7 @@ from io import BytesIO
5 |  import base64
6 |  import numpy as np
7 |  import matplotlib.ticker as mticker
8 |
9 |  # Configure logging for this module
10 |  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -184,13 +185,9 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
184 |  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
185 |
186 |  fig, ax = plt.subplots(figsize=(8, 5))
187 | - # Using a try/except to pick a colormap
188 | - try:
189 | -     colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
190 | - except ValueError:  # Fallback if Pastel1 doesn't have enough colors or isn't available
191 | -     colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
192 |  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
193 | -
194 |  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
195 |  ax.set_title('Mention Sentiment Distribution')
196 |  ax.axis('equal')
@@ -449,10 +446,13 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
449 |  ax.set_xlabel('Date')
450 |  ax.set_ylabel('Engagement Rate')
451 |  # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
452 | - max_rate_val = engagement_over_time.max()
453 | - formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val >= 0 else 100.0
454 | - if max_rate_val > 100:
455 |      formatter_xmax = max_rate_val  # Or some other sensible upper bound for formatting
456 |  ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
457 |  ax.grid(True, linestyle='--', alpha=0.7)
458 |  plt.xticks(rotation=45)
@@ -525,7 +525,7 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
525 |  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
526 |
527 |  if df_copy.empty:  # After dropping NaNs for essential columns
528 | -
529 |
530 |  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
531 |
@@ -544,34 +544,27 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
544 |  finally:
545 |      plt.close('all')
546 |
547 | - # ---
548 | -
549 |  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
550 | -     """Generates a plot for likes over time."""
551 |      title = "Reactions (Likes) Over Time"
552 |      logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
553 | -
554 |      if df is None or df.empty:
555 |          return create_placeholder_plot(title=title, message="No post data for likes.")
556 | -
557 |      required_cols = [date_column, likes_col]
558 | -     missing_cols = [col for col in required_cols if col not in df.columns]
559 | -     if missing_cols:
560 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
561 | -
562 |      try:
563 |          df_copy = df.copy()
564 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
565 |          df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce')
566 |          df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
567 | -
568 |          if df_copy.empty:
569 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for likes plot.")
570 | -
571 | -         likes_over_time = df_copy.resample('D')[likes_col].sum()
572 | -
573 |          fig, ax = plt.subplots(figsize=(10, 5))
574 | -         ax.plot(likes_over_time.index, likes_over_time.values, marker='.', linestyle='-')
575 |          ax.set_title(title)
576 |          ax.set_xlabel('Date')
577 |          ax.set_ylabel('Total Likes')
@@ -585,73 +578,36 @@ def generate_likes_over_time_plot(df, date_column='published_at', likes_col='lik
585 |  finally:
586 |      plt.close('all')
587 |
588 | -
589 |  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
590 | -     """Generates a plot for clicks over time (…)"""
591 |      title = "Clicks Over Time"
592 |      logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
593 | -
594 | -     if df is None or df.empty:
595 | -         return create_placeholder_plot(title=title, message="No post data for clicks.")
596 | -
597 | -     required_cols = [date_column, clicks_col]
598 | -     missing_cols = [col for col in required_cols if col not in df.columns]
599 | -     if missing_cols:
600 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
601 | -
602 | -     try:
603 | -         df_copy = df.copy()
604 | -         df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
605 | -         df_copy[clicks_col] = pd.to_numeric(df_copy[clicks_col], errors='coerce')
606 | -         df_copy = df_copy.dropna(subset=[date_column, clicks_col]).set_index(date_column)
607 | -
608 | -         if df_copy.empty:
609 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for clicks plot.")
610 | -
611 | -         clicks_over_time = df_copy.resample('D')[clicks_col].sum()
612 | -
613 | -         fig, ax = plt.subplots(figsize=(10, 5))
614 | -         ax.plot(clicks_over_time.index, clicks_over_time.values, marker='.', linestyle='-', color='teal')
615 | -         ax.set_title(title)
616 | -         ax.set_xlabel('Date')
617 | -         ax.set_ylabel('Total Clicks')
618 | -         ax.grid(True, linestyle='--', alpha=0.7)
619 | -         plt.xticks(rotation=45)
620 | -         plt.tight_layout()
621 | -         return fig
622 | -     except Exception as e:
623 | -         logging.error(f"Error generating {title}: {e}", exc_info=True)
624 | -         return create_placeholder_plot(title=f"{title} Error", message=str(e))
625 | -     finally:
626 | -         plt.close('all')
627 |
628 |
629 |  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
630 |      """Generates a plot for shares over time."""
631 |      title = "Shares Over Time"
632 |      logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
633 | -
634 |      if df is None or df.empty:
635 |          return create_placeholder_plot(title=title, message="No post data for shares.")
636 | -
637 |      required_cols = [date_column, shares_col]
638 | -     missing_cols = [col for col in required_cols if col not in df.columns]
639 | -     if missing_cols:
640 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
641 | -
642 |      try:
643 |          df_copy = df.copy()
644 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
645 |          df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce')
646 |          df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column)
647 | -
648 |          if df_copy.empty:
649 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for shares plot.")
650 | -
651 | -         shares_over_time = df_copy.resample('D')[shares_col].sum()
652 |
653 |          fig, ax = plt.subplots(figsize=(10, 5))
654 | -         ax.plot(shares_over_time.index, shares_over_time.values, marker='.', linestyle='-')
655 |          ax.set_title(title)
656 |          ax.set_xlabel('Date')
657 |          ax.set_ylabel('Total Shares')
@@ -665,33 +621,26 @@ def generate_shares_over_time_plot(df, date_column='published_at', shares_col='s
665 |  finally:
666 |      plt.close('all')
667 |
668 | -
669 |  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
670 |      """Generates a plot for comments over time."""
671 |      title = "Comments Over Time"
672 |      logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
673 | -
674 |      if df is None or df.empty:
675 |          return create_placeholder_plot(title=title, message="No post data for comments.")
676 | -
677 |      required_cols = [date_column, comments_col]
678 | -     missing_cols = [col for col in required_cols if col not in df.columns]
679 | -     if missing_cols:
680 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
681 | -
682 |      try:
683 |          df_copy = df.copy()
684 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
685 |          df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce')
686 |          df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column)
687 | -
688 |          if df_copy.empty:
689 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for comments plot.")
690 | -
691 | -         comments_over_time = df_copy.resample('D')[comments_col].sum()
692 |
693 |          fig, ax = plt.subplots(figsize=(10, 5))
694 | -         ax.plot(comments_over_time.index, comments_over_time.values, marker='.', linestyle='-')
695 |          ax.set_title(title)
696 |          ax.set_xlabel('Date')
697 |          ax.set_ylabel('Total Comments')
@@ -705,53 +654,190 @@ def generate_comments_over_time_plot(df, date_column='published_at', comments_co
705 |  finally:
706 |      plt.close('all')
707 |
708 | -
709 | - def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', text_column_for_check='comment_text'):
710 |      """
711 |      Generates a pie chart for comment sentiment distribution.
712 | -
713 | -
714 |      """
715 |      title = "Breakdown of Comments by Sentiment"
716 |      logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
717 |
718 |      if df is None or df.empty:
719 | -         return create_placeholder_plot(title=title, message="No data …")
720 | -
721 | -     # Check if the expected sentiment column exists.
722 | -     # Also check for a text column as a heuristic, as sentiment often comes with text.
723 |      if sentiment_column not in df.columns:
724 | -
725 | -
726 | -
727 | -
728 | -
729 |
730 | -     #
731 | -
732 | -
733 | -
734 |
735 |      try:
736 |          df_copy = df.copy()
737 | -         # Ensure sentiment column is treated as categorical
738 |          df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
739 | -         sentiment_counts = df_copy[sentiment_column].value_counts()
740 |
741 |          if sentiment_counts.empty or sentiment_counts.sum() == 0:
742 |              return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
743 |
744 |          fig, ax = plt.subplots(figsize=(8, 5))
745 | -         try:
746 | -             colors_map = plt.cm.get_cmap('Pastel2', len(sentiment_counts))
747 | -         except ValueError:
748 | -             colors_map = plt.cm.get_cmap('Accent', len(sentiment_counts))
749 | -
750 |          pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
751 |
752 | -         ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
753 |          ax.set_title(title)
754 |          ax.axis('equal')
755 |  plt.tight_layout()
756 |  logging.info(f"Successfully generated {title} plot.")
757 |  return fig
@@ -765,27 +851,28 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
765 |  if __name__ == '__main__':
766 |      # Create dummy data for testing
767 |      posts_data = {
768 | -        'id': [f'post{i}' for i in range(1, 8)],  #
769 |          'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
770 |          'likeCount': [10, 5, 12, 8, 15, 3, 20],
771 |          'commentCount': [2, 1, 3, 1, 4, 0, 5],
772 |          'shareCount': [1, 0, 1, 1, 2, 0, 1],
773 |          'clickCount': [20, 15, 30, 22, 40, 10, 50],
774 |          'impressionCount': [200, 150, 300, 220, 400, 100, 500],
775 | -        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
776 |      }
777 |      sample_merged_posts_df = pd.DataFrame(posts_data)
778 |
779 | -    # Dummy data for comments with sentiment (hypothetical)
780 | -    comments_sentiment_data = {
781 | -        'comment_id': range(10),
782 | -        'post_id': ['post1']*3 + ['post2']*2 + ['post3']*5,
783 | -        'comment_text': ['Great post!', 'I disagree.', 'Nice work.', 'Interesting.', 'Could be better.', 'Loved it!', 'Not sure.', 'Thanks!', 'Helpful.', 'Okay.'],
784 | -        'comment_sentiment': ['Positive', 'Negative', 'Positive', 'Neutral', 'Negative', 'Positive', 'Neutral', 'Positive', 'Positive', 'Neutral']
785 | -    }
786 | -    sample_comments_with_sentiment_df = pd.DataFrame(comments_sentiment_data)
787 | -
788 | -
789 |      # Updated Follower Stats Data
790 |      follower_data = {
791 |          'follower_count_type': [
@@ -795,118 +882,104 @@ if __name__ == '__main__':
795 |              'follower_industry', 'follower_industry',
796 |              'follower_seniority', 'follower_seniority'
797 |          ],
798 | -        # 'category_name' now holds dates for time-series, and actual categories for demographics
799 |          'category_name': [
800 |              '2024-01-01', '2024-02-01', '2024-03-01',  # Dates for monthly gains
801 | -            'USA', 'Canada', 'UK',
802 | -            'Engineering', 'Sales',
803 | -            'Tech', 'Finance',
804 | -            'Senior', 'Junior'
805 |          ],
806 |          'follower_count_organic': [
807 | -            100, 110, 125,
808 | -            500, 300, 150,
809 | -            400, 200,
810 | -            250, 180,
811 | -            300, 220
812 |          ],
813 |          'follower_count_paid': [
814 | -            20, 30, 25,
815 | -            50, 40, 60,
816 | -            30, 20,
817 | -            45, 35,
818 | -            60, 40
819 |          ]
820 |      }
821 |      sample_follower_stats_df = pd.DataFrame(follower_data)
822 |
823 | -    logging.info("--- Testing … ---")
824 | -
825 | -    fig_followers_count = generate_followers_count_over_time_plot(
826 | -        sample_follower_stats_df.copy(),
827 | -        type_value='follower_gains_monthly'  # date_info_column defaults to 'category_name'
828 | -    )
829 | -    if fig_followers_count and not isinstance(fig_followers_count, str): logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
830 | -
831 | -    fig_followers_rate = generate_followers_growth_rate_plot(
832 | -        sample_follower_stats_df.copy(),
833 | -        type_value='follower_gains_monthly'  # date_info_column defaults to 'category_name'
834 | -    )
835 | -    if fig_followers_rate and not isinstance(fig_followers_rate, str): logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
836 | -
837 | -    fig_geo = generate_followers_by_demographics_plot(
838 | -        sample_follower_stats_df.copy(),
839 | -        type_value='follower_geo',  # category_col defaults to 'category_name'
840 | -        plot_title="Followers by Location (Organic/Paid)"
841 | -    )
842 | -    if fig_geo and not isinstance(fig_geo, str): logging.info("Followers by Location (grouped organic/paid) plot generated.")
843 | -
844 | -    fig_role = generate_followers_by_demographics_plot(
845 | -        sample_follower_stats_df.copy(),
846 | -        type_value='follower_function',
847 | -        plot_title="Followers by Role (Organic/Paid)"
848 | -    )
849 | -    if fig_role and not isinstance(fig_role, str): logging.info("Followers by Role (grouped organic/paid) plot generated.")
850 | -
851 | -    fig_industry = generate_followers_by_demographics_plot(
852 | -        sample_follower_stats_df.copy(),
853 | -        type_value='follower_industry',
854 | -        plot_title="Followers by Industry (Organic/Paid)"
855 | -    )
856 | -    if fig_industry and not isinstance(fig_industry, str): logging.info("Followers by Industry (grouped organic/paid) plot generated.")
857 | -
858 | -    fig_seniority = generate_followers_by_demographics_plot(
859 | -        sample_follower_stats_df.copy(),
860 | -        type_value='follower_seniority',
861 | -        plot_title="Followers by Seniority (Organic/Paid)"
862 | -    )
863 | -    if fig_seniority and not isinstance(fig_seniority, str): logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
864 | -
865 | -    logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
866 |      fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
867 | -    if fig_posts_activity and not isinstance(fig_posts_activity, str): logging.info("Posts activity plot generated.")
868 |
869 |      fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
870 | -    if fig_engagement_type and not isinstance(fig_engagement_type, str): logging.info("Engagement type plot generated.")
871 |
872 | -    # Dummy mentions for testing
873 |      mentions_data = {
874 |          'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
875 |          'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
876 |      }
877 |      sample_mentions_df = pd.DataFrame(mentions_data)
878 |      fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
879 | -    if fig_mentions_activity and not isinstance(fig_mentions_activity, str): logging.info("Mentions activity plot generated.")
880 |      fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
881 | -    if fig_mention_sentiment and not isinstance(fig_mention_sentiment, str): logging.info("Mention sentiment plot generated.")
882 |
883 | -
884 | -    if …
885 |
886 |      fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
887 | -    if fig_reach and not isinstance(fig_reach, str): logging.info("Reach Over Time plot generated.")
888 | -
889 |      fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
890 | -    if fig_impressions and not isinstance(fig_impressions, str): logging.info("Impressions Over Time plot generated.")
891 | -
892 | -    logging.info("--- Testing NEW Plot Generations ---")
893 |      fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
894 | -    if fig_likes_time and not isinstance(fig_likes_time, str): logging.info("Likes Over Time plot generated.")
895 |
896 | -    fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
897 | -    if fig_clicks_time and not isinstance(fig_clicks_time, str): logging.info("Clicks Over Time plot generated.")
898 |
899 | -
900 | -
901 |
902 | -
903 | -    if …
904 |
905 | -
906 | -    if …
907 |
908 | -
909 | -    if …
910 |
911 |
912 |      logging.info("Test script finished. Review plots if displayed locally or saved.")
5 |  import base64
6 |  import numpy as np
7 |  import matplotlib.ticker as mticker
8 | + import ast  # For safely evaluating string representations of lists
9 |
10 |  # Configure logging for this module
11 |  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
185 |  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
186 |
187 |  fig, ax = plt.subplots(figsize=(8, 5))
188 | + # Using a qualitative colormap like 'Pastel1' or 'Set3' can be good for categorical data
189 | + colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
190 |  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
191 |  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
192 |  ax.set_title('Mention Sentiment Distribution')
193 |  ax.axis('equal')
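Side note on the colormap call above: requesting plt.cm.get_cmap('Pastel1', n) yields a colormap discretized to n entries, so indexing it 0..n-1 gives one distinct RGBA colour per sentiment class. A minimal sketch (assumes a Matplotlib version where plt.cm.get_cmap is still available; newer releases expose the same thing as matplotlib.colormaps['Pastel1'].resampled(n)):

    import matplotlib.pyplot as plt

    n = 3  # e.g. Positive / Neutral / Negative
    cmap = plt.cm.get_cmap('Pastel1', n)        # colormap discretized to n colours
    pie_colors = [cmap(i) for i in range(n)]    # RGBA tuples, ready for ax.pie(colors=...)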
446 |  ax.set_xlabel('Date')
447 |  ax.set_ylabel('Engagement Rate')
448 |  # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
449 | + max_rate_val = engagement_over_time.max()
450 | + formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val >= 0 else 100.0  # Heuristic for 0-1 vs 0-100 scale
451 | + if max_rate_val > 1.5 and formatter_xmax == 1.0:  # If data seems to be percentage but formatted as decimal
452 | +     formatter_xmax = 100.0
453 | + elif max_rate_val > 100 and formatter_xmax == 1.0:  # If data is clearly > 100 but we assumed 0-1
454 |       formatter_xmax = max_rate_val  # Or some other sensible upper bound for formatting
455 | +
456 |  ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
457 |  ax.grid(True, linestyle='--', alpha=0.7)
458 |  plt.xticks(rotation=45)
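For readers unfamiliar with the formatter used here: mticker.PercentFormatter(xmax=...) labels a tick value v as v / xmax * 100 with a percent sign, which is why the hunk picks xmax=1.0 when the engagement rate is on a 0-1 scale and a larger bound when the data is already expressed in percent. A minimal, self-contained sketch (not part of the commit; the sample values are made up):

    import matplotlib
    matplotlib.use("Agg")  # headless backend, just for the sketch
    import matplotlib.pyplot as plt
    import matplotlib.ticker as mticker

    fig, ax = plt.subplots()
    ax.plot([1, 2, 3], [0.02, 0.05, 0.08])  # rates on a 0-1 scale
    ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0))  # 0.05 is labelled "5%"
    # If the column already held 2.0, 5.0, 8.0 (percent), xmax=100.0 would give the same labels.
    fig.savefig("engagement_rate_axis_demo.png")
    plt.close(fig)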
525 |  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
526 |
527 |  if df_copy.empty:  # After dropping NaNs for essential columns
528 | +     return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
529 |
530 |  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
531 |
544 |  finally:
545 |      plt.close('all')
546 |
547 | + # --- New Plot Functions from User Request ---
548 |  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
549 | +     """Generates a plot for likes over time."""
550 |      title = "Reactions (Likes) Over Time"
551 |      logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
552 |      if df is None or df.empty:
553 |          return create_placeholder_plot(title=title, message="No post data for likes.")
554 |      required_cols = [date_column, likes_col]
555 | +     if any(col not in df.columns for col in required_cols):
556 | +         return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
557 |      try:
558 |          df_copy = df.copy()
559 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
560 |          df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce')
561 |          df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
562 |          if df_copy.empty:
563 | +             return create_placeholder_plot(title=title, message="No valid data after cleaning.")
564 | +
565 | +         data_over_time = df_copy.resample('D')[likes_col].sum()
566 |          fig, ax = plt.subplots(figsize=(10, 5))
567 | +         ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson')
568 |          ax.set_title(title)
569 |          ax.set_xlabel('Date')
570 |          ax.set_ylabel('Total Likes')
578 |      finally:
579 |          plt.close('all')
580 |
581 |  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
582 | +     """Generates a plot for clicks over time (can be same as reach if clicks are primary reach metric)."""
583 | +     # This is essentially the same as generate_reach_over_time_plot if reach_col is 'clickCount'.
584 | +     # For clarity, keeping it separate if user wants to distinguish or use a different column later.
585 |      title = "Clicks Over Time"
586 |      logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
587 | +     # Reusing logic from generate_reach_over_time_plot
588 | +     return generate_reach_over_time_plot(df, date_column, clicks_col)
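Because the rewritten generate_clicks_over_time_plot now just forwards its arguments, the two calls below should yield equivalent figures (a sketch, assuming the dummy sample_merged_posts_df built in the __main__ block further down):

    fig_a = generate_clicks_over_time_plot(sample_merged_posts_df)  # delegates internally
    fig_b = generate_reach_over_time_plot(sample_merged_posts_df, 'published_at', 'clickCount')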
589 |
590 |
591 |  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
592 |      """Generates a plot for shares over time."""
593 |      title = "Shares Over Time"
594 |      logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
595 |      if df is None or df.empty:
596 |          return create_placeholder_plot(title=title, message="No post data for shares.")
597 |      required_cols = [date_column, shares_col]
598 | +     if any(col not in df.columns for col in required_cols):
599 | +         return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
600 |      try:
601 |          df_copy = df.copy()
602 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
603 |          df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce')
604 |          df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column)
605 |          if df_copy.empty:
606 | +             return create_placeholder_plot(title=title, message="No valid data after cleaning.")
607 |
608 | +         data_over_time = df_copy.resample('D')[shares_col].sum()
609 |          fig, ax = plt.subplots(figsize=(10, 5))
610 | +         ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal')
611 |          ax.set_title(title)
612 |          ax.set_xlabel('Date')
613 |          ax.set_ylabel('Total Shares')
621 |      finally:
622 |          plt.close('all')
623 |
624 |  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
625 |      """Generates a plot for comments over time."""
626 |      title = "Comments Over Time"
627 |      logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
628 |      if df is None or df.empty:
629 |          return create_placeholder_plot(title=title, message="No post data for comments.")
630 |      required_cols = [date_column, comments_col]
631 | +     if any(col not in df.columns for col in required_cols):
632 | +         return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
633 |      try:
634 |          df_copy = df.copy()
635 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
636 |          df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce')
637 |          df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column)
638 |          if df_copy.empty:
639 | +             return create_placeholder_plot(title=title, message="No valid data after cleaning.")
640 |
641 | +         data_over_time = df_copy.resample('D')[comments_col].sum()
642 |          fig, ax = plt.subplots(figsize=(10, 5))
643 | +         ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold')
644 |          ax.set_title(title)
645 |          ax.set_xlabel('Date')
646 |          ax.set_ylabel('Total Comments')
654 |      finally:
655 |          plt.close('all')
656 |
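All of these time-series helpers lean on the same daily aggregation: index by the timestamp column, then resample('D') and sum, which also fills days with no posts with 0. A small sketch with made-up numbers:

    import pandas as pd

    df = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-03']),
        'shareCount': [1, 2, 4],
    })
    daily = df.set_index('published_at').resample('D')['shareCount'].sum()
    print(daily)  # 2023-01-01 -> 3, 2023-01-02 -> 0, 2023-01-03 -> 4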
657 | + def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
658 |      """
659 |      Generates a pie chart for comment sentiment distribution.
660 | +     Assumes df might be post-level with an aggregated or example sentiment,
661 | +     or ideally, a comment-level df with sentiment per comment.
662 | +     If date_column is provided, it's for logging/context but not directly used for filtering here.
663 |      """
664 |      title = "Breakdown of Comments by Sentiment"
665 |      logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
666 |
667 |      if df is None or df.empty:
668 | +         return create_placeholder_plot(title=title, message="No data for comment sentiment.")
669 |      if sentiment_column not in df.columns:
670 | +         # Check for a common alternative if the primary is missing (e.g. from post-level data)
671 | +         if 'sentiment' in df.columns and sentiment_column != 'sentiment':
672 | +             logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
673 | +             sentiment_column = 'sentiment'  # Use fallback
674 | +         else:
675 | +             return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")
676 |
677 | +     # If the sentiment column has no valid data (all NaNs, or not convertible)
678 | +     if df[sentiment_column].isnull().all():
679 | +         return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")
680 |
681 |      try:
682 |          df_copy = df.copy()
683 | +         # Ensure the sentiment column is treated as categorical (string)
684 |          df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
685 | +         sentiment_counts = df_copy[sentiment_column].value_counts().dropna()  # Dropna for safety
686 |
687 |          if sentiment_counts.empty or sentiment_counts.sum() == 0:
688 |              return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
689 |
690 |          fig, ax = plt.subplots(figsize=(8, 5))
691 | +         colors_map = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
692 |          pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
693 |
694 | +         ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
695 |          ax.set_title(title)
696 |          ax.axis('equal')
697 | +         plt.tight_layout()
698 | +         return fig
699 | +     except Exception as e:
700 | +         logging.error(f"Error generating {title}: {e}", exc_info=True)
701 | +         return create_placeholder_plot(title=f"{title} Error", message=str(e))
702 | +     finally:
703 | +         plt.close('all')
704 | +
705 | + # --- NEW PLOT FUNCTIONS FOR CONTENT STRATEGY ---
706 | + def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
707 | +     """Generates a plot for post frequency over time (e.g., daily, weekly, monthly)."""
708 | +     title = f"Post Frequency Over Time ({resample_period})"
709 | +     logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
710 | +
711 | +     if df is None or df.empty:
712 | +         return create_placeholder_plot(title=title, message="No data available.")
713 | +     if date_column not in df.columns:
714 | +         return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")
715 | +
716 | +     try:
717 | +         df_copy = df.copy()
718 | +         if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
719 | +             df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
720 | +
721 | +         df_copy = df_copy.dropna(subset=[date_column])
722 | +         if df_copy.empty:
723 | +             return create_placeholder_plot(title=title, message="No valid date entries found.")
724 | +
725 | +         post_frequency = df_copy.set_index(date_column).resample(resample_period).size()
726 | +
727 | +         if post_frequency.empty:
728 | +             return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")
729 | +
730 | +         fig, ax = plt.subplots(figsize=(10, 5))
731 | +         post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None)
732 | +         ax.set_title(title)
733 | +         ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
734 | +         ax.set_ylabel('Number of Posts')
735 | +         ax.grid(True, linestyle='--', alpha=0.7)
736 | +         plt.xticks(rotation=45)
737 | +         plt.tight_layout()
738 | +         logging.info(f"Successfully generated {title} plot.")
739 | +         return fig
740 | +     except Exception as e:
741 | +         logging.error(f"Error generating {title}: {e}", exc_info=True)
742 | +         return create_placeholder_plot(title=f"{title} Error", message=str(e))
743 | +     finally:
744 | +         plt.close('all')
745 | +
746 | + def generate_content_format_breakdown_plot(df, format_col='media_type'):
747 | +     """Generates a bar chart for breakdown of content by format."""
748 | +     title = "Breakdown of Content by Format"
749 | +     logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")
750 | +
751 | +     if df is None or df.empty:
752 | +         return create_placeholder_plot(title=title, message="No data available.")
753 | +     if format_col not in df.columns:
754 | +         return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")
755 | +
756 | +     try:
757 | +         df_copy = df.copy()
758 | +         format_counts = df_copy[format_col].value_counts().dropna()
759 | +
760 | +         if format_counts.empty:
761 | +             return create_placeholder_plot(title=title, message="No content format data available.")
762 | +
763 | +         fig, ax = plt.subplots(figsize=(8, 6))
764 | +         format_counts.plot(kind='bar', ax=ax, color='skyblue')
765 | +         ax.set_title(title)
766 | +         ax.set_xlabel('Media Type')
767 | +         ax.set_ylabel('Number of Posts')
768 | +         ax.grid(axis='y', linestyle='--', alpha=0.7)
769 | +         plt.xticks(rotation=45, ha="right")
770 | +         plt.tight_layout()
771 | +
772 | +         # Add counts on top of bars
773 | +         for i, v in enumerate(format_counts):
774 | +             ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom')
775 | +
776 | +         logging.info(f"Successfully generated {title} plot.")
777 | +         return fig
778 | +     except Exception as e:
779 | +         logging.error(f"Error generating {title}: {e}", exc_info=True)
780 | +         return create_placeholder_plot(title=f"{title} Error", message=str(e))
781 | +     finally:
782 | +         plt.close('all')
783 | +
784 | + def _parse_eb_label(label_data):
785 | +     """Helper to parse eb_labels which might be lists or string representations of lists."""
786 | +     if isinstance(label_data, list):
787 | +         return label_data
788 | +     if isinstance(label_data, str):
789 | +         try:
790 | +             # Try to evaluate as a list
791 | +             parsed = ast.literal_eval(label_data)
792 | +             if isinstance(parsed, list):
793 | +                 return parsed
794 | +             # If it's a single string not in list format, treat as a single label
795 | +             return [str(parsed)]
796 | +         except (ValueError, SyntaxError):
797 | +             # If not a list string, treat the whole string as one label
798 | +             return [label_data] if label_data.strip() else []
799 | +     if pd.isna(label_data):
800 | +         return []
801 | +     return []  # Default for other types
802 | +
803 | + def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
804 | +     """Generates a horizontal bar chart for breakdown of content by topics."""
805 | +     title = f"Breakdown of Content by Topics (Top {top_n})"
806 | +     logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")
807 | +
808 | +     if df is None or df.empty:
809 | +         return create_placeholder_plot(title=title, message="No data available.")
810 | +     if topics_col not in df.columns:
811 | +         return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
812 | +
813 | +     try:
814 | +         df_copy = df.copy()
815 | +
816 | +         # Apply parsing and explode
817 | +         parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
818 | +         exploded_labels = parsed_labels.explode().dropna()
819 | +
820 | +         if exploded_labels.empty:
821 | +             return create_placeholder_plot(title=title, message="No topic data found after processing labels.")
822 | +
823 | +         topic_counts = exploded_labels.value_counts()
824 | +
825 | +         if topic_counts.empty:
826 | +             return create_placeholder_plot(title=title, message="No topics to display after counting.")
827 | +
828 | +         # Take top N and sort for plotting (descending for horizontal bar)
829 | +         top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
830 | +
831 | +         fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
832 | +         top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
833 | +         ax.set_title(title)
834 | +         ax.set_xlabel('Number of Posts')
835 | +         ax.set_ylabel('Topic')
836 | +
837 | +         # Add counts next to bars
838 | +         for i, (topic, count) in enumerate(top_topics.items()):
839 | +             ax.text(count + (0.01 * top_topics.max()), i, str(count), va='center')
840 | +
841 |          plt.tight_layout()
842 |          logging.info(f"Successfully generated {title} plot.")
843 |          return fig
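A quick check of how the _parse_eb_label helper above normalizes the mixed formats used by the eb_labels test column (stringified list, real list, bare string, missing value); a sketch for illustration only, assuming the helper is imported from this module:

    samples = ["['AI', 'Tech']", ['Innovation'], 'General', None]
    print([_parse_eb_label(s) for s in samples])
    # -> [['AI', 'Tech'], ['Innovation'], ['General'], []]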
851 |  if __name__ == '__main__':
852 |      # Create dummy data for testing
853 |      posts_data = {
854 | +         'id': [f'post{i}' for i in range(1, 8)],  # Increased to 7 for more data
855 |          'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
856 |          'likeCount': [10, 5, 12, 8, 15, 3, 20],
857 |          'commentCount': [2, 1, 3, 1, 4, 0, 5],
858 |          'shareCount': [1, 0, 1, 1, 2, 0, 1],
859 |          'clickCount': [20, 15, 30, 22, 40, 10, 50],
860 |          'impressionCount': [200, 150, 300, 220, 400, 100, 500],
861 | +         'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
862 | +         'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],  # New column
863 | +         'eb_labels': [  # New column with various formats
864 | +             "['AI', 'Tech']",
865 | +             ['Innovation'],
866 | +             'General',
867 | +             None,
868 | +             ['Tech', 'Future'],
869 | +             "['AI', 'Development']",
870 | +             ['Tech']
871 | +         ],
872 | +         'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive']  # For comment sentiment plot
873 |      }
874 |      sample_merged_posts_df = pd.DataFrame(posts_data)
875 |
876 |      # Updated Follower Stats Data
877 |      follower_data = {
878 |          'follower_count_type': [
882 |              'follower_industry', 'follower_industry',
883 |              'follower_seniority', 'follower_seniority'
884 |          ],
885 |          'category_name': [
886 |              '2024-01-01', '2024-02-01', '2024-03-01',  # Dates for monthly gains
887 | +             'USA', 'Canada', 'UK',  # Geo
888 | +             'Engineering', 'Sales',  # Function/Role
889 | +             'Tech', 'Finance',  # Industry
890 | +             'Senior', 'Junior'  # Seniority
891 |          ],
892 |          'follower_count_organic': [
893 | +             100, 110, 125,  # Organic monthly gains
894 | +             500, 300, 150,  # Organic Geo counts
895 | +             400, 200,  # Organic Role counts
896 | +             250, 180,  # Organic Industry counts
897 | +             300, 220  # Organic Seniority counts
898 |          ],
899 |          'follower_count_paid': [
900 | +             20, 30, 25,  # Paid monthly gains
901 | +             50, 40, 60,  # Paid Geo counts
902 | +             30, 20,  # Paid Role counts
903 | +             45, 35,  # Paid Industry counts
904 | +             60, 40  # Paid Seniority counts
905 |          ]
906 |      }
907 |      sample_follower_stats_df = pd.DataFrame(follower_data)
908 |
909 | +     logging.info("--- Testing Existing Plot Generations ---")
910 | +     # ... (keep existing tests for older plots) ...
911 |      fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
912 | +     if fig_posts_activity: logging.info("Posts activity plot generated.")
913 |
914 |      fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
915 | +     if fig_engagement_type: logging.info("Engagement type plot generated.")
916 |
917 |      mentions_data = {
918 |          'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
919 |          'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
920 |      }
921 |      sample_mentions_df = pd.DataFrame(mentions_data)
922 |      fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
923 | +     if fig_mentions_activity: logging.info("Mentions activity plot generated.")
924 |      fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
925 | +     if fig_mention_sentiment: logging.info("Mention sentiment plot generated.")
926 |
927 | +     fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly')
928 | +     if fig_followers_count: logging.info("Followers Count Over Time plot generated.")
929 | +     fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly')
930 | +     if fig_followers_rate: logging.info("Followers Growth Rate plot generated.")
931 | +     fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
932 | +     if fig_geo: logging.info("Followers by Location plot generated.")
933 | +     # ... add other follower demographic tests ...
934 |
935 | +     fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
936 | +     if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
937 |      fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
938 | +     if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
939 |      fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
940 | +     if fig_impressions: logging.info("Impressions Over Time plot generated.")
941 | +
942 |      fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
943 | +     if fig_likes_time: logging.info("Likes Over Time plot generated.")
944 | +     fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())  # Uses reach logic
945 | +     if fig_clicks_time: logging.info("Clicks Over Time plot generated.")
946 | +     fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
947 | +     if fig_shares_time: logging.info("Shares Over Time plot generated.")
948 | +     fig_comments_time = generate_comments_over_time_plot(sample_merged_posts_df.copy())
949 | +     if fig_comments_time: logging.info("Comments Over Time plot generated.")
950 | +     fig_comments_sentiment = generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy(), sentiment_column='comment_sentiment')
951 | +     if fig_comments_sentiment: logging.info("Comments Sentiment Breakdown plot generated.")
952 |
953 |
954 | +     logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
955 | +     fig_post_freq = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D')
956 | +     if fig_post_freq: logging.info("Post Frequency (Daily) plot generated.")
957 | +
958 | +     fig_post_freq_w = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W')
959 | +     if fig_post_freq_w: logging.info("Post Frequency (Weekly) plot generated.")
960 |
961 | +     fig_content_format = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='media_type')
962 | +     if fig_content_format: logging.info("Content Format Breakdown plot generated.")
963 |
964 | +     fig_content_topics = generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5)
965 | +     if fig_content_topics: logging.info("Content Topic Breakdown plot generated.")
966 | +
967 | +     # Test with missing columns / empty data for new plots
968 | +     logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
969 | +     empty_df = pd.DataFrame()
970 | +     fig_post_freq_empty = generate_post_frequency_plot(empty_df.copy())
971 | +     if fig_post_freq_empty: logging.info("Post Frequency (empty df) placeholder generated.")
972 |
973 | +     fig_content_format_missing_col = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col')
974 | +     if fig_content_format_missing_col: logging.info("Content Format (missing col) placeholder generated.")
975 | +
976 | +     fig_content_topics_no_labels = generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels')  # eb_labels won't exist
977 | +     if fig_content_topics_no_labels: logging.info("Content Topic (missing col) placeholder generated.")
978 | +
979 | +     df_no_topics_data = sample_merged_posts_df.copy()
980 | +     df_no_topics_data['eb_labels'] = None
981 | +     fig_content_topics_all_none = generate_content_topic_breakdown_plot(df_no_topics_data, topics_col='eb_labels')
982 | +     if fig_content_topics_all_none: logging.info("Content Topic (all None labels) placeholder generated.")
983 |
984 |
985 |      logging.info("Test script finished. Review plots if displayed locally or saved.")