# analytics_plot_generator.py -- Matplotlib figure builders for analytics data.
# NOTE(review): `logging`, `pd` and `plt` are used below but their imports are
# not visible in this section; presumably they sit above it -- confirm.
from io import BytesIO
import base64
import numpy as np
import matplotlib.ticker as mticker
import matplotlib.patches as patches  # FancyBboxPatch for the rounded plot background
import ast  # For safely evaluating string representations of lists

# Configure logging for this module
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')


def _apply_rounded_corners_and_transparent_bg(fig, ax):
    """Give *ax* a rounded, light background and make the figure transparent.

    The figure and axes patches are made fully transparent, the four
    rectangular spines are hidden, and a rounded ``FancyBboxPatch`` covering
    the whole axes (axes coordinates 0..1) is added behind the plot content.

    Args:
        fig: Matplotlib figure that owns ``ax``.
        ax: Matplotlib axes to restyle in place.
    """
    fig.patch.set_alpha(0.0)
    ax.patch.set_alpha(0.0)

    # The rounded patch below replaces the rectangular frame.
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    background = patches.FancyBboxPatch(
        (0, 0),
        1,
        1,
        boxstyle="round,pad=0,rounding_size=0.015",
        transform=ax.transAxes,  # coordinates are relative to the axes
        facecolor='whitesmoke',
        edgecolor='lightgray',
        linewidth=0.5,
        zorder=-1,  # behind gridlines and data
    )
    ax.add_patch(background)

    # If a grid is already showing, redraw it with an explicit zorder so it
    # sits above the new background.
    gridlines = ax.get_xgridlines() + ax.get_ygridlines()
    if ax.axison and any(line.get_visible() for line in gridlines):
        ax.grid(True, linestyle='--', alpha=0.6, zorder=0)


def _finish_time_series_axes(fig, ax, ylabel, show_legend=False):
    """Apply the labels, grid, tick rotation and margins shared by the date plots."""
    ax.set_xlabel('Date')
    ax.set_ylabel(ylabel)
    if show_legend:
        # Legend() takes no ``zorder`` keyword; set it on the created artist.
        ax.legend().set_zorder(2)
    ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
    # Rotate the labels on this axes explicitly instead of going through
    # pyplot's notion of the "current" axes.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout(pad=0.5)
    fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95)


def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Create a text-only figure used when there is no data or plotting failed.

    Args:
        title: First line of the centered text.
        message: Second line of the centered text.

    Returns:
        A Matplotlib figure. If building the styled placeholder fails, a
        plainer fallback figure is returned instead.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True, zorder=1)
        ax.axis('off')
        # tight_layout is deliberately not used here; fixed margins only.
        fig.subplots_adjust(top=0.90, bottom=0.10, left=0.10, right=0.90)
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}")
        if fig is not None:
            plt.close(fig)  # do not leak the half-built figure
        fig_err, ax_err = plt.subplots(figsize=(8, 4))
        fig_err.patch.set_alpha(0.0)
        ax_err.patch.set_alpha(0.0)
        ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center', zorder=1)
        ax_err.axis('off')
        return fig_err


def _generate_daily_activity_plot(df, date_column, noun, line_color=None):
    """Plot the number of rows per day; shared by the posts and mentions plots.

    Args:
        df: DataFrame with one row per item.
        date_column: Name of the column holding the item's date.
        noun: Lower-case plural used in log and placeholder text
            (``'posts'`` or ``'mentions'``).
        line_color: Optional line color; the default color cycle is used
            when ``None``.

    Returns:
        A Matplotlib figure (a placeholder figure on missing data or error).
    """
    label = noun.capitalize()
    title = f"{label} Activity Over Time"
    logging.info(f"Generating {noun} activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    if df is None or df.empty:
        logging.warning(f"{label} activity: DataFrame is empty.")
        return create_placeholder_plot(title=title, message="No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"{label} activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
        return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")

    fig = None
    try:
        df_copy = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            logging.info(f"{label} activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=title, message="No valid date entries found.")

        per_day = df_copy.set_index(date_column).resample('D').size()
        if per_day.empty:
            logging.info(f"{label} activity: No {noun} after resampling by day.")
            return create_placeholder_plot(title=title, message=f"No {noun} in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        plot_kwargs = {'kind': 'line', 'ax': ax, 'marker': 'o', 'linestyle': '-', 'zorder': 1}
        if line_color is not None:
            plot_kwargs['color'] = line_color
        per_day.plot(**plot_kwargs)

        _finish_time_series_axes(fig, ax, f"Number of {label}")
        logging.info(f"Successfully generated {noun} activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {noun} activity plot: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{label} Activity Error", message=str(e))


def generate_posts_activity_plot(df, date_column='published_at'):
    """Generate a line plot of the number of posts per day.

    Args:
        df: DataFrame with one row per post.
        date_column: Column holding the post date.

    Returns:
        A Matplotlib figure (a placeholder figure on missing data or error).
    """
    return _generate_daily_activity_plot(df, date_column, 'posts')


def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'):
    """Generate a bar plot of total likes, comments and shares.

    Non-numeric values in the three columns are coerced to 0 before summing.

    Args:
        df: DataFrame with one row per post.
        likes_col: Column holding like counts.
        comments_col: Column holding comment counts.
        shares_col: Column holding share counts.

    Returns:
        A Matplotlib figure (a placeholder figure on missing data, all-zero
        totals, or error).
    """
    logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")

    required_cols = [likes_col, comments_col, shares_col]
    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.")

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title="Post Engagement Types", message=msg)

    fig = None
    try:
        df_copy = df.copy()
        for col in required_cols:
            df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)

        engagement_data = {
            'Likes': df_copy[likes_col].sum(),
            'Comments': df_copy[comments_col].sum(),
            'Shares': df_copy[shares_col].sum(),
        }
        categories = list(engagement_data)
        values = list(engagement_data.values())

        if all(total == 0 for total in values):
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(title="Post Engagement Types", message="No engagement data (likes, comments, shares) in the selected period.")

        # plt.cm.get_cmap is gone in recent Matplotlib; pyplot.get_cmap
        # provides the same resampled colormap.
        palette = plt.get_cmap('Pastel1', len(categories))

        fig, ax = plt.subplots(figsize=(8, 5))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        bars = ax.bar(categories, values, color=[palette(i) for i in range(len(categories))], zorder=1)
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.6, zorder=0)

        # Lift each value label slightly above its bar (1% of the tallest bar).
        label_offset = 0.01 * max(values, default=10)
        for bar in bars:
            yval = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2.0, yval + label_offset, str(int(yval)), ha='center', va='bottom', zorder=2)

        fig.tight_layout(pad=0.5)
        fig.subplots_adjust(top=0.92, bottom=0.15, left=0.1, right=0.95)
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title="Engagement Type Error", message=str(e))


def generate_mentions_activity_plot(df, date_column='date'):
    """Generate a line plot of the number of mentions per day.

    Args:
        df: DataFrame with one row per mention.
        date_column: Column holding the mention date.

    Returns:
        A Matplotlib figure (a placeholder figure on missing data or error).
    """
    return _generate_daily_activity_plot(df, date_column, 'mentions', line_color='purple')


def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Generate a pie chart of how often each sentiment label occurs.

    Args:
        df: DataFrame with one row per mention.
        sentiment_column: Column holding the sentiment label.

    Returns:
        A Matplotlib figure (a placeholder figure on missing data or error).
    """
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)

    fig = None
    try:
        sentiment_counts = df.copy()[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        palette = plt.get_cmap('Pastel2', len(sentiment_counts))
        ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct='%1.1f%%',
            startangle=90,
            colors=[palette(i) for i in range(len(sentiment_counts))],
            # Axes.pie has no ``zorder`` keyword (passing one raises
            # TypeError); wedge properties go through ``wedgeprops``.
            wedgeprops={'zorder': 1},
        )
        ax.axis('equal')  # equal aspect ratio so the pie is a circle
        # tight_layout is not used here; fixed margins give the pie its space.
        fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))


def generate_followers_count_over_time_plot(df, date_info_column='category_name',
                                            organic_count_col='follower_count_organic',
                                            paid_count_col='follower_count_paid',
                                            type_filter_column='follower_count_type',
                                            type_value='follower_gains_monthly'):
    """Generate a line plot of organic and paid follower counts over time.

    Only rows where ``type_filter_column == type_value`` are plotted. Dates
    are parsed from ``date_info_column``; unparsable dates are dropped and
    non-numeric counts are treated as 0.

    Returns:
        A Matplotlib figure (a placeholder figure on missing data or error).
    """
    title = f"Followers Count Over Time ({type_value})"
    logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    fig = None
    try:
        df_copy = df.copy()
        df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
        for count_col in (organic_count_col, paid_count_col):
            df_filtered[count_col] = pd.to_numeric(df_filtered[count_col], errors='coerce').fillna(0)
        df_filtered = (
            df_filtered
            .dropna(subset=['datetime_obj', organic_count_col, paid_count_col])
            .sort_values(by='datetime_obj')
        )
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        dates = df_filtered['datetime_obj']
        ax.plot(dates, df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers', zorder=1)
        ax.plot(dates, df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers', zorder=1)
        _finish_time_series_axes(fig, ax, 'Follower Count', show_legend=True)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))


def generate_followers_growth_rate_plot(df, date_info_column='category_name',
                                        organic_count_col='follower_count_organic',
                                        paid_count_col='follower_count_paid',
                                        type_filter_column='follower_count_type',
                                        type_value='follower_gains_monthly'):
    """Generate a line plot of period-over-period follower growth in percent.

    Only rows where ``type_filter_column == type_value`` are used. Growth is
    ``pct_change() * 100`` of each count column after sorting by date;
    infinite results (division by a zero count) are treated as missing.

    Returns:
        A Matplotlib figure (a placeholder figure when fewer than two dated
        rows exist, when no growth value can be computed, or on error).
    """
    title = f"Follower Growth Rate ({type_value})"
    logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    fig = None
    try:
        df_copy = df.copy()
        df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
        for count_col in (organic_count_col, paid_count_col):
            df_filtered[count_col] = pd.to_numeric(df_filtered[count_col], errors='coerce')
        df_filtered = (
            df_filtered
            .dropna(subset=['datetime_obj'])
            .sort_values(by='datetime_obj')
            .set_index('datetime_obj')
        )
        if len(df_filtered) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100
        df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100
        df_filtered = df_filtered.replace([np.inf, -np.inf], np.nan)

        # Decide what can be drawn before creating the figure, so the
        # "nothing to plot" path does not leave an unclosed figure behind.
        has_organic = df_filtered['organic_growth_rate'].notna().any()
        has_paid = df_filtered['paid_growth_rate'].notna().any()
        if not (has_organic or has_paid):
            return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")

        fig, ax = plt.subplots(figsize=(10, 5))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        if has_organic:
            ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate', zorder=1)
        if has_paid:
            ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate', zorder=1)

        ax.yaxis.set_major_formatter(mticker.PercentFormatter())
        _finish_time_series_axes(fig, ax, 'Growth Rate (%)', show_legend=True)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))


# NOTE(review): generate_followers_by_demographics_plot begins at this point in
# the original; only part of its signature was visible, so it is not reproduced.
DF rows: {len(df) if df is not None else 'None'}") - -    if df is None or df.empty: -        return create_placeholder_plot(title=plot_title, message="No follower data available.") - -    required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column] -    missing_cols = [col for col in required_cols if col not in df.columns] -    if missing_cols: -        return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") - -    if type_value is None: -        return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.") - -    fig = None -    try: -        df_copy = df.copy() -        df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() - -        if df_filtered.empty: -            return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.") - -        df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0) -        df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0) -        demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum() -        demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col] -        demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort']) - -        if demographics_data.empty: -            return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.") - -        top_n = 10 -        plot_title_updated = plot_title # Use original plot_title for placeholder if needed -        if len(demographics_data) > top_n: -            demographics_data = demographics_data.head(top_n) -            # plot_title_updated = f"{plot_title} (Top {top_n})" # No 
longer setting internal title - -        fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) ) -        bar_width = 0.35 -        index = np.arange(len(demographics_data.index)) -        bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue') -        bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral') - -        # ax.set_title(plot_title_updated, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel(category_col.replace('_', ' ').title()) -        ax.set_ylabel('Number of Followers') -        ax.set_xticks(index) -        ax.set_xticklabels(demographics_data.index, rotation=45, ha="right") -        ax.legend() -        ax.grid(axis='y', linestyle='--', alpha=0.7) - -        for bar_group in [bars1, bars2]: -            for bar_item in bar_group: -                yval = bar_item.get_height() -                if yval > 0: -                    ax.text(bar_item.get_x() + bar_item.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]), -                            str(int(yval)), ha='center', va='bottom', fontsize=8) - -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.20) # Increased bottom margin for rotated labels, top for Gradio label -        return fig -    except Exception as e: -        logging.error(f"Error generating {plot_title}: {e}", exc_info=True) -        if fig: plt.close(fig) -        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e)) -    finally: -        pass + organic_count_col='follower_count_organic', + paid_count_col='follower_count_paid', + type_filter_column='follower_count_type', + type_value=None, plot_title="Followers by Demographics"): + logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. 
DF rows: {len(df) if df is not None else 'None'}") + + if df is None or df.empty: + return create_placeholder_plot(title=plot_title, message="No follower data available.") + + required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column] + missing_cols = [col for col in required_cols if col not in df.columns] + if missing_cols: + return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") + + if type_value is None: + return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.") + + fig = None + try: + df_copy = df.copy() + df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() + + if df_filtered.empty: + return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.") + + df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0) + df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0) + demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum() + demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col] + demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort']) + + if demographics_data.empty: + return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.") + + top_n = 10 + if len(demographics_data) > top_n: + demographics_data = demographics_data.head(top_n) + + fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) ) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + bar_width = 0.35 + index = np.arange(len(demographics_data.index)) + + # Using tab10 colormap for distinct colors for organic and paid + # If you wanted each CATEGORY (e.g., 
Italy, UK) to have different colors for its pair of bars, + # that would require a more complex color mapping. Current setup is distinct for Organic vs Paid. + color_organic = plt.cm.get_cmap('tab10')(0) + color_paid = plt.cm.get_cmap('tab10')(1) + + bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color=color_organic, zorder=1) + bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color=color_paid, zorder=1) + + ax.set_xlabel(category_col.replace('_', ' ').title()) + ax.set_ylabel('Number of Followers') + ax.set_xticks(index) + ax.set_xticklabels(demographics_data.index, rotation=45, ha="right") + ax.legend(zorder=2) + ax.grid(axis='y', linestyle='--', alpha=0.6, zorder=0) + + for bar_group in [bars1, bars2]: + for bar_item in bar_group: + yval = bar_item.get_height() + if yval > 0: + ax.text(bar_item.get_x() + bar_item.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]), + str(int(yval)), ha='center', va='bottom', fontsize=8, zorder=2) + + fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.92, bottom=0.25, left=0.1, right=0.95) + return fig + except Exception as e: + logging.error(f"Error generating {plot_title}: {e}", exc_info=True) + if fig: plt.close(fig) + return create_placeholder_plot(title=f"{plot_title} Error", message=str(e)) def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'): -    title = "Engagement Rate Over Time" # For logging/placeholder -    logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. 
DF rows: {len(df) if df is not None else 'None'}") - -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No post data for engagement rate.") - -    required_cols = [date_column, engagement_rate_col] -    missing_cols = [col for col in required_cols if col not in df.columns] -    if missing_cols: -        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") - -    fig = None -    try: -        df_copy = df.copy() -        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') -        df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce') -        df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column) - -        if df_copy.empty: -            return create_placeholder_plot(title=title, message="No valid data after cleaning.") - -        engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean() -        engagement_over_time = engagement_over_time.dropna() - -        if engagement_over_time.empty: -            return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.") - -        fig, ax = plt.subplots(figsize=(10, 5)) -        ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange') -        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel('Date') -        ax.set_ylabel('Engagement Rate') -        max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0 -        formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0 -        if max_rate_val > 1.5 and formatter_xmax == 1.0: -             formatter_xmax = 100.0 -        elif max_rate_val > 100 and formatter_xmax == 1.0: -             formatter_xmax = max_rate_val - -        
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax)) -        ax.grid(True, linestyle='--', alpha=0.7) -        plt.xticks(rotation=45) -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing -        return fig -    except Exception as e: -        logging.error(f"Error generating {title}: {e}", exc_info=True) -        if fig: plt.close(fig) -        return create_placeholder_plot(title=f"{title} Error", message=str(e)) -    finally: -        pass + title = "Engagement Rate Over Time" + logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {len(df) if df is not None else 'None'}") + + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No post data for engagement rate.") + + required_cols = [date_column, engagement_rate_col] + missing_cols = [col for col in required_cols if col not in df.columns] + if missing_cols: + return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") + + fig = None + try: + df_copy = df.copy() + df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce') + df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column) + + if df_copy.empty: + return create_placeholder_plot(title=title, message="No valid data after cleaning.") + + engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean() + engagement_over_time = engagement_over_time.dropna() + + if engagement_over_time.empty: + return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.") + + fig, ax = plt.subplots(figsize=(10, 5)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange', zorder=1) + ax.set_xlabel('Date') + ax.set_ylabel('Engagement Rate') + max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0 + formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0 + if max_rate_val > 1.5 and formatter_xmax == 1.0: + formatter_xmax = 100.0 + elif max_rate_val > 100 and formatter_xmax == 1.0: + formatter_xmax = max_rate_val + + ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax)) + ax.grid(True, linestyle='--', alpha=0.6, zorder=0) + plt.xticks(rotation=45) + fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95) + return fig + except Exception as e: + logging.error(f"Error generating {title}: {e}", exc_info=True) + if fig: plt.close(fig) + return create_placeholder_plot(title=f"{title} Error", message=str(e)) def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'): -    title = "Reach Over Time (Clicks)" # For logging/placeholder -    logging.info(f"Generating {title}. 
Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}") - -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No post data for reach.") - -    required_cols = [date_column, reach_col] -    missing_cols = [col for col in required_cols if col not in df.columns] -    if missing_cols: -        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") - -    fig = None -    try: -        df_copy = df.copy() -        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') -        df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce') -        df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column) - -        if df_copy.empty: -            return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.") - -        reach_over_time = df_copy.resample('D')[reach_col].sum() - -        fig, ax = plt.subplots(figsize=(10, 5)) -        ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen') -        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel('Date') -        ax.set_ylabel('Total Clicks') -        ax.grid(True, linestyle='--', alpha=0.7) -        plt.xticks(rotation=45) -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing -        return fig -    except Exception as e: -        logging.error(f"Error generating {title}: {e}", exc_info=True) -        if fig: plt.close(fig) -        return create_placeholder_plot(title=f"{title} Error", message=str(e)) -    finally: -        pass + title = "Reach Over Time (Clicks)" + logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. 
DF rows: {len(df) if df is not None else 'None'}") + + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No post data for reach.") + + required_cols = [date_column, reach_col] + missing_cols = [col for col in required_cols if col not in df.columns] + if missing_cols: + return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") + + fig = None + try: + df_copy = df.copy() + df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce') + df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column) + + if df_copy.empty: + return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.") + + reach_over_time = df_copy.resample('D')[reach_col].sum() + + fig, ax = plt.subplots(figsize=(10, 5)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen', zorder=1) + ax.set_xlabel('Date') + ax.set_ylabel('Total Clicks') + ax.grid(True, linestyle='--', alpha=0.6, zorder=0) + plt.xticks(rotation=45) + fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95) + return fig + except Exception as e: + logging.error(f"Error generating {title}: {e}", exc_info=True) + if fig: plt.close(fig) + return create_placeholder_plot(title=f"{title} Error", message=str(e)) def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'): -    title = "Impressions Over Time" # For logging/placeholder -    logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. 
DF rows: {len(df) if df is not None else 'None'}") - -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No post data for impressions.") - -    required_cols = [date_column, impressions_col] -    missing_cols = [col for col in required_cols if col not in df.columns] -    if missing_cols: -        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") - -    fig = None -    try: -        df_copy = df.copy() -        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') -        df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce') -        df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column) - -        if df_copy.empty: -            return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.") - -        impressions_over_time = df_copy.resample('D')[impressions_col].sum() - -        fig, ax = plt.subplots(figsize=(10, 5)) -        ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue') -        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel('Date') -        ax.set_ylabel('Total Impressions') -        ax.grid(True, linestyle='--', alpha=0.7) -        plt.xticks(rotation=45) -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing -        return fig -    except Exception as e: -        logging.error(f"Error generating {title}: {e}", exc_info=True) -        if fig: plt.close(fig) -        return create_placeholder_plot(title=f"{title} Error", message=str(e)) -    finally: -        pass + title = "Impressions Over Time" + logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. 
DF rows: {len(df) if df is not None else 'None'}") + + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No post data for impressions.") + + required_cols = [date_column, impressions_col] + missing_cols = [col for col in required_cols if col not in df.columns] + if missing_cols: + return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") + + fig = None + try: + df_copy = df.copy() + df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce') + df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column) + + if df_copy.empty: + return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.") + + impressions_over_time = df_copy.resample('D')[impressions_col].sum() + + fig, ax = plt.subplots(figsize=(10, 5)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue', zorder=1) + ax.set_xlabel('Date') + ax.set_ylabel('Total Impressions') + ax.grid(True, linestyle='--', alpha=0.6, zorder=0) + plt.xticks(rotation=45) + fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95) + return fig + except Exception as e: + logging.error(f"Error generating {title}: {e}", exc_info=True) + if fig: plt.close(fig) + return create_placeholder_plot(title=f"{title} Error", message=str(e)) def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'): -    title = "Reactions (Likes) Over Time" # For logging/placeholder -    logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. 
DF rows: {len(df) if df is not None else 'None'}") -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No post data for likes.") -    required_cols = [date_column, likes_col] -    if any(col not in df.columns for col in required_cols): -        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") - -    fig = None -    try: -        df_copy = df.copy() -        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') -        df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce') -        df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column) -        if df_copy.empty: -            return create_placeholder_plot(title=title, message="No valid data after cleaning.") - -        data_over_time = df_copy.resample('D')[likes_col].sum() -        fig, ax = plt.subplots(figsize=(10, 5)) -        ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson') -        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel('Date') -        ax.set_ylabel('Total Likes') -        ax.grid(True, linestyle='--', alpha=0.7) -        plt.xticks(rotation=45) -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing -        return fig -    except Exception as e: -        logging.error(f"Error generating {title}: {e}", exc_info=True) -        if fig: plt.close(fig) -        return create_placeholder_plot(title=f"{title} Error", message=str(e)) -    finally: -        pass + title = "Reactions (Likes) Over Time" + logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. 
DF rows: {len(df) if df is not None else 'None'}") + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No post data for likes.") + required_cols = [date_column, likes_col] + if any(col not in df.columns for col in required_cols): + return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") + + fig = None + try: + df_copy = df.copy() + df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce') + df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column) + if df_copy.empty: + return create_placeholder_plot(title=title, message="No valid data after cleaning.") + + data_over_time = df_copy.resample('D')[likes_col].sum() + fig, ax = plt.subplots(figsize=(10, 5)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson', zorder=1) + ax.set_xlabel('Date') + ax.set_ylabel('Total Likes') + ax.grid(True, linestyle='--', alpha=0.6, zorder=0) + plt.xticks(rotation=45) + fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95) + return fig + except Exception as e: + logging.error(f"Error generating {title}: {e}", exc_info=True) + if fig: plt.close(fig) + return create_placeholder_plot(title=f"{title} Error", message=str(e)) def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'): -    title = "Clicks Over Time" # For logging/placeholder -    logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}") -    # This function essentially calls generate_reach_over_time_plot with specific params -    # The fig.tight_layout() and fig.subplots_adjust will be handled within that function. 
-    return generate_reach_over_time_plot(df, date_column, clicks_col) - + # This function reuses generate_reach_over_time_plot logic + return generate_reach_over_time_plot(df, date_column, clicks_col) def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'): -    title = "Shares Over Time" # For logging/placeholder -    logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}") -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No post data for shares.") -    required_cols = [date_column, shares_col] -    if any(col not in df.columns for col in required_cols): -        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") - -    fig = None -    try: -        df_copy = df.copy() -        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') -        df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce') -        df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column) -        if df_copy.empty: -            return create_placeholder_plot(title=title, message="No valid data after cleaning.") - -        data_over_time = df_copy.resample('D')[shares_col].sum() -        fig, ax = plt.subplots(figsize=(10, 5)) -        ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal') -        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel('Date') -        ax.set_ylabel('Total Shares') -        ax.grid(True, linestyle='--', alpha=0.7) -        plt.xticks(rotation=45) -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing -        return fig -    except Exception as e: -        logging.error(f"Error generating {title}: {e}", exc_info=True) -        if fig: 
plt.close(fig) -        return create_placeholder_plot(title=f"{title} Error", message=str(e)) -    finally: -        pass + title = "Shares Over Time" + logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}") + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No post data for shares.") + required_cols = [date_column, shares_col] + if any(col not in df.columns for col in required_cols): + return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") + + fig = None + try: + df_copy = df.copy() + df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce') + df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column) + if df_copy.empty: + return create_placeholder_plot(title=title, message="No valid data after cleaning.") + + data_over_time = df_copy.resample('D')[shares_col].sum() + fig, ax = plt.subplots(figsize=(10, 5)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal', zorder=1) + ax.set_xlabel('Date') + ax.set_ylabel('Total Shares') + ax.grid(True, linestyle='--', alpha=0.6, zorder=0) + plt.xticks(rotation=45) + fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95) + return fig + except Exception as e: + logging.error(f"Error generating {title}: {e}", exc_info=True) + if fig: plt.close(fig) + return create_placeholder_plot(title=f"{title} Error", message=str(e)) def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'): -    title = "Comments Over Time" # For logging/placeholder -    logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. 
DF rows: {len(df) if df is not None else 'None'}") -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No post data for comments.") -    required_cols = [date_column, comments_col] -    if any(col not in df.columns for col in required_cols): -        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") - -    fig = None -    try: -        df_copy = df.copy() -        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') -        df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce') -        df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column) -        if df_copy.empty: -            return create_placeholder_plot(title=title, message="No valid data after cleaning.") - -        data_over_time = df_copy.resample('D')[comments_col].sum() -        fig, ax = plt.subplots(figsize=(10, 5)) -        ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold') -        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel('Date') -        ax.set_ylabel('Total Comments') -        ax.grid(True, linestyle='--', alpha=0.7) -        plt.xticks(rotation=45) -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing -        return fig -    except Exception as e: -        logging.error(f"Error generating {title}: {e}", exc_info=True) -        if fig: plt.close(fig) -        return create_placeholder_plot(title=f"{title} Error", message=str(e)) -    finally: -        pass + title = "Comments Over Time" + logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. 
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
    """Draw a pie chart of how rows are distributed across sentiment labels.

    Args:
        df: DataFrame containing a sentiment label column.
        sentiment_column: Column with the sentiment labels. If it is absent
            and a ``'sentiment'`` column exists, that column is used instead.
        date_column: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A Matplotlib figure. When the input is missing, the sentiment column
        cannot be found or holds only nulls, or plotting raises, the figure
        returned is the one built by ``create_placeholder_plot``.
    """
    title = "Breakdown of Comments by Sentiment"
    logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data for comment sentiment.")
    if sentiment_column not in df.columns:
        if 'sentiment' in df.columns and sentiment_column != 'sentiment':
            logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
            sentiment_column = 'sentiment'
        else:
            return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")

    if df[sentiment_column].isnull().all():
        return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")

    fig = None
    try:
        # Drop nulls BEFORE the string cast; casting first would turn every
        # missing value into the literal label "nan" and give it a pie slice.
        sentiment_counts = df[sentiment_column].dropna().astype(str).value_counts()

        if sentiment_counts.empty:
            return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")

        fig, ax = plt.subplots(figsize=(8, 5))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        num_slices = len(sentiment_counts)
        # plt.get_cmap() is used because matplotlib.cm.get_cmap was removed
        # in Matplotlib 3.9.
        slice_cmap = plt.get_cmap('coolwarm', num_slices)
        ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct='%1.1f%%',
            startangle=90,
            colors=[slice_cmap(i) for i in range(num_slices)],
            # Axes.pie() has no zorder parameter; per-wedge artist properties
            # must be passed through wedgeprops.
            wedgeprops={'zorder': 1},
        )
        ax.axis('equal')  # keep the pie circular
        fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)  # do not leak the half-built figure
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
   finally: -        pass + title = "Breakdown of Comments by Sentiment" + logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}") + + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No data for comment sentiment.") + if sentiment_column not in df.columns: + if 'sentiment' in df.columns and sentiment_column != 'sentiment': + logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.") + sentiment_column = 'sentiment' + else: + return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}") + + if df[sentiment_column].isnull().all(): + return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.") + + fig = None + try: + df_copy = df.copy() + df_copy[sentiment_column] = df_copy[sentiment_column].astype(str) + sentiment_counts = df_copy[sentiment_column].value_counts().dropna() + + if sentiment_counts.empty or sentiment_counts.sum() == 0: + return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.") + + fig, ax = plt.subplots(figsize=(8, 5)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + pie_slice_colors = plt.cm.get_cmap('coolwarm', len(sentiment_counts)) + ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, + colors=[pie_slice_colors(i) for i in range(len(sentiment_counts))], zorder=1) + ax.axis('equal') + # fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95) + return fig + except Exception as e: + logging.error(f"Error generating {title}: {e}", exc_info=True) + if fig: plt.close(fig) + return create_placeholder_plot(title=f"{title} Error", message=str(e)) def generate_post_frequency_plot(df, 
date_column='published_at', resample_period='D'): -    title = f"Post Frequency Over Time ({resample_period})" # For logging/placeholder -    logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}") - -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No data available.") -    if date_column not in df.columns: -        return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.") - -    fig = None -    try: -        df_copy = df.copy() -        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): -            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - -        df_copy = df_copy.dropna(subset=[date_column]) -        if df_copy.empty: -            return create_placeholder_plot(title=title, message="No valid date entries found.") - -        post_frequency = df_copy.set_index(date_column).resample(resample_period).size() - -        if post_frequency.empty: -            return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.") - -        fig, ax = plt.subplots(figsize=(10, 5)) -        post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None) -        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED -        ax.set_xlabel('Date' if resample_period == 'D' else 'Period') -        ax.set_ylabel('Number of Posts') -        ax.grid(True, linestyle='--', alpha=0.7) -        plt.xticks(rotation=45) -        fig.tight_layout() -        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing -        logging.info(f"Successfully generated {title} plot.") -        return fig -    except Exception as e: -        logging.error(f"Error generating {title}: {e}", exc_info=True) -        if fig: plt.close(fig) -        return 
create_placeholder_plot(title=f"{title} Error", message=str(e)) -    finally: -        pass + title = f"Post Frequency Over Time ({resample_period})" + logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}") + + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No data available.") + if date_column not in df.columns: + return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.") + + fig = None + try: + df_copy = df.copy() + if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): + df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + + df_copy = df_copy.dropna(subset=[date_column]) + if df_copy.empty: + return create_placeholder_plot(title=title, message="No valid date entries found.") + + post_frequency = df_copy.set_index(date_column).resample(resample_period).size() + + if post_frequency.empty: + return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.") + + fig, ax = plt.subplots(figsize=(10, 5)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + if resample_period in ['M', 'W']: + num_bars = len(post_frequency) + bar_colors = plt.cm.get_cmap('viridis', num_bars) # Or 'tab10' + post_frequency.plot(kind='bar', ax=ax, color=[bar_colors(i) for i in range(num_bars)], zorder=1) + for i, v in enumerate(post_frequency): + ax.text(i, v + (0.01 * post_frequency.max()), str(v), ha='center', va='bottom', zorder=2) + else: + post_frequency.plot(kind='line', ax=ax, marker='o', zorder=1) + + ax.set_xlabel('Date' if resample_period == 'D' else 'Period') + ax.set_ylabel('Number of Posts') + ax.grid(True, linestyle='--', alpha=0.6, zorder=0) + plt.xticks(rotation=45) + fig.tight_layout(pad=0.5) + fig.subplots_adjust(top=0.92, bottom=0.20, left=0.1, right=0.95) + logging.info(f"Successfully generated {title} plot.") + return fig + except 
def generate_content_format_breakdown_plot(df, format_col='media_type'):
    """Draw a bar chart of the number of posts per content format.

    Args:
        df: DataFrame of posts; must contain ``format_col``.
        format_col: Name of the column holding the media/format label.

    Returns:
        A Matplotlib figure. When the input is missing, lacks the format
        column, has no non-null formats, or plotting raises, the figure
        returned is the one built by ``create_placeholder_plot``.
    """
    title = "Breakdown of Content by Format"
    logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if format_col not in df.columns:
        return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")

    fig = None
    try:
        # value_counts() already excludes nulls and does not mutate df, so no
        # defensive copy or extra dropna() is needed.
        format_counts = df[format_col].value_counts()

        if format_counts.empty:
            return create_placeholder_plot(title=title, message="No content format data available.")

        fig, ax = plt.subplots(figsize=(8, 6))
        _apply_rounded_corners_and_transparent_bg(fig, ax)

        num_bars = len(format_counts)
        # plt.get_cmap() is used because matplotlib.cm.get_cmap was removed
        # in Matplotlib 3.9. 'tab10' gives visually distinct bar colours.
        bar_cmap = plt.get_cmap('tab10', num_bars)

        format_counts.plot(kind='bar', ax=ax, color=[bar_cmap(i) for i in range(num_bars)], zorder=1)
        ax.set_xlabel('Media Type')
        ax.set_ylabel('Number of Posts')
        ax.grid(axis='y', linestyle='--', alpha=0.6, zorder=0)
        # Style this Axes' own tick labels instead of going through
        # plt.xticks(), which acts on pyplot's global "current axes".
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

        # Value label slightly above each bar.
        label_offset = 0.01 * format_counts.max()
        for i, v in enumerate(format_counts):
            ax.text(i, v + label_offset, str(v), ha='center', va='bottom', zorder=2)

        # Margins are set explicitly; a preceding tight_layout() would be
        # overwritten by this call, so it is not issued.
        fig.subplots_adjust(top=0.92, bottom=0.20, left=0.15, right=0.95)
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig is not None:
            plt.close(fig)  # do not leak the half-built figure
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
def _parse_eb_label(label_data):
    """Normalize one raw topic-label cell into a list of labels.

    Accepted inputs and their results:
      * ``list`` -> returned unchanged.
      * ``tuple`` / ``set`` / ``frozenset`` / NumPy array / pandas Series ->
        converted to a list (sets are sorted by string form so the result is
        deterministic).
      * ``str`` holding a Python literal collection such as ``"['AI', 'Tech']"``
        -> the parsed items; a literal ``None`` -> ``[]``; any other literal
        -> a one-element list with its string form.
      * any other non-blank ``str`` -> a one-element list with the stripped text.
      * ``None`` / NaN / NaT / blank string -> ``[]``.
      * anything else -> a one-element list with ``str(value)``.

    Args:
        label_data: The raw cell value.

    Returns:
        A list of labels (possibly empty).
    """
    if isinstance(label_data, list):
        return label_data
    if isinstance(label_data, (set, frozenset)):
        return sorted(label_data, key=str)
    if isinstance(label_data, tuple):
        return list(label_data)
    if isinstance(label_data, (np.ndarray, pd.Series)):
        # Must be handled before pd.isna(): on array-likes it returns an
        # array whose truth value is ambiguous and raises ValueError.
        return label_data.tolist()

    if isinstance(label_data, str):
        text = label_data.strip()
        if not text:
            return []
        try:
            # literal_eval only evaluates literals, so it is safe on data cells.
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a Python literal: the whole string is a single label.
            return [text]
        if parsed is None:
            return []
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        if isinstance(parsed, (set, frozenset)):
            return sorted(parsed, key=str)
        return [str(parsed)]

    # Only scalars reach pd.isna(), so the result is a plain bool.
    if pd.api.types.is_scalar(label_data) and pd.isna(label_data):
        return []
    return [str(label_data)]
 if pd.isna(label_data): -        return [] -    return [] + if isinstance(label_data, list): + return label_data + if isinstance(label_data, str): + try: + parsed = ast.literal_eval(label_data) + if isinstance(parsed, list): + return parsed + return [str(parsed)] # Ensure it's a list even if ast.literal_eval returns a single string + except (ValueError, SyntaxError): + # If not a valid list string, treat the whole string as one label if not empty + return [label_data.strip()] if label_data and label_data.strip() else [] + if pd.isna(label_data): + return [] + return [str(label_data)] # Fallback for other types, ensuring it's a list def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15): -    title = f"Breakdown of Content by Topics (Top {top_n})" # For logging/placeholder -    logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}") + title = f"Breakdown of Content by Topics (Top {top_n})" + logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}") + + if df is None or df.empty: + return create_placeholder_plot(title=title, message="No data available.") + if topics_col not in df.columns: + return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. 
Available: {df.columns.tolist()}") + + fig = None + try: + df_copy = df.copy() + # Ensure all entries in topics_col are processed by _parse_eb_label + parsed_labels = df_copy[topics_col].apply(_parse_eb_label) + exploded_labels = parsed_labels.explode().dropna() # Explode lists into separate rows + + # Filter out any empty strings that might result from parsing + exploded_labels = exploded_labels[exploded_labels != ''] -    if df is None or df.empty: -        return create_placeholder_plot(title=title, message="No data available.") -    if topics_col not in df.columns: -        return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}") -    fig = None -    try: -        df_copy = df.copy() -        parsed_labels = df_copy[topics_col].apply(_parse_eb_label) -        exploded_labels = parsed_labels.explode().dropna() + if exploded_labels.empty: + return create_placeholder_plot(title=title, message="No topic data found after processing labels.") -        if exploded_labels.empty: -            return create_placeholder_plot(title=title, message="No topic data found after processing labels.") + topic_counts = exploded_labels.value_counts() -        topic_counts = exploded_labels.value_counts() + if topic_counts.empty: + return create_placeholder_plot(title=title, message="No topics to display after counting.") -        if topic_counts.empty: -            return create_placeholder_plot(title=title, message="No topics to display after counting.") + top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True) -        top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True) + fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6)) + _apply_rounded_corners_and_transparent_bg(fig, ax) + + num_bars = len(top_topics) + bar_colors = plt.cm.get_cmap('YlGnBu', num_bars + 3) # Using a sequential colormap for horizontal bars -        fig, ax = plt.subplots(figsize=(10, 8 if 
def _build_sample_frames():
    """Build the dummy DataFrames used by the manual smoke test.

    Returns:
        A ``(posts_df, mentions_df, follower_stats_df)`` tuple.
    """
    posts_df = pd.DataFrame({
        'id': [f'post{i}' for i in range(1, 8)],
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
        'likeCount': [10, 5, 12, 8, 15, 3, 20],
        'commentCount': [2, 1, 3, 1, 4, 0, 5],
        'shareCount': [1, 0, 1, 1, 2, 0, 1],
        'clickCount': [20, 15, 30, 22, 40, 10, 50],
        'impressionCount': [200, 150, 300, 220, 400, 100, 500],
        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
        'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],
        'eb_labels': [
            "['AI', 'Tech']",          # string representation of a list
            ['Innovation'],            # actual list
            'General',                 # single plain string
            None,
            ['Tech', 'Future'],
            "['AI', 'Development']",
            ['Tech'],
        ],
        'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive'],
    })

    mentions_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })

    follower_stats_df = pd.DataFrame({
        'follower_count_type': (
            ['follower_gains_monthly'] * 3
            + ['follower_geo'] * 10
            + ['follower_function'] * 2
            + ['follower_industry'] * 2
            + ['follower_seniority'] * 2
        ),
        'category_name': [
            '2024-01-01', '2024-02-01', '2024-03-01',
            'Italy', 'United Kingdom', 'Spain', 'Germany', 'Switzerland', 'France', 'United States', 'Netherlands', 'Brazil', 'Belgium',
            'Engineering', 'Sales',
            'Tech', 'Finance',
            'Senior', 'Junior',
        ],
        'follower_count_organic': [
            100, 110, 125,
            4500, 187, 106, 83, 68, 63, 55, 41, 22, 22,
            400, 200,
            250, 180,
            300, 220,
        ],
        'follower_count_paid': [
            20, 30, 25,
            200, 10, 5, 10, 5, 8, 2, 5, 3, 1,
            30, 20,
            45, 35,
            60, 40,
        ],
    })
    return posts_df, mentions_df, follower_stats_df


def _run_plot_smoke_tests():
    """Call every plot generator once on sample data and log the outcome."""
    posts_df, mentions_df, follower_stats_df = _build_sample_frames()

    # Every entry is (display name, generator, input frame, keyword args).
    # A uniform 4-tuple keeps the loop's unpacking valid for all entries.
    plot_specs = [
        ("Posts Activity", generate_posts_activity_plot, posts_df, {}),
        ("Engagement Type", generate_engagement_type_plot, posts_df, {}),
        ("Mentions Activity", generate_mentions_activity_plot, mentions_df, {}),
        ("Mention Sentiment", generate_mention_sentiment_plot, mentions_df, {}),
        ("Followers Count", generate_followers_count_over_time_plot, follower_stats_df, {'type_value': 'follower_gains_monthly'}),
        ("Followers Growth", generate_followers_growth_rate_plot, follower_stats_df, {'type_value': 'follower_gains_monthly'}),
        ("Followers Geo", generate_followers_by_demographics_plot, follower_stats_df, {'type_value': 'follower_geo', 'plot_title': "Followers by Location"}),
        ("Engagement Rate", generate_engagement_rate_over_time_plot, posts_df, {}),
        ("Reach (Clicks)", generate_reach_over_time_plot, posts_df, {}),
        ("Impressions", generate_impressions_over_time_plot, posts_df, {}),
        ("Likes Over Time", generate_likes_over_time_plot, posts_df, {}),
        ("Shares Over Time", generate_shares_over_time_plot, posts_df, {}),
        ("Comments Over Time", generate_comments_over_time_plot, posts_df, {}),
        ("Comments Sentiment", generate_comments_sentiment_breakdown_plot, posts_df, {'sentiment_column': 'comment_sentiment'}),
        ("Post Frequency Daily", generate_post_frequency_plot, posts_df, {'resample_period': 'D'}),
        ("Post Frequency Weekly", generate_post_frequency_plot, posts_df, {'resample_period': 'W'}),
        ("Content Format", generate_content_format_breakdown_plot, posts_df, {}),
        ("Content Topics", generate_content_topic_breakdown_plot, posts_df, {'top_n': 5}),
        # Edge cases: each of these should yield a placeholder figure.
        ("Post Frequency (empty df)", generate_post_frequency_plot, pd.DataFrame(), {}),
        ("Content Format (missing col)", generate_content_format_breakdown_plot, posts_df, {'format_col': 'non_existent_col'}),
        ("Content Topics (all None labels)", generate_content_topic_breakdown_plot, posts_df.assign(eb_labels=None), {}),
    ]

    for name, func, frame, kwargs in plot_specs:
        logging.info(f"--- Testing: {name} ---")
        # Each generator gets its own copy so one run cannot affect the next.
        fig = func(frame.copy(), **kwargs)
        if fig is not None:
            logging.info(f"{name} plot generated.")
            # Close after checking so the run does not accumulate open figures.
            plt.close(fig)
        else:
            logging.warning(f"{name} plot generation failed or returned None.")

    logging.info("Test script finished. Review plots if saved locally.")


if __name__ == '__main__':
    _run_plot_smoke_tests()