diff --git "a/analytics_plot_generator.py" "b/analytics_plot_generator.py" --- "a/analytics_plot_generator.py" +++ "b/analytics_plot_generator.py" @@ -11,965 +11,969 @@ import ast # For safely evaluating string representations of lists logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s') def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."): - """Creates a placeholder Matplotlib plot indicating no data or an error.""" - try: - fig, ax = plt.subplots(figsize=(8, 4)) - ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True) - ax.axis('off') - fig.tight_layout() # MODIFIED - # Add spacing for consistency, though it might be less critical for placeholders - fig.subplots_adjust(top=0.90) - return fig - except Exception as e: - logging.error(f"Error creating placeholder plot: {e}") - # Fallback placeholder if the above fails - fig_err, ax_err = plt.subplots() - ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center') - ax_err.axis('off') - fig_err.tight_layout() # MODIFIED - fig_err.subplots_adjust(top=0.90) - return fig_err - # No plt.close(fig) here as Gradio handles the figure object. 
+    """Creates a placeholder Matplotlib plot indicating no data or an error.""" +    try: +        fig, ax = plt.subplots(figsize=(8, 4)) +        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True) +        ax.axis('off') +        fig.tight_layout() # MODIFIED +        # Add spacing for consistency, though it might be less critical for placeholders +        fig.subplots_adjust(top=0.90) +        return fig +    except Exception as e: +        logging.error(f"Error creating placeholder plot: {e}") +        # Fallback placeholder if the above fails +        fig_err, ax_err = plt.subplots() +        ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center') +        ax_err.axis('off') +        fig_err.tight_layout() # MODIFIED +        fig_err.subplots_adjust(top=0.90) +        return fig_err +    # No plt.close(fig) here as Gradio handles the figure object. def generate_posts_activity_plot(df, date_column='published_at'): - """Generates a plot for posts activity over time.""" - logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}") - if df is None or df.empty: - logging.warning(f"Posts activity: DataFrame is empty.") - return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.") - if date_column not in df.columns: - logging.warning(f"Posts activity: Date column '{date_column}' is missing. 
Cols: {df.columns.tolist()}.") - return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.") - - fig = None # Initialize fig to None - try: - df_copy = df.copy() - if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - - df_copy = df_copy.dropna(subset=[date_column]) - if df_copy.empty: - logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.") - return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.") - - posts_over_time = df_copy.set_index(date_column).resample('D').size() - - if posts_over_time.empty: - logging.info("Posts activity: No posts after resampling by day.") - return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.") - - fig, ax = plt.subplots(figsize=(10, 5)) - posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-') - ax.set_title('Posts Activity Over Time', y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Number of Posts') - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - logging.info("Successfully generated posts activity plot.") - return fig - except Exception as e: - logging.error(f"Error generating posts activity plot: {e}", exc_info=True) - if fig: plt.close(fig) # Close if fig was created before error - return create_placeholder_plot(title="Posts Activity Error", message=str(e)) - finally: - pass +    """Generates a plot for posts activity over time.""" +    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. 
Input df rows: {len(df) if df is not None else 'None'}") +    if df is None or df.empty: +        logging.warning(f"Posts activity: DataFrame is empty.") +        return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.") +    if date_column not in df.columns: +        logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.") +        return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.") + +    fig = None # Initialize fig to None +    try: +        df_copy = df.copy() +        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): +            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + +        df_copy = df_copy.dropna(subset=[date_column]) +        if df_copy.empty: +            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.") +            return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.") + +        posts_over_time = df_copy.set_index(date_column).resample('D').size() + +        if posts_over_time.empty: +            logging.info("Posts activity: No posts after resampling by day.") +            return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.") + +        fig, ax = plt.subplots(figsize=(10, 5)) +        posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-') +        # ax.set_title('Posts Activity Over Time', y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Number of Posts') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        logging.info("Successfully generated posts activity plot.") +        return fig +    except 
Exception as e: +        logging.error(f"Error generating posts activity plot: {e}", exc_info=True) +        if fig: plt.close(fig) # Close if fig was created before error +        return create_placeholder_plot(title="Posts Activity Error", message=str(e)) +    finally: +        pass def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'): - """Generates a bar plot for total engagement types (likes, comments, shares).""" - logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}") - - required_cols = [likes_col, comments_col, shares_col] - if df is None or df.empty: - logging.warning("Engagement type: DataFrame is empty.") - return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.") - - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - msg = f"Engagement type: Columns missing: {missing_cols}. 
Available: {df.columns.tolist()}" - logging.warning(msg) - return create_placeholder_plot(title="Post Engagement Types", message=msg) - - fig = None - try: - df_copy = df.copy() - for col in required_cols: - df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0) - - total_likes = df_copy[likes_col].sum() - total_comments = df_copy[comments_col].sum() - total_shares = df_copy[shares_col].sum() - - if total_likes == 0 and total_comments == 0 and total_shares == 0: - logging.info("Engagement type: All engagement counts are zero.") - return create_placeholder_plot(title="Post Engagement Types", message="No engagement data (likes, comments, shares) in the selected period.") - - engagement_data = { - 'Likes': total_likes, - 'Comments': total_comments, - 'Shares': total_shares - } - - fig, ax = plt.subplots(figsize=(8, 5)) - bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon']) - ax.set_title('Total Post Engagement Types', y=1.03) # Matplotlib title - ax.set_xlabel('Engagement Type') - ax.set_ylabel('Total Count') - ax.grid(axis='y', linestyle='--', alpha=0.7) - - for bar in bars: - yval = bar.get_height() - ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * max(engagement_data.values(), default=10)), str(int(yval)), ha='center', va='bottom') - - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - logging.info("Successfully generated engagement type plot.") - return fig - except Exception as e: - logging.error(f"Error generating engagement type plot: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title="Engagement Type Error", message=str(e)) - finally: - pass +    """Generates a bar plot for total engagement types (likes, comments, shares).""" +    logging.info(f"Generating engagement type plot. 
Input df rows: {len(df) if df is not None else 'None'}") + +    required_cols = [likes_col, comments_col, shares_col] +    if df is None or df.empty: +        logging.warning("Engagement type: DataFrame is empty.") +        return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.") + +    missing_cols = [col for col in required_cols if col not in df.columns] +    if missing_cols: +        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}" +        logging.warning(msg) +        return create_placeholder_plot(title="Post Engagement Types", message=msg) + +    fig = None +    try: +        df_copy = df.copy() +        for col in required_cols: +            df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0) + +        total_likes = df_copy[likes_col].sum() +        total_comments = df_copy[comments_col].sum() +        total_shares = df_copy[shares_col].sum() + +        if total_likes == 0 and total_comments == 0 and total_shares == 0: +            logging.info("Engagement type: All engagement counts are zero.") +            return create_placeholder_plot(title="Post Engagement Types", message="No engagement data (likes, comments, shares) in the selected period.") + +        engagement_data = { +            'Likes': total_likes, +            'Comments': total_comments, +            'Shares': total_shares +        } + +        fig, ax = plt.subplots(figsize=(8, 5)) +        bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon']) +        # ax.set_title('Total Post Engagement Types', y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Engagement Type') +        ax.set_ylabel('Total Count') +        ax.grid(axis='y', linestyle='--', alpha=0.7) + +        for bar in bars: +            yval = bar.get_height() +            ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * 
max(engagement_data.values(), default=10)), str(int(yval)), ha='center', va='bottom') + +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.1) # Adjusted spacing +        logging.info("Successfully generated engagement type plot.") +        return fig +    except Exception as e: +        logging.error(f"Error generating engagement type plot: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title="Engagement Type Error", message=str(e)) +    finally: +        pass def generate_mentions_activity_plot(df, date_column='date'): - """Generates a plot for mentions activity over time.""" - logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}") - if df is None or df.empty: - logging.warning(f"Mentions activity: DataFrame is empty.") - return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.") - if date_column not in df.columns: - logging.warning(f"Mentions activity: Date column '{date_column}' is missing. 
Cols: {df.columns.tolist()}.") - return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.") - - fig = None - try: - df_copy = df.copy() - if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - - df_copy = df_copy.dropna(subset=[date_column]) - if df_copy.empty: - logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.") - return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.") - - mentions_over_time = df_copy.set_index(date_column).resample('D').size() - - if mentions_over_time.empty: - logging.info("Mentions activity: No mentions after resampling by day.") - return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.") - - fig, ax = plt.subplots(figsize=(10, 5)) - mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple') - ax.set_title('Mentions Activity Over Time', y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Number of Mentions') - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - logging.info("Successfully generated mentions activity plot.") - return fig - except Exception as e: - logging.error(f"Error generating mentions activity plot: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title="Mentions Activity Error", message=str(e)) - finally: - pass +    """Generates a plot for mentions activity over time.""" +    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. 
Input df rows: {len(df) if df is not None else 'None'}") +    if df is None or df.empty: +        logging.warning(f"Mentions activity: DataFrame is empty.") +        return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.") +    if date_column not in df.columns: +        logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.") +        return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.") + +    fig = None +    try: +        df_copy = df.copy() +        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): +            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + +        df_copy = df_copy.dropna(subset=[date_column]) +        if df_copy.empty: +            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.") +            return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.") + +        mentions_over_time = df_copy.set_index(date_column).resample('D').size() + +        if mentions_over_time.empty: +            logging.info("Mentions activity: No mentions after resampling by day.") +            return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.") + +        fig, ax = plt.subplots(figsize=(10, 5)) +        mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple') +        # ax.set_title('Mentions Activity Over Time', y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Number of Mentions') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        logging.info("Successfully generated mentions activity 
plot.") +        return fig +    except Exception as e: +        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title="Mentions Activity Error", message=str(e)) +    finally: +        pass def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'): - """Generates a pie chart for mention sentiment distribution.""" - logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - logging.warning("Mention sentiment: DataFrame is empty.") - return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.") - if sentiment_column not in df.columns: - msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}" - logging.warning(msg) - return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg) - - fig = None - try: - df_copy = df.copy() - sentiment_counts = df_copy[sentiment_column].value_counts() - if sentiment_counts.empty: - logging.info("Mention sentiment: No sentiment data after value_counts.") - return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.") - - fig, ax = plt.subplots(figsize=(8, 5)) - colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts)) - pie_colors = [colors_map(i) for i in range(len(sentiment_counts))] - ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors) - ax.set_title('Mention Sentiment Distribution', y=1.03) # Matplotlib title - ax.axis('equal') - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - logging.info("Successfully generated mention sentiment plot.") - return fig - except Exception as e: - logging.error(f"Error generating mention 
sentiment plot: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title="Mention Sentiment Error", message=str(e)) - finally: - pass +    """Generates a pie chart for mention sentiment distribution.""" +    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        logging.warning("Mention sentiment: DataFrame is empty.") +        return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.") +    if sentiment_column not in df.columns: +        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}" +        logging.warning(msg) +        return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg) + +    fig = None +    try: +        df_copy = df.copy() +        sentiment_counts = df_copy[sentiment_column].value_counts() +        if sentiment_counts.empty: +            logging.info("Mention sentiment: No sentiment data after value_counts.") +            return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.") + +        fig, ax = plt.subplots(figsize=(8, 5)) +        colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts)) +        pie_colors = [colors_map(i) for i in range(len(sentiment_counts))] +        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors) +        # ax.set_title('Mention Sentiment Distribution', y=1.03) # Matplotlib title REMOVED +        ax.axis('equal') +        fig.tight_layout() +        fig.subplots_adjust(top=0.92) # Adjusted spacing +        logging.info("Successfully generated mention sentiment plot.") +        return fig +    except Exception as e: +        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True) +    
    if fig: plt.close(fig) +        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e)) +    finally: +        pass # --- Updated Follower Plot Functions --- def generate_followers_count_over_time_plot(df, date_info_column='category_name', - organic_count_col='follower_count_organic', - paid_count_col='follower_count_paid', - type_filter_column='follower_count_type', - type_value='follower_gains_monthly'): - title = f"Followers Count Over Time ({type_value})" - logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No follower data available.") - - required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column] - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() - - if df_filtered.empty: - return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.") - - df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce') - df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0) - df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0) - df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj') - - if df_filtered.empty: - return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.") - - fig, ax = plt.subplots(figsize=(10, 5)) - ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers') - ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers') - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Follower Count') - ax.legend() - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +                                            organic_count_col='follower_count_organic', +                                            paid_count_col='follower_count_paid', +                                            type_filter_column='follower_count_type', +                                            
type_value='follower_gains_monthly'): +    title = f"Followers Count Over Time ({type_value})" # This is for logging/placeholder, not displayed title +    logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No follower data available.") + +    required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column] +    missing_cols = [col for col in required_cols if col not in df.columns] +    if missing_cols: +        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() + +        if df_filtered.empty: +            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.") + +        df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce') +        df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0) +        df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0) +        df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj') + +        if df_filtered.empty: +            return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.") + +        fig, ax = plt.subplots(figsize=(10, 5)) +        ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers') +        ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], 
marker='x', linestyle='--', color='seagreen', label='Paid Followers') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Follower Count') +        ax.legend() +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_followers_growth_rate_plot(df, date_info_column='category_name', - organic_count_col='follower_count_organic', - paid_count_col='follower_count_paid', - type_filter_column='follower_count_type', - type_value='follower_gains_monthly'): - title = f"Follower Growth Rate ({type_value})" - logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No follower data available.") - - required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column] - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() - - if df_filtered.empty: - return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.") - - df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce') - df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce') - df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce') - df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj') - - if df_filtered.empty or len(df_filtered) < 2: - return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.") - - df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100 - df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100 - df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True) - - fig, ax = plt.subplots(figsize=(10, 5)) - plotted_organic = False - if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty: - ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate') - plotted_organic = True - plotted_paid = False - if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty: - ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate') - plotted_paid = True - - if not plotted_organic and not plotted_paid: - return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.") - - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Growth Rate (%)') - 
ax.yaxis.set_major_formatter(mticker.PercentFormatter()) - ax.legend() - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +                                        organic_count_col='follower_count_organic', +                                        paid_count_col='follower_count_paid', +                                        type_filter_column='follower_count_type', +                                        type_value='follower_gains_monthly'): +    title = f"Follower Growth Rate ({type_value})" # This is for logging/placeholder, not displayed title +    logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No follower data available.") + +    required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column] +    missing_cols = [col for col in required_cols if col not in df.columns] +    if missing_cols: +        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() + +        if df_filtered.empty: +            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.") + +        df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce') +        df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce') +        df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce') +        df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj') + +        if df_filtered.empty or len(df_filtered) < 2: +            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.") + +        df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100 +        df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100 +        df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True) + +        fig, ax = plt.subplots(figsize=(10, 5)) +        plotted_organic = False +        if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty: +            ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate') +            plotted_organic = True +        plotted_paid = False +        if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty: +            ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate') +            plotted_paid = True + +        if not plotted_organic and not plotted_paid: +            return create_placeholder_plot(title=title, 
message="No valid growth rate data to display after calculation.") + +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Growth Rate (%)') +        ax.yaxis.set_major_formatter(mticker.PercentFormatter()) +        ax.legend() +  ��     ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_followers_by_demographics_plot(df, category_col='category_name', - organic_count_col='follower_count_organic', - paid_count_col='follower_count_paid', - type_filter_column='follower_count_type', - type_value=None, plot_title="Followers by Demographics"): - logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - return create_placeholder_plot(title=plot_title, message="No follower data available.") - - required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column] - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") - - if type_value is None: - return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.") - - fig = None - try: - df_copy = df.copy() - df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() - - if df_filtered.empty: - return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.") - - df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0) - df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0) - demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum() - demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col] - demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort']) - - if demographics_data.empty: - return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.") - - top_n = 10 - if len(demographics_data) > top_n: - demographics_data = demographics_data.head(top_n) - plot_title_updated = f"{plot_title} (Top {top_n})" - else: - plot_title_updated = plot_title - - fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) ) - bar_width = 0.35 - index = np.arange(len(demographics_data.index)) - bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue') - bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral') - - ax.set_title(plot_title_updated, y=1.03) # Matplotlib title - ax.set_xlabel(category_col.replace('_', ' ').title()) - ax.set_ylabel('Number of Followers') - ax.set_xticks(index) - ax.set_xticklabels(demographics_data.index, rotation=45, ha="right") - ax.legend() - 
ax.grid(axis='y', linestyle='--', alpha=0.7) - - for bar_group in [bars1, bars2]: - for bar_item in bar_group: # Renamed 'bar' to 'bar_item' to avoid conflict - yval = bar_item.get_height() - if yval > 0: - ax.text(bar_item.get_x() + bar_item.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]), - str(int(yval)), ha='center', va='bottom', fontsize=8) - - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.85) # Adjust top for more space, especially with rotated labels - return fig - except Exception as e: - logging.error(f"Error generating {plot_title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{plot_title} Error", message=str(e)) - finally: - pass +                                            organic_count_col='follower_count_organic', +                                            paid_count_col='follower_count_paid', +                                            type_filter_column='follower_count_type', +                                            type_value=None, plot_title="Followers by Demographics"): # plot_title is for logging/placeholder +    logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        return create_placeholder_plot(title=plot_title, message="No follower data available.") + +    required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column] +    missing_cols = [col for col in required_cols if col not in df.columns] +    if missing_cols: +        return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") + +    if type_value is None: +        return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.") + +    fig = None +    try: +        df_copy = df.copy() +        df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() + +        if df_filtered.empty: +            return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.") + +        df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0) +        df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0) +        demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum() +        demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col] +        demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort']) + +        if demographics_data.empty: +            return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.") + +        top_n = 10 +        plot_title_updated = plot_title # Use original plot_title for placeholder if needed +        if len(demographics_data) > top_n: +            demographics_data = demographics_data.head(top_n) +            # plot_title_updated = f"{plot_title} (Top {top_n})" # No longer setting internal title + +        fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) ) +        bar_width = 0.35 +        index = np.arange(len(demographics_data.index)) +        bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue') +        bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral') + +        # 
ax.set_title(plot_title_updated, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel(category_col.replace('_', ' ').title()) +        ax.set_ylabel('Number of Followers') +        ax.set_xticks(index) +        ax.set_xticklabels(demographics_data.index, rotation=45, ha="right") +        ax.legend() +        ax.grid(axis='y', linestyle='--', alpha=0.7) + +        for bar_group in [bars1, bars2]: +            for bar_item in bar_group: +                yval = bar_item.get_height() +                if yval > 0: +                    ax.text(bar_item.get_x() + bar_item.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]), +                            str(int(yval)), ha='center', va='bottom', fontsize=8) + +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.20) # Increased bottom margin for rotated labels, top for Gradio label +        return fig +    except Exception as e: +        logging.error(f"Error generating {plot_title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e)) +    finally: +        pass def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'): - title = "Engagement Rate Over Time" - logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No post data for engagement rate.") - - required_cols = [date_column, engagement_rate_col] - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce') - df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column) - - if df_copy.empty: - return create_placeholder_plot(title=title, message="No valid data after cleaning.") - - engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean() - engagement_over_time = engagement_over_time.dropna() - - if engagement_over_time.empty: - return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.") - - fig, ax = plt.subplots(figsize=(10, 5)) - ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange') - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Engagement Rate') - max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0 - formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0 - if max_rate_val > 1.5 and formatter_xmax == 1.0: - formatter_xmax = 100.0 - elif max_rate_val > 100 and formatter_xmax == 1.0: - formatter_xmax = max_rate_val - - ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax)) - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +    title = "Engagement Rate Over Time" # For logging/placeholder +    logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. 
DF rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No post data for engagement rate.") + +    required_cols = [date_column, engagement_rate_col] +    missing_cols = [col for col in required_cols if col not in df.columns] +    if missing_cols: +        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') +        df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce') +        df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column) + +        if df_copy.empty: +            return create_placeholder_plot(title=title, message="No valid data after cleaning.") + +        engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean() +        engagement_over_time = engagement_over_time.dropna() + +        if engagement_over_time.empty: +            return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.") + +        fig, ax = plt.subplots(figsize=(10, 5)) +        ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Engagement Rate') +        max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0 +        formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0 +        if max_rate_val > 1.5 and formatter_xmax == 1.0: +             formatter_xmax = 100.0 +        elif max_rate_val > 100 and formatter_xmax == 1.0: +             formatter_xmax = max_rate_val + +        
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax)) +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'): - title = "Reach Over Time (Clicks)" # Title matches the config in app.py - logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No post data for reach.") - - required_cols = [date_column, reach_col] - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce') - df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column) - - if df_copy.empty: - return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.") - - reach_over_time = df_copy.resample('D')[reach_col].sum() - - fig, ax = plt.subplots(figsize=(10, 5)) - ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen') - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Total Clicks') # Label consistent with reach_col='clickCount' - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +    title = "Reach Over Time (Clicks)" # For logging/placeholder +    logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No post data for reach.") + +    required_cols = [date_column, reach_col] +    missing_cols = [col for col in required_cols if col not in df.columns] +    if missing_cols: +        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') +        df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce') +        df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column) + +        if df_copy.empty: +            return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.") + +        reach_over_time = df_copy.resample('D')[reach_col].sum() + +        fig, ax = plt.subplots(figsize=(10, 5)) +        ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Total Clicks') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'): - title = "Impressions Over Time" - logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No post data for impressions.") - - required_cols = [date_column, impressions_col] - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce') - df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column) - - if df_copy.empty: - return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.") - - impressions_over_time = df_copy.resample('D')[impressions_col].sum() - - fig, ax = plt.subplots(figsize=(10, 5)) - ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue') - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Total Impressions') - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +    title = "Impressions Over Time" # For logging/placeholder +    logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No post data for impressions.") + +    required_cols = [date_column, impressions_col] +    missing_cols = [col for col in required_cols if col not in df.columns] +    if missing_cols: +        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. 
Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') +        df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce') +        df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column) + +        if df_copy.empty: +            return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.") + +        impressions_over_time = df_copy.resample('D')[impressions_col].sum() + +        fig, ax = plt.subplots(figsize=(10, 5)) +        ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Total Impressions') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'): - title = "Reactions (Likes) Over Time" - logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}") - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No post data for likes.") - required_cols = [date_column, likes_col] - if any(col not in df.columns for col in required_cols): - return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. 
Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce') - df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column) - if df_copy.empty: - return create_placeholder_plot(title=title, message="No valid data after cleaning.") - - data_over_time = df_copy.resample('D')[likes_col].sum() - fig, ax = plt.subplots(figsize=(10, 5)) - ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson') - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Total Likes') - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +    title = "Reactions (Likes) Over Time" # For logging/placeholder +    logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}") +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No post data for likes.") +    required_cols = [date_column, likes_col] +    if any(col not in df.columns for col in required_cols): +        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. 
Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') +        df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce') +        df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column) +        if df_copy.empty: +            return create_placeholder_plot(title=title, message="No valid data after cleaning.") + +        data_over_time = df_copy.resample('D')[likes_col].sum() +        fig, ax = plt.subplots(figsize=(10, 5)) +        ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Total Likes') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'): - title = "Clicks Over Time" - logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}") - # This function essentially calls generate_reach_over_time_plot with specific params - # The fig.tight_layout() and fig.subplots_adjust will be handled within that function. - return generate_reach_over_time_plot(df, date_column, clicks_col) +    title = "Clicks Over Time" # For logging/placeholder +    logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. 
DF rows: {len(df) if df is not None else 'None'}") +    # This function essentially calls generate_reach_over_time_plot with specific params +    # The fig.tight_layout() and fig.subplots_adjust will be handled within that function. +    return generate_reach_over_time_plot(df, date_column, clicks_col) def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'): - title = "Shares Over Time" - logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}") - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No post data for shares.") - required_cols = [date_column, shares_col] - if any(col not in df.columns for col in required_cols): - return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce') - df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column) - if df_copy.empty: - return create_placeholder_plot(title=title, message="No valid data after cleaning.") - - data_over_time = df_copy.resample('D')[shares_col].sum() - fig, ax = plt.subplots(figsize=(10, 5)) - ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal') - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Total Shares') - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +    
title = "Shares Over Time" # For logging/placeholder +    logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}") +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No post data for shares.") +    required_cols = [date_column, shares_col] +    if any(col not in df.columns for col in required_cols): +        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') +        df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce') +        df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column) +        if df_copy.empty: +            return create_placeholder_plot(title=title, message="No valid data after cleaning.") + +        data_over_time = df_copy.resample('D')[shares_col].sum() +        fig, ax = plt.subplots(figsize=(10, 5)) +        ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Total Shares') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'): - title = "Comments Over Time" - logging.info(f"Generating {title}. 
Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}") - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No post data for comments.") - required_cols = [date_column, comments_col] - if any(col not in df.columns for col in required_cols): - return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") - - fig = None - try: - df_copy = df.copy() - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce') - df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column) - if df_copy.empty: - return create_placeholder_plot(title=title, message="No valid data after cleaning.") - - data_over_time = df_copy.resample('D')[comments_col].sum() - fig, ax = plt.subplots(figsize=(10, 5)) - ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold') - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date') - ax.set_ylabel('Total Comments') - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +    title = "Comments Over Time" # For logging/placeholder +    logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. 
DF rows: {len(df) if df is not None else 'None'}") +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No post data for comments.") +    required_cols = [date_column, comments_col] +    if any(col not in df.columns for col in required_cols): +        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') +        df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce') +        df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column) +        if df_copy.empty: +            return create_placeholder_plot(title=title, message="No valid data after cleaning.") + +        data_over_time = df_copy.resample('D')[comments_col].sum() +        fig, ax = plt.subplots(figsize=(10, 5)) +        ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date') +        ax.set_ylabel('Total Comments') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None): - title = "Breakdown of Comments by Sentiment" - logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. 
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
    """Build a pie chart showing how many rows carry each sentiment label.

    Args:
        df: DataFrame holding a sentiment label column.
        sentiment_column: Name of the label column. When it is absent but a
            column named 'sentiment' exists, that column is used instead.
        date_column: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A Matplotlib figure: the pie chart on success, otherwise the figure
        returned by ``create_placeholder_plot`` (no data, missing column, or
        an exception while plotting).
    """
    title = "Breakdown of Comments by Sentiment"  # For logging/placeholder
    logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data for comment sentiment.")

    if sentiment_column not in df.columns:
        if 'sentiment' not in df.columns:
            return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")
        logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
        sentiment_column = 'sentiment'

    if df[sentiment_column].isnull().all():
        return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")

    fig = None
    try:
        # Drop missing labels BEFORE casting to str. Casting first turns
        # NaN/None into the literal labels 'nan'/'None', which would then be
        # counted and drawn as a slice of their own.
        labels = df[sentiment_column].dropna().astype(str)
        sentiment_counts = labels.value_counts()

        if sentiment_counts.empty or sentiment_counts.sum() == 0:
            return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")

        fig, ax = plt.subplots(figsize=(8, 5))
        # plt.get_cmap replaces matplotlib.cm.get_cmap, which newer Matplotlib
        # releases no longer provide; the second argument resamples the
        # colormap to one colour per slice.
        colors_map = plt.get_cmap('coolwarm', len(sentiment_counts))
        pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.axis('equal')  # Keep the pie circular.
        fig.tight_layout()
        fig.subplots_adjust(top=0.92)  # Adjusted spacing
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        # Release the half-built figure so pyplot does not keep it alive.
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
   finally: +        pass def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'): - title = f"Post Frequency Over Time ({resample_period})" - logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}") - - if df is None or df.empty: - return create_placeholder_plot(title=title, message="No data available.") - if date_column not in df.columns: - return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.") - - fig = None - try: - df_copy = df.copy() - if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): - df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') - - df_copy = df_copy.dropna(subset=[date_column]) - if df_copy.empty: - return create_placeholder_plot(title=title, message="No valid date entries found.") - - post_frequency = df_copy.set_index(date_column).resample(resample_period).size() - - if post_frequency.empty: - return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.") - - fig, ax = plt.subplots(figsize=(10, 5)) - post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None) - ax.set_title(title, y=1.03) # Matplotlib title - ax.set_xlabel('Date' if resample_period == 'D' else 'Period') - ax.set_ylabel('Number of Posts') - ax.grid(True, linestyle='--', alpha=0.7) - plt.xticks(rotation=45) - fig.tight_layout() # MODIFIED - fig.subplots_adjust(top=0.88) # Add space for Gradio label - logging.info(f"Successfully generated {title} plot.") - return fig - except Exception as e: - logging.error(f"Error generating {title}: {e}", exc_info=True) - if fig: plt.close(fig) - return create_placeholder_plot(title=f"{title} Error", message=str(e)) - finally: - pass +    title = f"Post Frequency Over Time ({resample_period})" # For logging/placeholder +    
logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}") + +    if df is None or df.empty: +        return create_placeholder_plot(title=title, message="No data available.") +    if date_column not in df.columns: +        return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.") + +    fig = None +    try: +        df_copy = df.copy() +        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]): +            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce') + +        df_copy = df_copy.dropna(subset=[date_column]) +        if df_copy.empty: +            return create_placeholder_plot(title=title, message="No valid date entries found.") + +        post_frequency = df_copy.set_index(date_column).resample(resample_period).size() + +        if post_frequency.empty: +            return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.") + +        fig, ax = plt.subplots(figsize=(10, 5)) +        post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None) +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Date' if resample_period == 'D' else 'Period') +        ax.set_ylabel('Number of Posts') +        ax.grid(True, linestyle='--', alpha=0.7) +        plt.xticks(rotation=45) +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        logging.info(f"Successfully generated {title} plot.") +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def generate_content_format_breakdown_plot(df, 
def generate_content_format_breakdown_plot(df, format_col='media_type'):
    """Draw a bar chart of post counts per content format.

    Args:
        df: DataFrame with one row per post.
        format_col: Column whose distinct values are counted.

    Returns:
        A Matplotlib figure: the bar chart (each bar annotated with its
        count) on success, otherwise the figure returned by
        ``create_placeholder_plot``.
    """
    title = "Breakdown of Content by Format"  # For logging/placeholder
    logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")

    # Guard clauses: nothing to plot without rows or without the column.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if format_col not in df.columns:
        return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")

    fig = None
    try:
        working_frame = df.copy()
        counts_by_format = working_frame[format_col].value_counts().dropna()

        if counts_by_format.empty:
            return create_placeholder_plot(title=title, message="No content format data available.")

        fig, ax = plt.subplots(figsize=(8, 6))
        counts_by_format.plot(kind='bar', ax=ax, color='skyblue')
        ax.set_xlabel('Media Type')
        ax.set_ylabel('Number of Posts')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45, ha="right")

        # Write each count just above its bar; the gap is 1% of the tallest bar.
        label_gap = 0.01 * counts_by_format.max()
        for bar_position, post_count in enumerate(counts_by_format):
            ax.text(bar_position, post_count + label_gap, str(post_count), ha='center', va='bottom')

        fig.tight_layout()
        fig.subplots_adjust(top=0.92, bottom=0.15)  # Adjusted spacing
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
Available: {df.columns.tolist()}") + +    fig = None +    try: +        df_copy = df.copy() +        format_counts = df_copy[format_col].value_counts().dropna() + +        if format_counts.empty: +            return create_placeholder_plot(title=title, message="No content format data available.") + +        fig, ax = plt.subplots(figsize=(8, 6)) +        format_counts.plot(kind='bar', ax=ax, color='skyblue') +        # ax.set_title(title, y=1.03) # Matplotlib title REMOVED +        ax.set_xlabel('Media Type') +        ax.set_ylabel('Number of Posts') +        ax.grid(axis='y', linestyle='--', alpha=0.7) +        plt.xticks(rotation=45, ha="right") + +        for i, v in enumerate(format_counts): +            ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom') + +        fig.tight_layout() +        fig.subplots_adjust(top=0.92, bottom=0.15) # Adjusted spacing +        logging.info(f"Successfully generated {title} plot.") +        return fig +    except Exception as e: +        logging.error(f"Error generating {title}: {e}", exc_info=True) +        if fig: plt.close(fig) +        return create_placeholder_plot(title=f"{title} Error", message=str(e)) +    finally: +        pass def _parse_eb_label(label_data): - if isinstance(label_data, list): - return label_data - if isinstance(label_data, str): - try: - parsed = ast.literal_eval(label_data) - if isinstance(parsed, list): - return parsed - return [str(parsed)] - except (ValueError, SyntaxError): - return [label_data] if label_data.strip() else [] - if pd.isna(label_data): - return [] - return [] +    if isinstance(label_data, list): +        return label_data +    if isinstance(label_data, str): +        try: +            parsed = ast.literal_eval(label_data) +            if isinstance(parsed, list): +                return parsed +            return [str(parsed)] +        except (ValueError, SyntaxError): +            return [label_data] if label_data.strip() else [] +    if 
def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
    """Draw a horizontal bar chart of the most frequent topics.

    Each cell of ``topics_col`` is turned into a list of labels with
    ``_parse_eb_label``; the lists are exploded to one label per row and
    counted.

    Args:
        df: DataFrame with one row per post.
        topics_col: Column holding the raw topic labels.
        top_n: Number of most frequent topics to show.

    Returns:
        A Matplotlib figure: the bar chart (each bar annotated with its
        count) on success, otherwise the figure returned by
        ``create_placeholder_plot``.
    """
    title = f"Breakdown of Content by Topics (Top {top_n})"  # For logging/placeholder
    logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")

    # Guard clauses: nothing to plot without rows or without the column.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if topics_col not in df.columns:
        return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")

    fig = None
    try:
        working_frame = df.copy()
        # One label per row; rows that produced no label are dropped.
        labels_per_post = working_frame[topics_col].apply(_parse_eb_label)
        flat_labels = labels_per_post.explode().dropna()

        if flat_labels.empty:
            return create_placeholder_plot(title=title, message="No topic data found after processing labels.")

        counts_by_topic = flat_labels.value_counts()

        if counts_by_topic.empty:
            return create_placeholder_plot(title=title, message="No topics to display after counting.")

        # Ascending order so the most frequent topic ends up at the top of the barh chart.
        top_topics = counts_by_topic.nlargest(top_n).sort_values(ascending=True)

        fig_height = 8 if len(top_topics) > 5 else 6
        fig, ax = plt.subplots(figsize=(10, fig_height))
        top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
        ax.set_xlabel('Number of Posts')
        ax.set_ylabel('Topic')

        # Write each count just right of its bar; the gap is 1% of the longest bar.
        label_gap = 0.01 * top_topics.max()
        for bar_position, post_count in enumerate(top_topics):
            ax.text(post_count + label_gap, bar_position, str(post_count), va='center')

        fig.tight_layout()
        fig.subplots_adjust(top=0.92, left=0.25)  # Adjusted spacing, added left margin for long labels
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        if fig:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
if __name__ == '__main__':
    # Manual smoke test: build small dummy frames, call every plot generator
    # once, and log which figures came back.

    def _report(fig, message):
        """Log `message` for a returned figure, then close the figure.

        Closing matters here: this script creates more than twenty figures,
        and pyplot keeps every open figure alive (and warns once too many
        are open at the same time).
        """
        if fig is None:
            return
        logging.info(message)
        plt.close(fig)

    # Create dummy data for testing
    posts_data = {
        'id': [f'post{i}' for i in range(1, 8)],
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
        'likeCount': [10, 5, 12, 8, 15, 3, 20],
        'commentCount': [2, 1, 3, 1, 4, 0, 5],
        'shareCount': [1, 0, 1, 1, 2, 0, 1],
        'clickCount': [20, 15, 30, 22, 40, 10, 50],
        'impressionCount': [200, 150, 300, 220, 400, 100, 500],
        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
        'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],
        # Deliberately mixed shapes: stringified lists, real lists, a bare
        # string and a missing value.
        'eb_labels': [
            "['AI', 'Tech']",
            ['Innovation'],
            'General',
            None,
            ['Tech', 'Future'],
            "['AI', 'Development']",
            ['Tech'],
        ],
        'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive'],
    }
    sample_merged_posts_df = pd.DataFrame(posts_data)

    # All four lists are aligned row by row: 3 monthly gains, 10 geo rows,
    # then 2 rows each for function, industry and seniority.
    follower_data = {
        'follower_count_type': (
            ['follower_gains_monthly'] * 3
            + ['follower_geo'] * 10  # Added more for demo
            + ['follower_function'] * 2
            + ['follower_industry'] * 2
            + ['follower_seniority'] * 2
        ),
        'category_name': [
            '2024-01-01', '2024-02-01', '2024-03-01',
            'Italy', 'United Kingdom', 'Spain', 'Germany', 'Switzerland', 'France', 'United States', 'Netherlands', 'Brazil', 'Belgium',  # Matching screenshot
            'Engineering', 'Sales',
            'Tech', 'Finance',
            'Senior', 'Junior',
        ],
        'follower_count_organic': [
            100, 110, 125,
            4500, 187, 106, 83, 68, 63, 55, 41, 22, 22,  # Matching screenshot values for organic
            400, 200,
            250, 180,
            300, 220,
        ],
        'follower_count_paid': [
            20, 30, 25,
            200, 10, 5, 10, 5, 8, 2, 5, 3, 1,  # Example paid values
            30, 20,
            45, 35,
            60, 40,
        ],
    }
    sample_follower_stats_df = pd.DataFrame(follower_data)

    logging.info("--- Testing Existing Plot Generations ---")
    _report(generate_posts_activity_plot(sample_merged_posts_df.copy()), "Posts activity plot generated.")
    _report(generate_engagement_type_plot(sample_merged_posts_df.copy()), "Engagement type plot generated.")

    mentions_data = {
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    }
    sample_mentions_df = pd.DataFrame(mentions_data)
    _report(generate_mentions_activity_plot(sample_mentions_df.copy()), "Mentions activity plot generated.")
    _report(generate_mention_sentiment_plot(sample_mentions_df.copy()), "Mention sentiment plot generated.")

    _report(
        generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly'),
        "Followers Count Over Time plot generated.",
    )
    _report(
        generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly'),
        "Followers Growth Rate plot generated.",
    )
    # To inspect a figure locally, call plt.show() before it is handed to _report.
    _report(
        generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location"),
        "Followers by Location plot generated.",
    )

    _report(generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy()), "Engagement Rate Over Time plot generated.")
    _report(generate_reach_over_time_plot(sample_merged_posts_df.copy()), "Reach Over Time (Clicks) plot generated.")
    _report(generate_impressions_over_time_plot(sample_merged_posts_df.copy()), "Impressions Over Time plot generated.")

    _report(generate_likes_over_time_plot(sample_merged_posts_df.copy()), "Likes Over Time plot generated.")
    _report(generate_clicks_over_time_plot(sample_merged_posts_df.copy()), "Clicks Over Time plot generated.")
    _report(generate_shares_over_time_plot(sample_merged_posts_df.copy()), "Shares Over Time plot generated.")
    _report(generate_comments_over_time_plot(sample_merged_posts_df.copy()), "Comments Over Time plot generated.")
    _report(
        generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy(), sentiment_column='comment_sentiment'),
        "Comments Sentiment Breakdown plot generated.",
    )

    logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
    _report(
        generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D'),
        "Post Frequency (Daily) plot generated.",
    )
    _report(
        generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W'),
        "Post Frequency (Weekly) plot generated.",
    )
    _report(
        generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='media_type'),
        "Content Format Breakdown plot generated.",
    )
    _report(
        generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5),
        "Content Topic Breakdown plot generated.",
    )

    logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
    empty_df = pd.DataFrame()
    _report(generate_post_frequency_plot(empty_df.copy()), "Post Frequency (empty df) placeholder generated.")

    _report(
        generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col'),
        "Content Format (missing col) placeholder generated.",
    )

    # Frame without the topics column at all.
    _report(
        generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels'),
        "Content Topic (missing col) placeholder generated.",
    )

    # Topics column present but holding only missing values.
    df_no_topics_data = sample_merged_posts_df.copy()
    df_no_topics_data['eb_labels'] = None
    _report(
        generate_content_topic_breakdown_plot(df_no_topics_data, topics_col='eb_labels'),
        "Content Topic (all None labels) placeholder generated.",
    )

    logging.info("Test script finished. Review plots if displayed locally or saved.")