"""Matplotlib figure builders for social-media analytics data.

Every public ``generate_*`` function takes a pandas DataFrame, validates it,
and returns a Matplotlib ``Figure``. When the input is missing, empty, lacks a
required column, or plotting raises, a text-only placeholder figure is
returned instead of propagating the plotting error.

Side effects:
    * Importing this module calls ``logging.basicConfig``.
    * Every ``generate_*`` function calls ``plt.close('all')`` before it
      returns, so *all* figures registered with pyplot (not only the one that
      was just built) are dropped from pyplot's registry. The returned
      ``Figure`` object itself is still handed back to the caller.
"""

# NOTE: base64, BytesIO and numpy are not referenced in this module; the
# imports are retained because other code may rely on them being present.
import base64
import functools
import logging
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Configure logging for this module
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(module)s - %(message)s',
)

_NO_DATA_MESSAGE = "No data available for the selected period."


def _closes_pyplot_figures(func):
    """Wrap *func* so ``plt.close('all')`` runs on every exit path.

    The original implementation only closed figures when execution reached the
    ``try`` block, so placeholder figures returned by the early validation
    checks stayed registered with pyplot. Doing the cleanup here guarantees it
    also happens for those early returns and when *func* raises.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        finally:
            plt.close('all')

    return wrapper


def _row_count(df):
    """Return ``len(df)`` for logging, or the string ``'None'`` if df is None."""
    return len(df) if df is not None else 'None'


def _copy_with_datetime_column(df, column):
    """Return a copy of *df* whose *column* is datetime-typed.

    Values that cannot be parsed become ``NaT`` (``errors='coerce'``); the
    caller decides whether to drop them. The input frame is never mutated.
    """
    frame = df.copy()
    if not pd.api.types.is_datetime64_any_dtype(frame[column]):
        frame[column] = pd.to_datetime(frame[column], errors='coerce')
    return frame


def create_placeholder_plot(title="No Data or Plot Error",
                            message="Data might be empty or an error occurred."):
    """Create a text-only figure indicating missing data or a plotting error.

    Args:
        title: First line of the text shown in the figure.
        message: Second line of the text shown in the figure.

    Returns:
        A Matplotlib ``Figure`` with its axes hidden. If building the regular
        placeholder fails, a minimal figure reading "Plot generation error" is
        returned instead.

    Note:
        The figure is *not* closed here; it stays registered with pyplot until
        the caller (or one of the ``generate_*`` functions) closes it.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.text(0.5, 0.5, f"{title}\n{message}",
                ha='center', va='center', fontsize=10, wrap=True)
        ax.axis('off')
        fig.tight_layout()
        return fig
    except Exception as e:
        logging.error("Error creating placeholder plot: %s", e)
        # Release the half-built figure so it does not linger in pyplot.
        if fig is not None:
            plt.close(fig)
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Plot generation error", ha='center', va='center')
        ax.axis('off')
        return fig


def _generate_activity_plot(df, date_column, label, line_color=None):
    """Build a daily-count line plot shared by the posts and mentions plots.

    Args:
        df: DataFrame with one row per item, or ``None``.
        date_column: Name of the column holding each item's timestamp.
        label: Capitalised plural used in titles and log messages
            (``"Posts"`` or ``"Mentions"``); its lower-case form is used
            where the messages need it.
        line_color: Optional Matplotlib colour for the line. When ``None`` the
            default colour cycle is used.

    Returns:
        The line-plot ``Figure``, or a placeholder figure when there is
        nothing to plot or plotting fails.
    """
    noun = label.lower()
    title = f"{label} Activity Over Time"
    logging.info(
        "Generating %s activity plot. Date column: '%s'. Input df rows: %s",
        noun, date_column, _row_count(df),
    )

    if df is None or df.empty:
        logging.warning("%s activity: DataFrame is empty.", label)
        return create_placeholder_plot(title=title, message=_NO_DATA_MESSAGE)

    if date_column not in df.columns:
        logging.warning(
            "%s activity: Date column '%s' is missing from DataFrame columns: %s.",
            label, date_column, df.columns.tolist(),
        )
        return create_placeholder_plot(
            title=title, message=f"Date column '{date_column}' not found.")

    try:
        dated = _copy_with_datetime_column(df, date_column)
        dated = dated.dropna(subset=[date_column])
        if dated.empty:
            logging.info(
                "%s activity: DataFrame empty after NaNs dropped from date column.",
                label,
            )
            return create_placeholder_plot(
                title=title, message="No valid date entries found.")

        # One bucket per calendar day; days without rows get a count of 0.
        daily_counts = dated.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            logging.info("%s activity: No %s after resampling by day.", label, noun)
            return create_placeholder_plot(
                title=title, message=f"No {noun} in the selected period.")

        line_style = {'marker': 'o', 'linestyle': '-'}
        if line_color is not None:
            line_style['color'] = line_color

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_counts.plot(kind='line', ax=ax, **line_style)
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel(f'Number of {label}')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        fig.tight_layout()
        logging.info("Successfully generated %s activity plot.", noun)
        return fig
    except Exception as e:
        logging.error("Error generating %s activity plot: %s", noun, e, exc_info=True)
        return create_placeholder_plot(title=f"{label} Activity Error", message=str(e))


@_closes_pyplot_figures
def generate_posts_activity_plot(df, date_column='published_at'):
    """Generate a line plot of the number of posts per day.

    Args:
        df: DataFrame with one row per post, or ``None``.
        date_column: Column holding each post's timestamp. Non-datetime values
            are parsed with ``pd.to_datetime``; unparseable rows are dropped.

    Returns:
        A Matplotlib ``Figure``: the activity plot, or a placeholder when the
        frame is ``None``/empty, the column is missing, no valid dates remain,
        or plotting raises.
    """
    return _generate_activity_plot(df, date_column, label="Posts")


@_closes_pyplot_figures
def generate_engagement_type_plot(df, likes_col='likes_count',
                                  comments_col='comments_count',
                                  shares_col='shares_count'):
    """Generate a bar plot of total likes, comments and shares.

    The input is expected to be pre-filtered by date if necessary.

    Args:
        df: DataFrame with one row per post, or ``None``.
        likes_col: Column with like counts.
        comments_col: Column with comment counts.
        shares_col: Column with share counts.

    Returns:
        A Matplotlib ``Figure``: the bar plot, or a placeholder when the frame
        is ``None``/empty, a column is missing, all totals are zero, or
        plotting raises.
    """
    title = "Post Engagement Types"
    logging.info("Generating engagement type plot. Input df rows: %s", _row_count(df))

    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title=title, message=_NO_DATA_MESSAGE)

    required_cols = [likes_col, comments_col, shares_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        msg = (f"Engagement type: Columns missing: {missing_cols}. "
               f"Available: {df.columns.tolist()}")
        logging.warning(msg)
        return create_placeholder_plot(title=title, message=msg)

    try:
        # Non-numeric and missing entries count as 0 engagement.
        totals = {
            name: pd.to_numeric(df[col], errors='coerce').fillna(0).sum()
            for name, col in (('Likes', likes_col),
                              ('Comments', comments_col),
                              ('Shares', shares_col))
        }

        if all(total == 0 for total in totals.values()):
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(
                title=title,
                message="No engagement data (likes, comments, shares) in the selected period.")

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(list(totals), list(totals.values()),
                      color=['skyblue', 'lightgreen', 'salmon'])
        ax.set_title('Total Post Engagement Types')
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Lift each value label by 1% of the tallest bar so it clears the bar.
        label_offset = 0.01 * max(totals.values())
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2.0, height + label_offset,
                    str(int(height)), ha='center', va='bottom')

        fig.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error("Error generating engagement type plot: %s", e, exc_info=True)
        return create_placeholder_plot(title="Engagement Type Error", message=str(e))


@_closes_pyplot_figures
def generate_mentions_activity_plot(df, date_column='date'):
    """Generate a line plot of the number of mentions per day.

    Args:
        df: DataFrame with one row per mention, or ``None``.
        date_column: Column holding each mention's timestamp. Non-datetime
            values are parsed with ``pd.to_datetime``; unparseable rows are
            dropped.

    Returns:
        A Matplotlib ``Figure``: the activity plot (purple line), or a
        placeholder when the frame is ``None``/empty, the column is missing,
        no valid dates remain, or plotting raises.
    """
    return _generate_activity_plot(df, date_column, label="Mentions",
                                   line_color='purple')


@_closes_pyplot_figures
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Generate a pie chart of the mention sentiment distribution.

    The input is expected to be pre-filtered by date if necessary.

    Args:
        df: DataFrame with one row per mention, or ``None``.
        sentiment_column: Column holding the sentiment label of each mention.

    Returns:
        A Matplotlib ``Figure``: the pie chart, or a placeholder when the
        frame is ``None``/empty, the column is missing, the column holds no
        countable values, or plotting raises.
    """
    title = "Mention Sentiment Distribution"
    logging.info(
        "Generating mention sentiment plot. Sentiment column: '%s'. Input df rows: %s",
        sentiment_column, _row_count(df),
    )

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title=title, message=_NO_DATA_MESSAGE)

    if sentiment_column not in df.columns:
        msg = (f"Mention sentiment: Column '{sentiment_column}' is missing. "
               f"Available: {df.columns.tolist()}")
        logging.warning(msg)
        return create_placeholder_plot(title=title, message=msg)

    try:
        # value_counts() does not mutate df, so no defensive copy is needed.
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(
                title=title, message="No sentiment data available.")

        known_colors = {'Positive': 'lightgreen', 'Negative': 'salmon',
                        'Neutral': 'lightskyblue', 'Mixed': 'gold'}
        # Labels outside the known set fall back to a neutral grey.
        pie_colors = [known_colors.get(label, '#cccccc')
                      for label in sentiment_counts.index]

        fig, ax = plt.subplots(figsize=(8, 5))
        ax.pie(sentiment_counts, labels=sentiment_counts.index,
               autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        ax.axis('equal')
        fig.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error("Error generating mention sentiment plot: %s", e, exc_info=True)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))


@_closes_pyplot_figures
def generate_follower_growth_plot(df, date_column='date',
                                  count_column='total_followers'):
    """Generate a line plot of total followers over time.

    This function receives the *unfiltered* follower DataFrame.

    Args:
        df: DataFrame with one row per observation, or ``None``.
        date_column: Column holding the observation date. Non-datetime values
            are parsed with ``pd.to_datetime``.
        count_column: Column holding the follower total; coerced to numeric.

    Returns:
        A Matplotlib ``Figure``: the growth plot, or a placeholder when the
        frame is ``None``/empty, a column is missing, no row has both a valid
        date and a valid count, or plotting raises.
    """
    title = "Follower Growth Over Time"
    logging.info(
        "Generating follower growth plot. Date col: '%s', Count col: '%s'. "
        "Input df rows: %s",
        date_column, count_column, _row_count(df),
    )

    if df is None or df.empty:
        logging.warning("Follower growth: DataFrame is empty.")
        return create_placeholder_plot(title=title, message="No follower data available.")

    missing = [col for col in (date_column, count_column) if col not in df.columns]
    if missing:
        msg = (f"Follower growth: Columns missing: {missing}. "
               f"Available: {df.columns.tolist()}")
        logging.warning(msg)
        return create_placeholder_plot(title=title, message=msg)

    try:
        growth = _copy_with_datetime_column(df, date_column)
        growth[count_column] = pd.to_numeric(growth[count_column], errors='coerce')
        growth = growth.dropna(subset=[date_column, count_column])
        if growth.empty:
            logging.info(
                "Follower growth: DataFrame empty after NaNs dropped from "
                "date/count columns.")
            return create_placeholder_plot(
                title=title, message="No valid data for follower growth.")

        # Sort chronologically so the line is drawn left to right.
        growth = growth.sort_values(by=date_column)

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(growth[date_column], growth[count_column],
                marker='o', linestyle='-', color='green')
        ax.set_title('Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        fig.tight_layout()
        logging.info("Successfully generated follower growth plot.")
        return fig
    except Exception as e:
        logging.error("Error generating follower growth plot: %s", e, exc_info=True)
        return create_placeholder_plot(title="Follower Growth Error", message=str(e))


def _run_smoke_checks():
    """Exercise every plot builder with small in-memory sample data."""
    # Create dummy data for testing
    sample_posts_df = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02',
                                        '2023-01-03', '2023-01-03', '2023-01-03']),
        'likes_count': [10, 5, 12, 8, 15, 3],
        'comments_count': [2, 1, 3, 1, 4, 0],
        'shares_count': [1, 0, 1, 1, 2, 0],
    })

    sample_mentions_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })

    sample_follower_stats_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03',
                                '2023-01-04', '2023-01-05']),
        'total_followers': [100, 105, 115, 120, 118],  # Example data
    })

    logging.info("--- Testing Plot Generations ---")

    fig1 = generate_posts_activity_plot(sample_posts_df.copy(), date_column='published_at')
    if fig1:
        logging.info("Posts activity plot generated.")

    fig2 = generate_engagement_type_plot(sample_posts_df.copy())
    if fig2:
        logging.info("Engagement type plot generated.")

    fig3 = generate_mentions_activity_plot(sample_mentions_df.copy(), date_column='date')
    if fig3:
        logging.info("Mentions activity plot generated.")

    fig4 = generate_mention_sentiment_plot(sample_mentions_df.copy())
    if fig4:
        logging.info("Mention sentiment plot generated.")

    fig5 = generate_follower_growth_plot(sample_follower_stats_df.copy(),
                                         date_column='date',
                                         count_column='total_followers')
    if fig5:
        logging.info("Follower growth plot generated.")

    logging.info("--- Testing Placeholders ---")

    fig_placeholder = create_placeholder_plot()
    if fig_placeholder:
        logging.info("Placeholder plot generated.")

    empty_df = pd.DataFrame(columns=['published_at'])  # Empty df with column
    fig_empty_posts = generate_posts_activity_plot(empty_df, date_column='published_at')
    if fig_empty_posts:
        logging.info("Empty posts activity plot (placeholder) generated.")

    df_no_col = pd.DataFrame({'some_other_date': pd.to_datetime(['2023-01-01'])})
    fig_no_col_posts = generate_posts_activity_plot(df_no_col, date_column='published_at')
    if fig_no_col_posts:
        logging.info("Posts activity with missing column (placeholder) generated.")

    logging.info("Test script finished.")


if __name__ == '__main__':
    _run_smoke_checks()