Spaces:
Running
Running
import pandas as pd | |
import matplotlib.pyplot as plt | |
import logging | |
from io import BytesIO | |
import base64 | |
import numpy as np | |
import matplotlib.ticker as mticker | |
import ast # For safely evaluating string representations of lists | |
# Configure logging for this module | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s') | |
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Return a text-only Matplotlib figure for "no data" and error situations.

    Args:
        title: Heading placed on the first line of the centred text.
        message: Explanation placed on the line below the heading.

    Returns:
        A figure whose hidden axes show ``title`` and ``message``. If building
        it raises, the error is logged and a bare fallback figure carrying a
        fixed "Fatal" message is returned instead.
    """
    # The figure is deliberately not closed here: per the original author's
    # note, Gradio handles the figure object once it has been returned.
    try:
        figure, axes = plt.subplots(figsize=(8, 4))
        axes.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
        axes.axis('off')
        plt.tight_layout()
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}")
        # Last resort: a default-sized figure with a fixed message.
        figure, axes = plt.subplots()
        axes.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
        axes.axis('off')
    return figure
def generate_posts_activity_plot(df, date_column='published_at'):
    """Plot how many posts were published on each calendar day.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp. Values that are not
            already datetimes are parsed with ``pd.to_datetime``; rows whose
            date cannot be parsed are dropped.

    Returns:
        A Matplotlib figure with a daily line chart, or a placeholder figure
        when there is no usable data or an error occurs.
    """
    chart_title = "Posts Activity Over Time"
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Posts activity: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    if date_column not in df.columns:
        logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
        return create_placeholder_plot(title=chart_title, message=f"Date column '{date_column}' not found.")

    try:
        posts = df.copy()
        already_datetime = pd.api.types.is_datetime64_any_dtype(posts[date_column])
        if not already_datetime:
            posts[date_column] = pd.to_datetime(posts[date_column], errors='coerce')

        dated_posts = posts.dropna(subset=[date_column])
        if dated_posts.empty:
            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=chart_title, message="No valid date entries found.")

        # One bucket per calendar day; size() counts the rows in each bucket.
        daily_counts = dated_posts.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            logging.info("Posts activity: No posts after resampling by day.")
            return create_placeholder_plot(title=chart_title, message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_counts.plot(kind='line', ax=ax, marker='o', linestyle='-')
        ax.set(title=chart_title, xlabel='Date', ylabel='Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated posts activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Posts Activity Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included)
        # so figures do not accumulate between calls.
        plt.close('all')
def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'):
    """Draw a bar chart of total likes, comments and shares.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        likes_col: Column with per-post like counts.
        comments_col: Column with per-post comment counts.
        shares_col: Column with per-post share counts.

    Returns:
        A Matplotlib figure with one labelled bar per engagement type, or a
        placeholder figure when data is missing, all totals are zero, or an
        error occurs. Non-numeric cells are counted as zero.
    """
    chart_title = "Post Engagement Types"
    logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
    required_cols = [likes_col, comments_col, shares_col]

    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=chart_title, message=msg)

    try:
        numeric = df.copy()
        engagement_data = {}
        for label, column in zip(('Likes', 'Comments', 'Shares'), required_cols):
            numeric[column] = pd.to_numeric(numeric[column], errors='coerce').fillna(0)
            engagement_data[label] = numeric[column].sum()

        if all(total == 0 for total in engagement_data.values()):
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(title=chart_title, message="No engagement data (likes, comments, shares) in the selected period.")

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon'])
        ax.set(title='Total Post Engagement Types', xlabel='Engagement Type', ylabel='Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Lift each value label 1% of the tallest bar above its own bar.
        label_offset = 0.01 * max(engagement_data.values(), default=10)
        for bar in bars:
            height = bar.get_height()
            x_centre = bar.get_x() + bar.get_width() / 2.0
            ax.text(x_centre, height + label_offset, str(int(height)), ha='center', va='bottom')

        plt.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Engagement Type Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_mentions_activity_plot(df, date_column='date'):
    """Plot how many mentions occurred on each calendar day.

    Args:
        df: DataFrame of mentions; may be ``None`` or empty.
        date_column: Column holding each mention's timestamp. Values that are
            not already datetimes are parsed with ``pd.to_datetime``; rows
            whose date cannot be parsed are dropped.

    Returns:
        A Matplotlib figure with a daily line chart, or a placeholder figure
        when there is no usable data or an error occurs.
    """
    chart_title = "Mentions Activity Over Time"
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Mentions activity: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    if date_column not in df.columns:
        logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
        return create_placeholder_plot(title=chart_title, message=f"Date column '{date_column}' not found.")

    try:
        mentions = df.copy()
        already_datetime = pd.api.types.is_datetime64_any_dtype(mentions[date_column])
        if not already_datetime:
            mentions[date_column] = pd.to_datetime(mentions[date_column], errors='coerce')

        dated_mentions = mentions.dropna(subset=[date_column])
        if dated_mentions.empty:
            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=chart_title, message="No valid date entries found.")

        # One bucket per calendar day; size() counts the rows in each bucket.
        daily_counts = dated_mentions.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            logging.info("Mentions activity: No mentions after resampling by day.")
            return create_placeholder_plot(title=chart_title, message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_counts.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
        ax.set(title=chart_title, xlabel='Date', ylabel='Number of Mentions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated mentions activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Draw a pie chart of how mentions are distributed across sentiments.

    Args:
        df: DataFrame of mentions; may be ``None`` or empty.
        sentiment_column: Column holding each mention's sentiment label.

    Returns:
        A Matplotlib figure with one pie slice per distinct label, or a
        placeholder figure when data is missing or an error occurs.
    """
    title = "Mention Sentiment Distribution"
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title=title, message="No data available for the selected period.")

    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=title, message=msg)

    try:
        # value_counts() does not modify df, so no defensive copy is needed.
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(title=title, message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap takes
        # the same (name, lut) arguments and is available in older releases too.
        colors_map = plt.get_cmap('Pastel1', len(sentiment_counts))
        pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        ax.axis('equal')  # keep the pie circular
        plt.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
# --- Follower plot functions ---
def generate_followers_count_over_time_plot(df, date_info_column='category_name',
                                            organic_count_col='follower_count_organic',
                                            paid_count_col='follower_count_paid',
                                            type_filter_column='follower_count_type',
                                            type_value='follower_gains_monthly'):
    """Plot organic and paid follower counts over time for one record type.

    Only rows where ``type_filter_column`` equals ``type_value`` are used.
    Dates are read from ``date_info_column`` (strings such as "2024-08-01")
    and rows with unparseable dates are dropped; non-numeric counts become 0.

    Returns:
        A Matplotlib figure with two lines (organic, paid), or a placeholder
        figure when data is missing or an error occurs.
    """
    title = f"Followers Count Over Time ({type_value})"
    logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        # .copy() gives an independent frame, so the caller's data stays
        # untouched and the column assignments below raise no
        # SettingWithCopyWarning.
        followers = df[df[type_filter_column] == type_value].copy()
        if followers.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        followers['datetime_obj'] = pd.to_datetime(followers[date_info_column], errors='coerce')
        for count_col in (organic_count_col, paid_count_col):
            followers[count_col] = pd.to_numeric(followers[count_col], errors='coerce').fillna(0)

        followers = followers.dropna(subset=['datetime_obj', organic_count_col, paid_count_col])
        followers = followers.sort_values(by='datetime_obj')
        if followers.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")

        fig, ax = plt.subplots(figsize=(10, 5))
        timeline = followers['datetime_obj']
        ax.plot(timeline, followers[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers')
        ax.plot(timeline, followers[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers')
        ax.set(title=title, xlabel='Date', ylabel='Follower Count')
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_followers_growth_rate_plot(df, date_info_column='category_name',
                                        organic_count_col='follower_count_organic',
                                        paid_count_col='follower_count_paid',
                                        type_filter_column='follower_count_type',
                                        type_value='follower_gains_monthly'):
    """Plot period-over-period growth (in %) of organic and paid followers.

    Only rows where ``type_filter_column`` equals ``type_value`` are used.
    Dates are read from ``date_info_column`` (strings such as "2024-08-01").
    Growth is ``pct_change() * 100`` on the date-sorted counts; infinite
    results are treated as missing.

    Returns:
        A Matplotlib figure with up to two lines, or a placeholder figure when
        fewer than two dated rows exist, no rate could be computed, data is
        missing, or an error occurs.
    """
    title = f"Follower Growth Rate ({type_value})"
    logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        growth = df[df[type_filter_column] == type_value].copy()
        if growth.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        growth['datetime_obj'] = pd.to_datetime(growth[date_info_column], errors='coerce')
        for count_col in (organic_count_col, paid_count_col):
            growth[count_col] = pd.to_numeric(growth[count_col], errors='coerce')

        growth = growth.dropna(subset=['datetime_obj'])
        growth = growth.sort_values(by='datetime_obj').set_index('datetime_obj')
        # pct_change needs a previous row, so at least two points are required.
        if len(growth) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        growth['organic_growth_rate'] = growth[organic_count_col].pct_change() * 100
        growth['paid_growth_rate'] = growth[paid_count_col].pct_change() * 100
        # Division by a zero baseline yields +/-inf; treat those as missing.
        growth = growth.replace([np.inf, -np.inf], np.nan)

        fig, ax = plt.subplots(figsize=(10, 5))
        line_specs = (
            ('organic_growth_rate', {'marker': 'o', 'linestyle': '-', 'color': 'lightcoral', 'label': 'Organic Growth Rate'}),
            ('paid_growth_rate', {'marker': 'x', 'linestyle': '--', 'color': 'mediumpurple', 'label': 'Paid Growth Rate'}),
        )
        drew_any_line = False
        for rate_col, line_style in line_specs:
            if growth[rate_col].dropna().empty:
                continue  # nothing but missing values for this series
            ax.plot(growth.index, growth[rate_col], **line_style)
            drew_any_line = True

        if not drew_any_line:
            return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")

        ax.set(title=title, xlabel='Date', ylabel='Growth Rate (%)')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter())
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_followers_by_demographics_plot(df, category_col='category_name',
                                            organic_count_col='follower_count_organic',
                                            paid_count_col='follower_count_paid',
                                            type_filter_column='follower_count_type',
                                            type_value=None, plot_title="Followers by Demographics"):
    """Draw grouped bars of organic vs. paid followers per demographic category.

    Only rows where ``type_filter_column`` equals ``type_value`` are used;
    counts are summed per ``category_col`` value (e.g. a location or industry)
    and the ten largest categories by combined total are shown.

    Returns:
        A Matplotlib figure with two bars per category, or a placeholder
        figure when ``type_value`` is ``None``, data is missing, or an error
        occurs.
    """
    logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=plot_title, message="No follower data available.")

    required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    if type_value is None:
        return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")

    try:
        segment = df[df[type_filter_column] == type_value].copy()
        if segment.empty:
            return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")

        for count_col in (organic_count_col, paid_count_col):
            segment[count_col] = pd.to_numeric(segment[count_col], errors='coerce').fillna(0)

        per_category = segment.groupby(category_col)[[organic_count_col, paid_count_col]].sum()
        # Rank categories by combined followers; the helper column is
        # discarded once the rows are ordered.
        per_category = (
            per_category
            .assign(total_for_sort=per_category[organic_count_col] + per_category[paid_count_col])
            .sort_values(by='total_for_sort', ascending=False)
            .drop(columns=['total_for_sort'])
        )
        if per_category.empty:
            return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")

        top_n = 10
        exceeds_limit = len(per_category) > top_n
        if exceeds_limit:
            per_category = per_category.head(top_n)
        plot_title_updated = f"{plot_title} (Top {top_n})" if exceeds_limit else plot_title

        # Use a larger canvas once there are more than five categories.
        figure_size = (12, 7) if len(per_category) > 5 else (10, 6)
        fig, ax = plt.subplots(figsize=figure_size)

        bar_width = 0.35
        positions = np.arange(len(per_category.index))
        half_width = bar_width / 2
        organic_bars = ax.bar(positions - half_width, per_category[organic_count_col], bar_width, label='Organic', color='skyblue')
        paid_bars = ax.bar(positions + half_width, per_category[paid_count_col], bar_width, label='Paid', color='lightcoral')

        ax.set(title=plot_title_updated, xlabel=category_col.replace('_', ' ').title(), ylabel='Number of Followers')
        ax.set_xticks(positions)
        ax.set_xticklabels(per_category.index, rotation=45, ha="right")
        ax.legend()
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Value labels above the bars; zero-height bars are left unlabelled.
        for bar in [*organic_bars, *paid_bars]:
            height = bar.get_height()
            if height <= 0:
                continue
            x_centre = bar.get_x() + bar.get_width() / 2.0
            ax.text(x_centre, height + (0.01 * ax.get_ylim()[1]),
                    str(int(height)), ha='center', va='bottom', fontsize=8)

        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """Plot the mean engagement rate per calendar day.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        engagement_rate_col: Column holding each post's engagement rate.

    Returns:
        A Matplotlib figure with a daily line chart whose y axis is formatted
        as a percentage, or a placeholder figure when data is missing or an
        error occurs. Rows with an unparseable date or rate are dropped, and
        days without posts are omitted.
    """
    title = "Engagement Rate Over Time"
    logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for engagement rate.")

    required_cols = [date_column, engagement_rate_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        df_copy = df.copy()
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[engagement_rate_col] = pd.to_numeric(df_copy[engagement_rate_col], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column, engagement_rate_col]).set_index(date_column)
        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Days without posts resample to NaN and are dropped from the line.
        engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean().dropna()
        if engagement_over_time.empty:
            return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Engagement Rate')

        # Scale heuristic: a peak within [0, 1.5] is read as a 0-1 fraction
        # (1.0 is shown as 100%); anything else is read as already being on a
        # 0-100 scale. The original follow-up branches could never run because
        # they required xmax == 1.0 together with a peak above 1.5.
        max_rate_val = engagement_over_time.max()
        formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0
        ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))

        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
    """Plot the daily sum of ``reach_col`` (clicks by default) as a line chart.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        reach_col: Column holding the per-post value to sum per day.

    Returns:
        A Matplotlib figure, or a placeholder figure when data is missing or
        an error occurs. Rows with an unparseable date or value are dropped.
    """
    title = "Reach Over Time (Clicks)"
    logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for reach.")

    missing_cols = [col for col in (date_column, reach_col) if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[reach_col] = pd.to_numeric(cleaned[reach_col], errors='coerce')
        by_date = cleaned.dropna(subset=[date_column, reach_col]).set_index(date_column)
        if by_date.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")

        # No emptiness check is needed after resampling: by_date has rows, and
        # days without posts sum to 0, so the line shows zeros there.
        daily_totals = by_date.resample('D')[reach_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color='mediumseagreen')
        ax.set(title=title, xlabel='Date', ylabel='Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
    """Plot the daily sum of impressions as a line chart.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        impressions_col: Column holding per-post impression counts.

    Returns:
        A Matplotlib figure, or a placeholder figure when data is missing or
        an error occurs. Rows with an unparseable date or count are dropped.
    """
    title = "Impressions Over Time"
    logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for impressions.")

    missing_cols = [col for col in (date_column, impressions_col) if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[impressions_col] = pd.to_numeric(cleaned[impressions_col], errors='coerce')
        by_date = cleaned.dropna(subset=[date_column, impressions_col]).set_index(date_column)
        if by_date.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")

        daily_totals = by_date.resample('D')[impressions_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color='slateblue')
        ax.set(title=title, xlabel='Date', ylabel='Total Impressions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
# --- Per-metric time-series plots (likes, clicks, shares, comments) ---
def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
    """Plot the daily sum of likes as a line chart.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        likes_col: Column holding per-post like counts.

    Returns:
        A Matplotlib figure, or a placeholder figure when data is missing or
        an error occurs. Rows with an unparseable date or count are dropped.
    """
    title = "Reactions (Likes) Over Time"
    logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for likes.")

    required_cols = [date_column, likes_col]
    if not all(col in df.columns for col in required_cols):
        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[likes_col] = pd.to_numeric(cleaned[likes_col], errors='coerce')
        by_date = cleaned.dropna(subset=required_cols).set_index(date_column)
        if by_date.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        daily_totals = by_date.resample('D')[likes_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color='crimson')
        ax.set(title=title, xlabel='Date', ylabel='Total Likes')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
    """Plot the daily sum of clicks as a line chart titled "Clicks Over Time".

    This previously delegated to the reach plot, so the figure and its
    placeholders carried the reach title instead of the clicks title. The
    chart is now built here so the title and messages match this function.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        clicks_col: Column holding per-post click counts.

    Returns:
        A Matplotlib figure, or a placeholder figure when data is missing or
        an error occurs. Rows with an unparseable date or count are dropped.
    """
    title = "Clicks Over Time"
    logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for clicks.")

    required_cols = [date_column, clicks_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        df_copy = df.copy()
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[clicks_col] = pd.to_numeric(df_copy[clicks_col], errors='coerce')
        df_copy = df_copy.dropna(subset=required_cols).set_index(date_column)
        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Days without posts sum to 0, so the line shows zeros there.
        clicks_over_time = df_copy.resample('D')[clicks_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(clicks_over_time.index, clicks_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
    """Plot the daily sum of shares as a line chart.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        shares_col: Column holding per-post share counts.

    Returns:
        A Matplotlib figure, or a placeholder figure when data is missing or
        an error occurs. Rows with an unparseable date or count are dropped.
    """
    title = "Shares Over Time"
    logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for shares.")

    required_cols = [date_column, shares_col]
    if not all(col in df.columns for col in required_cols):
        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[shares_col] = pd.to_numeric(cleaned[shares_col], errors='coerce')
        by_date = cleaned.dropna(subset=required_cols).set_index(date_column)
        if by_date.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        daily_totals = by_date.resample('D')[shares_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color='teal')
        ax.set(title=title, xlabel='Date', ylabel='Total Shares')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
    """Plot the daily sum of comments as a line chart.

    Args:
        df: DataFrame of posts; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        comments_col: Column holding per-post comment counts.

    Returns:
        A Matplotlib figure, or a placeholder figure when data is missing or
        an error occurs. Rows with an unparseable date or count are dropped.
    """
    title = "Comments Over Time"
    logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for comments.")

    required_cols = [date_column, comments_col]
    if not all(col in df.columns for col in required_cols):
        return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[comments_col] = pd.to_numeric(cleaned[comments_col], errors='coerce')
        by_date = cleaned.dropna(subset=required_cols).set_index(date_column)
        if by_date.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        daily_totals = by_date.resample('D')[comments_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color='gold')
        ax.set(title=title, xlabel='Date', ylabel='Total Comments')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Drop every figure from pyplot's registry (the returned one included).
        plt.close('all')
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
    """Generate a pie chart of how comments are distributed across sentiments.

    The frame may be post-level (one aggregated/example sentiment per post)
    or comment-level (one sentiment per comment); each non-missing row
    contributes one count to its sentiment category.

    Args:
        df: Source DataFrame; ``None`` or an empty frame yields a placeholder.
        sentiment_column: Column holding the sentiment label. When it is
            absent but a ``'sentiment'`` column exists, that column is used
            instead.
        date_column: Accepted for signature parity with the other plot
            functions; it is not used by this function.

    Returns:
        A Matplotlib figure: the pie chart on success, otherwise the figure
        returned by ``create_placeholder_plot`` describing what went wrong.
    """
    title = "Breakdown of Comments by Sentiment"
    logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data for comment sentiment.")

    if sentiment_column not in df.columns:
        # Check for a common alternative if the primary is missing (e.g. from post-level data)
        if 'sentiment' in df.columns and sentiment_column != 'sentiment':
            logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
            sentiment_column = 'sentiment'  # Use fallback
        else:
            return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")

    # Nothing to chart when every entry in the column is missing.
    if df[sentiment_column].isnull().all():
        return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")

    try:
        # Drop missing values BEFORE casting to str: casting first would turn
        # NaN/None into the literal labels 'nan'/'None', which would then be
        # counted and drawn as if they were real sentiment categories.
        sentiments = df[sentiment_column].dropna().astype(str)
        sentiment_counts = sentiments.value_counts()
        if sentiment_counts.empty or sentiment_counts.sum() == 0:
            return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")

        fig, ax = plt.subplots(figsize=(8, 5))
        # pyplot.get_cmap is used rather than matplotlib.cm.get_cmap, which
        # newer Matplotlib releases no longer provide.
        colors_map = plt.get_cmap('coolwarm', len(sentiment_counts))
        pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title(title)
        ax.axis('equal')  # Keep the pie circular.
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Executes after the return value has been chosen.
        plt.close('all')
# --- NEW PLOT FUNCTIONS FOR CONTENT STRATEGY --- | |
def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
    """Generate a plot of how many posts were published per period.

    Weekly (``'W'``) and monthly (``'M'`` / ``'ME'``) periods are drawn as a
    bar chart; every other period is drawn as a line chart, with point
    markers when the period is daily (``'D'``).

    Args:
        df: Post-level DataFrame; ``None`` or an empty frame yields a
            placeholder.
        date_column: Column holding each post's timestamp. Non-datetime
            columns are converted, and unparseable values are dropped.
        resample_period: pandas offset alias passed to ``resample``.

    Returns:
        A Matplotlib figure: the chart on success, otherwise the figure
        returned by ``create_placeholder_plot`` describing what went wrong.
    """
    title = f"Post Frequency Over Time ({resample_period})"
    logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if date_column not in df.columns:
        return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")

    try:
        df_copy = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid date entries found.")

        # Number of rows (posts) that fall into each period.
        post_frequency = df_copy.set_index(date_column).resample(resample_period).size()
        if post_frequency.empty:
            return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")

        fig, ax = plt.subplots(figsize=(10, 5))
        if resample_period in ('M', 'ME', 'W'):
            # Bar charts must not receive a `marker` keyword (not even None):
            # it is forwarded to the bar rectangles, which have no such
            # property, and the resulting error would replace the chart with
            # the error placeholder.
            # Label the bars with plain dates instead of full timestamps.
            post_frequency.index = post_frequency.index.strftime('%Y-%m-%d')
            post_frequency.plot(kind='bar', ax=ax)
        else:
            post_frequency.plot(kind='line', ax=ax, marker='o' if resample_period == 'D' else None)

        ax.set_title(title)
        ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Executes after the return value has been chosen.
        plt.close('all')
def generate_content_format_breakdown_plot(df, format_col='media_type'):
    """Draw a bar chart counting posts per content format.

    Args:
        df: Post-level DataFrame; ``None`` or an empty frame yields a
            placeholder.
        format_col: Column holding each post's format / media type.

    Returns:
        A Matplotlib figure: the bar chart (each bar annotated with its
        count) on success, otherwise the figure returned by
        ``create_placeholder_plot`` describing what went wrong.
    """
    title = "Breakdown of Content by Format"
    logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if format_col not in df.columns:
        return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")

    try:
        working = df.copy()
        counts_by_format = working[format_col].value_counts().dropna()
        if counts_by_format.empty:
            return create_placeholder_plot(title=title, message="No content format data available.")

        fig, ax = plt.subplots(figsize=(8, 6))
        counts_by_format.plot(kind='bar', ax=ax, color='skyblue')
        ax.set_title(title)
        ax.set_xlabel('Media Type')
        ax.set_ylabel('Number of Posts')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()

        # Write each count just above its bar; the gap is 1% of the tallest bar.
        label_gap = 0.01 * counts_by_format.max()
        for position, count in enumerate(counts_by_format):
            ax.text(position, count + label_gap, str(count), ha='center', va='bottom')

        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Executes after the return value has been chosen.
        plt.close('all')
def _parse_eb_label(label_data):
    """Normalize one raw topic-label cell into a list of labels.

    Accepted inputs:
        * ``list`` - returned unchanged.
        * ``tuple`` / ``set`` / ``frozenset`` / numpy array - converted to a
          list (arrays are flattened first).
        * ``str`` - parsed as a Python literal when possible. A parsed
          list/tuple/set becomes the label list; any other parsed literal
          becomes a single label ``[str(value)]``. A string that is not a
          valid literal is treated as one label, unless it is blank.
        * anything else (``None``, NaN, numbers, ...) - no labels.

    Args:
        label_data: The raw cell value.

    Returns:
        A list of labels; empty when the cell carries no label.
    """
    if isinstance(label_data, list):
        return label_data
    if isinstance(label_data, np.ndarray):
        # Handled explicitly: an array has no single truth value, so it must
        # never reach a scalar-style missing-value check.
        return label_data.ravel().tolist()
    if isinstance(label_data, (tuple, set, frozenset)):
        return list(label_data)

    if isinstance(label_data, str):
        try:
            parsed = ast.literal_eval(label_data)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Not a Python literal: the whole string is one label (or none,
            # when it is only whitespace).
            return [label_data] if label_data.strip() else []
        if isinstance(parsed, list):
            return parsed
        if isinstance(parsed, (tuple, set)):
            return list(parsed)
        # A single literal that is not a collection is one label.
        return [str(parsed)]

    # None, NaN and every other unsupported type carry no labels.
    return []
def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
    """Draw a horizontal bar chart of the most frequent content topics.

    Each cell of ``topics_col`` is normalized to a list of labels with
    ``_parse_eb_label``; the lists are exploded so every label counts once
    per row it appears in, and the ``top_n`` most frequent labels are drawn.

    Args:
        df: Post-level DataFrame; ``None`` or an empty frame yields a
            placeholder.
        topics_col: Column holding the topic labels of each post.
        top_n: Maximum number of topics to show.

    Returns:
        A Matplotlib figure: the bar chart (each bar annotated with its
        count) on success, otherwise the figure returned by
        ``create_placeholder_plot`` describing what went wrong.
    """
    title = f"Breakdown of Content by Topics (Top {top_n})"
    logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if topics_col not in df.columns:
        return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")

    try:
        working = df.copy()
        # One list of labels per row, then one row per label; rows whose
        # list was empty explode to NaN and are discarded.
        label_lists = working[topics_col].apply(_parse_eb_label)
        flat_labels = label_lists.explode().dropna()
        if flat_labels.empty:
            return create_placeholder_plot(title=title, message="No topic data found after processing labels.")

        topic_counts = flat_labels.value_counts()
        if topic_counts.empty:
            return create_placeholder_plot(title=title, message="No topics to display after counting.")

        # Ascending order puts the most frequent topic at the top of a
        # horizontal bar chart.
        top_topics = topic_counts.nlargest(top_n).sort_values()

        fig_height = 8 if len(top_topics) > 5 else 6
        fig, ax = plt.subplots(figsize=(10, fig_height))
        top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
        ax.set_title(title)
        ax.set_xlabel('Number of Posts')
        ax.set_ylabel('Topic')

        # Write each count just right of its bar; the gap is 1% of the longest bar.
        label_gap = 0.01 * top_topics.max()
        for row, count in enumerate(top_topics):
            ax.text(count + label_gap, row, str(count), va='center')

        plt.tight_layout()
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Executes after the return value has been chosen.
        plt.close('all')
if __name__ == '__main__':
    # Manual smoke test: build small in-memory fixtures, run every plot
    # generator against them in order, and log one line per figure returned.

    def _log_if_generated(figure, message):
        """Log *message* at INFO level when *figure* is truthy."""
        if figure:
            logging.info(message)

    # Post-level fixture: seven posts spread over four days.
    sample_merged_posts_df = pd.DataFrame({
        'id': [f'post{i}' for i in range(1, 8)],
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
        'likeCount': [10, 5, 12, 8, 15, 3, 20],
        'commentCount': [2, 1, 3, 1, 4, 0, 5],
        'shareCount': [1, 0, 1, 1, 2, 0, 1],
        'clickCount': [20, 15, 30, 22, 40, 10, 50],
        'impressionCount': [200, 150, 300, 220, 400, 100, 500],
        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
        'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],
        # Deliberately mixes the label formats the topic parser must handle:
        # stringified lists, real lists, a bare string and a missing value.
        'eb_labels': [
            "['AI', 'Tech']",
            ['Innovation'],
            'General',
            None,
            ['Tech', 'Future'],
            "['AI', 'Development']",
            ['Tech'],
        ],
        # Used by the comment sentiment plot.
        'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive'],
    })

    # Follower statistics fixture: the rows of each list line up by position
    # (3 monthly gains, 3 geo, 2 function, 2 industry, 2 seniority).
    sample_follower_stats_df = pd.DataFrame({
        'follower_count_type': [
            'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
            'follower_geo', 'follower_geo', 'follower_geo',
            'follower_function', 'follower_function',
            'follower_industry', 'follower_industry',
            'follower_seniority', 'follower_seniority',
        ],
        'category_name': [
            '2024-01-01', '2024-02-01', '2024-03-01',  # Dates for monthly gains
            'USA', 'Canada', 'UK',  # Geo
            'Engineering', 'Sales',  # Function/Role
            'Tech', 'Finance',  # Industry
            'Senior', 'Junior',  # Seniority
        ],
        'follower_count_organic': [
            100, 110, 125,  # Organic monthly gains
            500, 300, 150,  # Organic Geo counts
            400, 200,  # Organic Role counts
            250, 180,  # Organic Industry counts
            300, 220,  # Organic Seniority counts
        ],
        'follower_count_paid': [
            20, 30, 25,  # Paid monthly gains
            50, 40, 60,  # Paid Geo counts
            30, 20,  # Paid Role counts
            45, 35,  # Paid Industry counts
            60, 40,  # Paid Seniority counts
        ],
    })

    logging.info("--- Testing Existing Plot Generations ---")
    _log_if_generated(
        generate_posts_activity_plot(sample_merged_posts_df.copy()),
        "Posts activity plot generated.",
    )
    _log_if_generated(
        generate_engagement_type_plot(sample_merged_posts_df.copy()),
        "Engagement type plot generated.",
    )

    # Mentions fixture: four mentions over three days.
    sample_mentions_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })
    _log_if_generated(
        generate_mentions_activity_plot(sample_mentions_df.copy()),
        "Mentions activity plot generated.",
    )
    _log_if_generated(
        generate_mention_sentiment_plot(sample_mentions_df.copy()),
        "Mention sentiment plot generated.",
    )

    # Follower plots.
    _log_if_generated(
        generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly'),
        "Followers Count Over Time plot generated.",
    )
    _log_if_generated(
        generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly'),
        "Followers Growth Rate plot generated.",
    )
    _log_if_generated(
        generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location"),
        "Followers by Location plot generated.",
    )
    # ... add other follower demographic tests ...

    # Post metric time series.
    _log_if_generated(
        generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy()),
        "Engagement Rate Over Time plot generated.",
    )
    _log_if_generated(
        generate_reach_over_time_plot(sample_merged_posts_df.copy()),
        "Reach Over Time (Clicks) plot generated.",
    )
    _log_if_generated(
        generate_impressions_over_time_plot(sample_merged_posts_df.copy()),
        "Impressions Over Time plot generated.",
    )
    _log_if_generated(
        generate_likes_over_time_plot(sample_merged_posts_df.copy()),
        "Likes Over Time plot generated.",
    )
    _log_if_generated(
        generate_clicks_over_time_plot(sample_merged_posts_df.copy()),  # Uses reach logic
        "Clicks Over Time plot generated.",
    )
    _log_if_generated(
        generate_shares_over_time_plot(sample_merged_posts_df.copy()),
        "Shares Over Time plot generated.",
    )
    _log_if_generated(
        generate_comments_over_time_plot(sample_merged_posts_df.copy()),
        "Comments Over Time plot generated.",
    )
    _log_if_generated(
        generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy(), sentiment_column='comment_sentiment'),
        "Comments Sentiment Breakdown plot generated.",
    )

    logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
    _log_if_generated(
        generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D'),
        "Post Frequency (Daily) plot generated.",
    )
    _log_if_generated(
        generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W'),
        "Post Frequency (Weekly) plot generated.",
    )
    _log_if_generated(
        generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='media_type'),
        "Content Format Breakdown plot generated.",
    )
    _log_if_generated(
        generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5),
        "Content Topic Breakdown plot generated.",
    )

    # Edge cases for the new plots: empty frame, missing columns, and a
    # topics column in which every value is missing.
    logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
    empty_df = pd.DataFrame()
    _log_if_generated(
        generate_post_frequency_plot(empty_df.copy()),
        "Post Frequency (empty df) placeholder generated.",
    )
    _log_if_generated(
        generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col'),
        "Content Format (missing col) placeholder generated.",
    )
    # The two-column slice has no 'eb_labels' column at all.
    _log_if_generated(
        generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels'),
        "Content Topic (missing col) placeholder generated.",
    )
    df_no_topics_data = sample_merged_posts_df.copy()
    df_no_topics_data['eb_labels'] = None
    _log_if_generated(
        generate_content_topic_breakdown_plot(df_no_topics_data, topics_col='eb_labels'),
        "Content Topic (all None labels) placeholder generated.",
    )

    logging.info("Test script finished. Review plots if displayed locally or saved.")