Spaces:
Running
Running
import pandas as pd | |
import matplotlib.pyplot as plt | |
import logging | |
from io import BytesIO | |
import base64 | |
import numpy as np | |
import matplotlib.ticker as mticker | |
# Configure logging for this module | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s') | |
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Build a text-only Matplotlib figure used when a real plot cannot be drawn.

    Args:
        title: Heading placed on the first line of the placeholder text.
        message: Explanation placed on the line below the title.

    Returns:
        A Matplotlib figure whose axes are hidden and which shows
        ``title`` and ``message`` centered. If building that figure raises,
        the error is logged and a minimal fallback figure is returned instead.
    """
    try:
        placeholder_fig, placeholder_ax = plt.subplots(figsize=(8, 4))
        placeholder_ax.text(
            0.5,
            0.5,
            f"{title}\n{message}",
            ha='center',
            va='center',
            fontsize=10,
            wrap=True,
        )
        placeholder_ax.axis('off')
        plt.tight_layout()
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}")
        # Last-resort figure with a fixed message, used only if the regular
        # placeholder above could not be built.
        fallback_fig, fallback_ax = plt.subplots()
        fallback_ax.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
        fallback_ax.axis('off')
        return fallback_fig
    # No plt.close(fig) here as Gradio handles the figure object.
    return placeholder_fig
def generate_posts_activity_plot(df, date_column='published_at'):
    """Plot the number of posts per calendar day as a line chart.

    Args:
        df: DataFrame with one row per post; may be ``None`` or empty.
        date_column: Name of the column holding each post's timestamp.
            Non-datetime values are parsed with ``pd.to_datetime`` and
            unparseable entries are dropped.

    Returns:
        A Matplotlib figure with the daily post counts, or a placeholder
        figure when the input is missing, lacks ``date_column``, has no
        valid dates, or plotting raises.
    """
    chart_title = "Posts Activity Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {row_count}")

    if df is None or df.empty:
        logging.warning("Posts activity: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    if date_column not in df.columns:
        logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
        return create_placeholder_plot(title=chart_title, message=f"Date column '{date_column}' not found.")

    try:
        working = df.copy()
        # Only parse when the column is not already a datetime dtype.
        if not pd.api.types.is_datetime64_any_dtype(working[date_column]):
            working[date_column] = pd.to_datetime(working[date_column], errors='coerce')

        working = working.dropna(subset=[date_column])
        if working.empty:
            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=chart_title, message="No valid date entries found.")

        # One bucket per day; size() counts the rows falling in each bucket.
        daily_counts = working.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            logging.info("Posts activity: No posts after resampling by day.")
            return create_placeholder_plot(title=chart_title, message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_counts.plot(kind='line', ax=ax, marker='o', linestyle='-')
        ax.set_title('Posts Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated posts activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Posts Activity Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'):  # Updated col names
    """Draw a bar chart of total likes, comments and shares.

    Args:
        df: DataFrame with one row per post; may be ``None`` or empty.
        likes_col: Column holding the per-post like count.
        comments_col: Column holding the per-post comment count.
        shares_col: Column holding the per-post share count.

    Returns:
        A Matplotlib figure with one bar per engagement type, or a
        placeholder figure when the input is missing, a required column is
        absent, every total is zero, or plotting raises.
    """
    chart_title = "Post Engagement Types"
    logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
    required_cols = [likes_col, comments_col, shares_col]

    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    missing_cols = [name for name in required_cols if name not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=chart_title, message=msg)

    try:
        working = df.copy()
        # Non-numeric entries become NaN and then count as zero.
        for name in required_cols:
            working[name] = pd.to_numeric(working[name], errors='coerce').fillna(0)

        engagement_data = {
            label: working[name].sum()
            for label, name in zip(('Likes', 'Comments', 'Shares'), required_cols)
        }

        if all(total == 0 for total in engagement_data.values()):
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(title=chart_title, message="No engagement data (likes, comments, shares) in the selected period.")

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(
            list(engagement_data.keys()),
            list(engagement_data.values()),
            color=['skyblue', 'lightgreen', 'salmon'],
        )
        ax.set_title('Total Post Engagement Types')
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Lift each value label 1% of the tallest bar above its own bar.
        label_offset = 0.01 * max(engagement_data.values(), default=10)
        for bar in bars:
            height = bar.get_height()
            x_center = bar.get_x() + bar.get_width() / 2.0
            ax.text(x_center, height + label_offset, str(int(height)), ha='center', va='bottom')

        plt.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Engagement Type Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_mentions_activity_plot(df, date_column='date'):
    """Plot the number of mentions per calendar day as a line chart.

    Args:
        df: DataFrame with one row per mention; may be ``None`` or empty.
        date_column: Name of the column holding each mention's timestamp.
            Non-datetime values are parsed with ``pd.to_datetime`` and
            unparseable entries are dropped.

    Returns:
        A Matplotlib figure with the daily mention counts, or a placeholder
        figure when the input is missing, lacks ``date_column``, has no
        valid dates, or plotting raises.
    """
    chart_title = "Mentions Activity Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {row_count}")

    if df is None or df.empty:
        logging.warning("Mentions activity: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    if date_column not in df.columns:
        logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
        return create_placeholder_plot(title=chart_title, message=f"Date column '{date_column}' not found.")

    try:
        working = df.copy()
        # Only parse when the column is not already a datetime dtype.
        if not pd.api.types.is_datetime64_any_dtype(working[date_column]):
            working[date_column] = pd.to_datetime(working[date_column], errors='coerce')

        working = working.dropna(subset=[date_column])
        if working.empty:
            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=chart_title, message="No valid date entries found.")

        # One bucket per day; size() counts the rows falling in each bucket.
        daily_counts = working.set_index(date_column).resample('D').size()
        if daily_counts.empty:
            logging.info("Mentions activity: No mentions after resampling by day.")
            return create_placeholder_plot(title=chart_title, message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_counts.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
        ax.set_title('Mentions Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated mentions activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Draw a pie chart of how mentions are distributed across sentiment labels.

    Args:
        df: DataFrame with one row per mention; may be ``None`` or empty.
        sentiment_column: Column holding the sentiment label of each mention.

    Returns:
        A Matplotlib figure with one pie slice per distinct label, or a
        placeholder figure when the input is missing, ``sentiment_column``
        is absent, no labels are present, or plotting raises.
    """
    chart_title = "Mention Sentiment Distribution"
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=chart_title, message=msg)

    try:
        df_copy = df.copy()
        sentiment_counts = df_copy[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(title=chart_title, message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))

        # One viridis color per label. plt.get_cmap is used because
        # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
        n_labels = len(sentiment_counts)
        colors_map = plt.get_cmap('viridis', n_labels)
        pie_colors = [colors_map(i) for i in range(n_labels)]
        # Alternative if the sentiment labels are a fixed set:
        # colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
        # pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        ax.axis('equal')
        plt.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
# --- Existing Follower Growth Plot (can be reused or adapted) --- | |
def generate_total_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """Plot the total follower count against time as a line chart.

    Args:
        df: DataFrame with follower totals; may be ``None`` or empty.
        date_column: Column holding the observation date.
        count_column: Column holding the total follower count.

    Returns:
        A Matplotlib figure of ``count_column`` over ``date_column`` (rows
        sorted by date), or a placeholder figure when the input is missing,
        a column is absent, no row survives cleaning, or plotting raises.
    """
    chart_title = "Total Follower Growth"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating total follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=chart_title, message="No follower data.")

    if not (date_column in df.columns and count_column in df.columns):
        return create_placeholder_plot(title=chart_title, message=f"Missing columns: {date_column} or {count_column}.")

    try:
        cleaned = df.copy()
        # Unparseable dates / counts become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[count_column] = pd.to_numeric(cleaned[count_column], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, count_column])
        cleaned = cleaned.sort_values(by=date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=chart_title, message="No valid data after cleaning.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(cleaned[date_column], cleaned[count_column], marker='o', linestyle='-', color='green')
        ax.set_title('Total Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error in generate_total_follower_growth_plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Total Follower Growth Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
# --- New Plot Functions --- | |
def generate_followers_count_over_time_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Plot follower counts of one record type over time (e.g. monthly gains).

    Args:
        df: DataFrame of follower statistics; may be ``None`` or empty.
        date_column: Column holding the observation date.
        count_column: Column holding the follower count to plot.
        type_filter_column: Column used to select which rows to plot.
        type_value: Only rows whose ``type_filter_column`` equals this value
            are plotted.

    Returns:
        A Matplotlib figure of ``count_column`` over ``date_column`` for the
        selected rows (sorted by date), or a placeholder figure when the
        input is missing, a column is absent, no row matches or survives
        cleaning, or plotting raises.
    """
    title = f"Followers Count Over Time ({type_value})"
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Explicit copy of the selected rows: the columns are reassigned
        # below, and assigning on a bare filtered slice triggers pandas'
        # SettingWithCopyWarning. It also leaves the caller's frame untouched.
        df_filtered = df.loc[df[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        # Unparseable dates / counts become NaT / NaN and are dropped below.
        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        df_filtered = df_filtered.dropna(subset=[date_column, count_column]).sort_values(by=date_column)

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_filtered[date_column], df_filtered[count_column], marker='o', linestyle='-', color='dodgerblue')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Follower Count')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_followers_growth_rate_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Compute and plot the period-over-period follower growth rate in percent.

    The rate for each row is ``(current - previous) / previous * 100``,
    computed with ``Series.pct_change`` over rows sorted by date.

    Args:
        df: DataFrame of follower statistics; may be ``None`` or empty.
        date_column: Column holding the observation date.
        count_column: Column holding the follower count.
        type_filter_column: Column used to select which rows to use.
        type_value: Only rows whose ``type_filter_column`` equals this value
            are used.

    Returns:
        A Matplotlib figure of the growth rate over time, or a placeholder
        figure when the input is missing, a column is absent, fewer than two
        valid rows remain, no finite growth rate can be computed, or
        plotting raises.
    """
    title = f"Follower Growth Rate ({type_value})"
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Explicit copy of the selected rows: the columns are reassigned
        # below, and assigning on a bare filtered slice triggers pandas'
        # SettingWithCopyWarning. It also leaves the caller's frame untouched.
        df_filtered = df.loc[df[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        # Unparseable dates / counts become NaT / NaN and are dropped below.
        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        df_filtered = (
            df_filtered.dropna(subset=[date_column, count_column])
            .sort_values(by=date_column)
            .set_index(date_column)
        )

        # pct_change needs a previous row, so at least two points are required.
        if len(df_filtered) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        growth_rate = df_filtered[count_column].pct_change() * 100
        # A previous value of zero yields +/-inf; treat those as missing.
        # The first row is always NaN and is dropped along with them.
        df_filtered['growth_rate'] = growth_rate.replace([np.inf, -np.inf], np.nan)
        df_filtered = df_filtered.dropna(subset=['growth_rate'])

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message="No valid growth rate data after calculation.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_filtered.index, df_filtered['growth_rate'], marker='o', linestyle='-', color='lightcoral')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Growth Rate (%)')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter())
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_followers_by_demographics_plot(df, category_col='category_name', count_column='follower_count_o', type_filter_column='follower_count_type', type_value=None, plot_title="Followers by Demographics"):
    """Draw a bar chart of follower counts per demographic category.

    Rows are filtered to one demographic type, summed per category, sorted
    in descending order and limited to the ten largest categories.

    Args:
        df: DataFrame of follower statistics; may be ``None`` or empty.
        category_col: Column holding the category label of each row.
        count_column: Column holding the follower count.
        type_filter_column: Column used to select the demographic type.
        type_value: Demographic type to plot; must be given (``None``
            yields a placeholder).
        plot_title: Chart title; `` (Top 10)`` is appended when the
            categories are truncated.

    Returns:
        A Matplotlib figure with one bar per category, or a placeholder
        figure when the input is missing, a column is absent,
        ``type_value`` is ``None``, no row matches, or plotting raises.
    """
    logging.info(f"Generating {plot_title}. Category: '{category_col}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=plot_title, message="No follower data available.")

    required_cols = [category_col, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}.")

    if type_value is None:  # Should be specified
        return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")

    try:
        # Explicit copy of the selected rows: the count column is reassigned
        # below, and assigning on a bare filtered slice triggers pandas'
        # SettingWithCopyWarning. It also leaves the caller's frame untouched.
        df_filtered = df.loc[df[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")

        # Non-numeric counts become NaN and then count as zero.
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce').fillna(0)

        # Total followers per category, largest first.
        demographics_data = df_filtered.groupby(category_col)[count_column].sum().sort_values(ascending=False)
        if demographics_data.empty:
            return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")

        # Limit to the top N categories for readability.
        top_n = 10
        if len(demographics_data) > top_n:
            demographics_data = demographics_data.head(top_n)
            plot_title += f" (Top {top_n})"

        # Use a larger canvas when there are more than five bars.
        figsize = (10, 6) if len(demographics_data) > 5 else (8, 5)
        fig, ax = plt.subplots(figsize=figsize)
        demographics_data.plot(kind='bar', ax=ax, color='teal')
        ax.set_title(plot_title)
        ax.set_xlabel(category_col.replace('_', ' ').title())
        ax.set_ylabel('Number of Followers')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """Plot the mean daily engagement rate as a line chart.

    Args:
        df: DataFrame with one row per post; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        engagement_rate_col: Column holding the engagement value.
            NOTE(review): the code treats this as a rate (e.g. 0.05 for 5%),
            not an absolute count; confirm against the data source.

    Returns:
        A Matplotlib figure of the daily mean of ``engagement_rate_col``,
        or a placeholder figure when the input is missing, a column is
        absent, no row survives cleaning or resampling, or plotting raises.
    """
    title = "Engagement Rate Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for engagement rate.")

    missing_cols = [name for name in (date_column, engagement_rate_col) if name not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        # Unparseable dates / rates become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[engagement_rate_col] = pd.to_numeric(cleaned[engagement_rate_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, engagement_rate_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Daily mean; days without any post come back as NaN and are removed.
        daily_rate = cleaned.resample('D')[engagement_rate_col].mean().dropna()
        if daily_rate.empty:
            return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")

        # Values at or below 1 are formatted as fractions, otherwise as
        # already-scaled percentages.
        if daily_rate.max() <= 1:
            percent_scale = 1.0
        else:
            percent_scale = 100.0

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_rate.index, daily_rate.values, marker='.', linestyle='-', color='darkorange')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Engagement Rate')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=percent_scale))
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):  # Using clickCount as proxy for Reach
    """Plot total daily reach (clicks) as a line chart.

    Args:
        df: DataFrame with one row per post; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        reach_col: Column summed per day; defaults to the click count,
            which this module uses as a proxy for reach.

    Returns:
        A Matplotlib figure of the daily sum of ``reach_col`` (days whose
        total is zero are still plotted), or a placeholder figure when the
        input is missing, a column is absent, no row survives cleaning, or
        plotting raises.
    """
    title = "Reach Over Time (Clicks)"
    logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for reach.")

    required_cols = [date_column, reach_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        df_copy = df.copy()
        # Unparseable dates / counts become NaT / NaN and are dropped below.
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)

        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # An all-zero series is plotted as-is rather than replaced by a
        # placeholder; the former no-op checks for that case were removed.
        reach_over_time = df_copy.resample('D')[reach_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
    """Plot total daily impressions as a line chart.

    Args:
        df: DataFrame with one row per post; may be ``None`` or empty.
        date_column: Column holding each post's timestamp.
        impressions_col: Column holding the per-post impression count.

    Returns:
        A Matplotlib figure of the daily sum of ``impressions_col``, or a
        placeholder figure when the input is missing, a column is absent,
        no row survives cleaning, or plotting raises.
    """
    title = "Impressions Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for impressions.")

    missing_cols = [name for name in (date_column, impressions_col) if name not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        # Unparseable dates / counts become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[impressions_col] = pd.to_numeric(cleaned[impressions_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, impressions_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        daily_impressions = cleaned.resample('D')[impressions_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_impressions.index, daily_impressions.values, marker='.', linestyle='-', color='slateblue')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Impressions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs after the return value is chosen; drops every figure from
        # pyplot's registry while the Figure object is still returned.
        plt.close('all')
if __name__ == '__main__':
    # Smoke test: build dummy data and run each plot generator once.

    # Posts data (merged with stats). Every column must have the same
    # length (7 rows); 'id' previously had only 6 entries, which made
    # pd.DataFrame raise ValueError.
    posts_data = {
        'id': [f'post{i}' for i in range(1, 8)],
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
        'likeCount': [10, 5, 12, 8, 15, 3, 20],
        'commentCount': [2, 1, 3, 1, 4, 0, 5],
        'shareCount': [1, 0, 1, 1, 2, 0, 1],
        'clickCount': [20, 15, 30, 22, 40, 10, 50],
        'impressionCount': [200, 150, 300, 220, 400, 100, 500],
        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],  # Engagement rate
    }
    sample_merged_posts_df = pd.DataFrame(posts_data)

    # Follower stats data: 5 time-series rows followed by 15 snapshot rows.
    follower_data = {
        'date': pd.to_datetime([
            '2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15', '2023-03-01',  # For time series
            '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',  # For demographics (snapshot)
            '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',
            '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',
        ]),
        'follower_count_type': [
            'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
            'follower_geo', 'follower_geo', 'follower_geo',  # Location
            'follower_function', 'follower_function', 'follower_function',  # Role
            'follower_industry', 'follower_industry', 'follower_industry',  # Industry
            'follower_seniority', 'follower_seniority', 'follower_seniority',  # Seniority
            'total_followers_snapshot', 'total_followers_snapshot', 'total_followers_snapshot',  # For existing total growth
        ],
        'category_name': [
            'Jan', 'Jan-Mid', 'Feb', 'Feb-Mid', 'Mar',  # Corresponds to follower_gains_monthly
            'USA', 'Canada', 'UK',  # Geo
            'Engineering', 'Sales', 'Marketing',  # Function/Role
            'Tech', 'Finance', 'Healthcare',  # Industry
            'Senior', 'Junior', 'Manager',  # Seniority
            'Overall1', 'Overall2', 'Overall3',  # For total_followers_snapshot
        ],
        'follower_count_o': [
            100, 105, 115, 120, 130,  # Counts for monthly gains
            500, 300, 200,  # Geo counts
            400, 350, 250,  # Role counts
            600, 200, 200,  # Industry counts
            300, 400, 300,  # Seniority counts
            1000, 1010, 1025,  # For total_followers_snapshot
        ],
        # Only the last three (snapshot) rows carry a total; used by the
        # existing total growth plot.
        'total_followers': [None] * 17 + [100, 115, 130],
    }
    sample_follower_stats_df = pd.DataFrame(follower_data)

    # Overwrite 'total_followers' on the snapshot rows with their
    # 'follower_count_o' values (assignment aligns on the index).
    snapshot_mask = sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot'
    sample_follower_stats_df.loc[snapshot_mask, 'total_followers'] = sample_follower_stats_df['follower_count_o']

    logging.info("--- Testing New Plot Generations ---")

    fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
    if fig_followers_count:
        logging.info("Followers Count Over Time (monthly) plot generated.")

    fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
    if fig_followers_rate:
        logging.info("Followers Growth Rate (monthly) plot generated.")

    fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
    if fig_geo:
        logging.info("Followers by Location plot generated.")

    fig_role = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_function', plot_title="Followers by Role")
    if fig_role:
        logging.info("Followers by Role plot generated.")

    fig_industry = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_industry', plot_title="Followers by Industry")
    if fig_industry:
        logging.info("Followers by Industry plot generated.")

    fig_seniority = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_seniority', plot_title="Followers by Seniority")
    if fig_seniority:
        logging.info("Followers by Seniority plot generated.")

    fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
    if fig_eng_rate:
        logging.info("Engagement Rate Over Time plot generated.")

    fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
    if fig_reach:
        logging.info("Reach Over Time (Clicks) plot generated.")

    fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
    if fig_impressions:
        logging.info("Impressions Over Time plot generated.")

    # Test existing total follower growth plot with appropriate data
    total_followers_df = sample_follower_stats_df[snapshot_mask].copy()
    total_followers_df['date'] = pd.to_datetime(total_followers_df['date'])  # Ensure date is datetime
    fig_total_growth = generate_total_follower_growth_plot(total_followers_df, date_column='date', count_column='total_followers')
    if fig_total_growth:
        logging.info("Total Follower Growth plot (existing function) generated.")

    logging.info("Test script finished. Review plots if displayed locally or saved.")