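# LinkedinMonitor / analytics_plot_generators.py
# Web-viewer residue kept for provenance only (not part of the module):
#   author avatar: GuglielmoTor's picture
#   last commit: "Update analytics_plot_generators.py" (8019346, verified)
#   viewer links: raw / history / blame
#   file size: 33.2 kB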
import pandas as pd
import matplotlib.pyplot as plt
import logging
from io import BytesIO
import base64
import numpy as np
import matplotlib.ticker as mticker
# Configure logging for this module: INFO level, with timestamp, level name,
# module name and message in every record.
# NOTE(review): logging.basicConfig configures the process-wide root logger and
# does nothing if the root logger already has handlers — confirm that doing
# this at import time is intended.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Build a text-only Matplotlib figure used when a real plot cannot be drawn.

    Args:
        title: First line of the text shown in the middle of the figure.
        message: Explanation shown on the line(s) below the title.

    Returns:
        A Matplotlib figure with hidden axes containing ``title`` and
        ``message``. If building that figure raises, the error is logged and a
        minimal figure with a fixed "Fatal" message is returned instead.
    """
    try:
        placeholder_fig, placeholder_ax = plt.subplots(figsize=(8, 4))
        caption = f"{title}\n{message}"
        placeholder_ax.text(0.5, 0.5, caption, ha='center', va='center', fontsize=10, wrap=True)
        placeholder_ax.axis('off')
        plt.tight_layout()
    except Exception as exc:
        logging.error(f"Error creating placeholder plot: {exc}")
        # Last-resort figure: default size, fixed text, no layout pass.
        fallback_fig, fallback_ax = plt.subplots()
        fallback_ax.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
        fallback_ax.axis('off')
        return fallback_fig
    else:
        # The figure is deliberately not closed here; per the original author's
        # note, Gradio handles the figure object.
        return placeholder_fig
def generate_posts_activity_plot(df, date_column='published_at'):
    """Plot the number of rows (posts) per calendar day as a line chart.

    Args:
        df: DataFrame of posts; may be None.
        date_column: Name of the column holding the post date. Values that are
            not already datetimes are parsed with ``pd.to_datetime``; entries
            that cannot be parsed are discarded.

    Returns:
        A Matplotlib figure. If the input is None/empty, the date column is
        absent, no valid dates remain, or plotting raises, a text placeholder
        from ``create_placeholder_plot`` is returned instead.
    """
    heading = "Posts Activity Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {row_count}")

    # Guard clauses: nothing to plot without rows or without the date column.
    if df is None or df.empty:
        logging.warning("Posts activity: DataFrame is empty.")
        return create_placeholder_plot(title=heading, message="No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
        return create_placeholder_plot(title=heading, message=f"Date column '{date_column}' not found.")

    try:
        working = df.copy()  # never mutate the caller's frame
        already_datetime = pd.api.types.is_datetime64_any_dtype(working[date_column])
        if not already_datetime:
            working[date_column] = pd.to_datetime(working[date_column], errors='coerce')
        working = working.dropna(subset=[date_column])
        if working.empty:
            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=heading, message="No valid date entries found.")

        # Daily bucket sizes; days without posts inside the range count as 0.
        daily_posts = working.set_index(date_column).resample('D').size()
        if daily_posts.empty:
            logging.info("Posts activity: No posts after resampling by day.")
            return create_placeholder_plot(title=heading, message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_posts.plot(kind='line', ax=ax, marker='o', linestyle='-')
        ax.set_title('Posts Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated posts activity plot.")
        return fig
    except Exception as exc:
        logging.error(f"Error generating posts activity plot: {exc}", exc_info=True)
        return create_placeholder_plot(title="Posts Activity Error", message=str(exc))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'):
    """Draw a bar chart of total likes, comments and shares.

    Args:
        df: DataFrame of posts; may be None.
        likes_col: Column holding per-post like counts.
        comments_col: Column holding per-post comment counts.
        shares_col: Column holding per-post share counts.

    Returns:
        A Matplotlib figure with one bar per engagement type, each annotated
        with its integer total. If the input is None/empty, a column is
        missing, every total is zero, or plotting raises, a text placeholder
        from ``create_placeholder_plot`` is returned instead.
    """
    heading = "Post Engagement Types"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating engagement type plot. Input df rows: {row_count}")

    required_cols = [likes_col, comments_col, shares_col]
    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title=heading, message="No data available for the selected period.")

    missing_cols = [name for name in required_cols if name not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=heading, message=msg)

    try:
        working = df.copy()
        # Non-numeric entries become 0 so they do not affect the totals.
        for name in required_cols:
            working[name] = pd.to_numeric(working[name], errors='coerce').fillna(0)

        engagement_data = {
            label: working[name].sum()
            for label, name in zip(('Likes', 'Comments', 'Shares'), required_cols)
        }
        if all(total == 0 for total in engagement_data.values()):
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(title=heading, message="No engagement data (likes, comments, shares) in the selected period.")

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon'])
        ax.set_title('Total Post Engagement Types')
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Place each value label 1% of the tallest bar above its own bar.
        label_offset = 0.01 * max(engagement_data.values(), default=10)
        for rect in bars:
            height = rect.get_height()
            x_center = rect.get_x() + rect.get_width() / 2.0
            ax.text(x_center, height + label_offset, str(int(height)), ha='center', va='bottom')

        plt.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as exc:
        logging.error(f"Error generating engagement type plot: {exc}", exc_info=True)
        return create_placeholder_plot(title="Engagement Type Error", message=str(exc))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_mentions_activity_plot(df, date_column='date'):
    """Plot the number of rows (mentions) per calendar day as a purple line chart.

    Args:
        df: DataFrame of mentions; may be None.
        date_column: Name of the column holding the mention date. Values that
            are not already datetimes are parsed with ``pd.to_datetime``;
            entries that cannot be parsed are discarded.

    Returns:
        A Matplotlib figure. If the input is None/empty, the date column is
        absent, no valid dates remain, or plotting raises, a text placeholder
        from ``create_placeholder_plot`` is returned instead.
    """
    heading = "Mentions Activity Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {row_count}")

    # Guard clauses: nothing to plot without rows or without the date column.
    if df is None or df.empty:
        logging.warning("Mentions activity: DataFrame is empty.")
        return create_placeholder_plot(title=heading, message="No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
        return create_placeholder_plot(title=heading, message=f"Date column '{date_column}' not found.")

    try:
        working = df.copy()  # never mutate the caller's frame
        already_datetime = pd.api.types.is_datetime64_any_dtype(working[date_column])
        if not already_datetime:
            working[date_column] = pd.to_datetime(working[date_column], errors='coerce')
        working = working.dropna(subset=[date_column])
        if working.empty:
            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=heading, message="No valid date entries found.")

        # Daily bucket sizes; days without mentions inside the range count as 0.
        daily_mentions = working.set_index(date_column).resample('D').size()
        if daily_mentions.empty:
            logging.info("Mentions activity: No mentions after resampling by day.")
            return create_placeholder_plot(title=heading, message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_mentions.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
        ax.set_title('Mentions Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated mentions activity plot.")
        return fig
    except Exception as exc:
        logging.error(f"Error generating mentions activity plot: {exc}", exc_info=True)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(exc))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Draw a pie chart of how often each sentiment label occurs.

    Args:
        df: DataFrame of mentions; may be None.
        sentiment_column: Column holding the sentiment label of each row.
            Missing (NaN) labels are ignored by ``value_counts``.

    Returns:
        A Matplotlib figure with one pie slice per distinct label. If the
        input is None/empty, the column is absent, no labels remain, or
        plotting raises, a text placeholder from ``create_placeholder_plot``
        is returned instead.
    """
    title = "Mention Sentiment Distribution"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {row_count}")

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title=title, message="No data available for the selected period.")
    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=title, message=msg)

    try:
        # value_counts does not modify df, so no defensive copy is needed.
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(title=title, message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))

        # One colour per label, sampled from viridis so any number of labels
        # works. plt.get_cmap is used because matplotlib.cm.get_cmap (reached
        # via plt.cm.get_cmap) was deprecated in Matplotlib 3.7 and removed
        # in 3.9.
        n_labels = len(sentiment_counts)
        colors_map = plt.get_cmap('viridis', n_labels)
        pie_colors = [colors_map(i) for i in range(n_labels)]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
        plt.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
# --- Existing Follower Growth Plot (can be reused or adapted) ---
def generate_total_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """Plot the total follower count against time as a green line chart.

    Args:
        df: DataFrame of follower statistics; may be None.
        date_column: Column holding the observation date (parsed with
            ``pd.to_datetime``).
        count_column: Column holding the total follower count (parsed with
            ``pd.to_numeric``).

    Returns:
        A Matplotlib figure. Rows whose date or count cannot be parsed are
        dropped and the rest are plotted in date order. If the input is
        None/empty, a column is absent, nothing survives cleaning, or plotting
        raises, a text placeholder from ``create_placeholder_plot`` is
        returned instead.
    """
    heading = "Total Follower Growth"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating total follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=heading, message="No follower data.")
    if any(name not in df.columns for name in (date_column, count_column)):
        return create_placeholder_plot(title=heading, message=f"Missing columns: {date_column} or {count_column}.")

    try:
        working = df.copy()
        working[date_column] = pd.to_datetime(working[date_column], errors='coerce')
        working[count_column] = pd.to_numeric(working[count_column], errors='coerce')
        working = working.dropna(subset=[date_column, count_column])
        working = working.sort_values(by=date_column)
        if working.empty:
            return create_placeholder_plot(title=heading, message="No valid data after cleaning.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(working[date_column], working[count_column], marker='o', linestyle='-', color='green')
        ax.set_title('Total Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error in generate_total_follower_growth_plot: {exc}", exc_info=True)
        return create_placeholder_plot(title="Total Follower Growth Error", message=str(exc))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
# --- New Plot Functions ---
def generate_followers_count_over_time_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Plot a follower count series over time for one follower-count type.

    Args:
        df: DataFrame of follower statistics; may be None.
        date_column: Column holding the observation date.
        count_column: Column holding the follower count to plot.
        type_filter_column: Column used to select the rows to plot.
        type_value: Only rows where ``type_filter_column == type_value`` are
            plotted; also shown in the plot title.

    Returns:
        A Matplotlib figure. Rows whose date or count cannot be parsed are
        dropped and the rest are plotted in date order. If the input is
        None/empty, a column is absent, no rows match or survive cleaning, or
        plotting raises, a text placeholder from ``create_placeholder_plot``
        is returned instead.
    """
    title = f"Followers Count Over Time ({type_value})"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Explicit copy of the filtered rows: the column assignments below must
        # not target a slice of another frame (SettingWithCopyWarning) and must
        # not touch the caller's data.
        df_filtered = df[df[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        df_filtered = df_filtered.dropna(subset=[date_column, count_column]).sort_values(by=date_column)
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_filtered[date_column], df_filtered[count_column], marker='o', linestyle='-', color='dodgerblue')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Follower Count')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_followers_growth_rate_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Plot the period-over-period percentage change of a follower count series.

    Args:
        df: DataFrame of follower statistics; may be None.
        date_column: Column holding the observation date.
        count_column: Column holding the follower count the rate is derived
            from.
        type_filter_column: Column used to select the rows to use.
        type_value: Only rows where ``type_filter_column == type_value`` are
            used; also shown in the plot title.

    Returns:
        A Matplotlib figure of ``(current - previous) / previous * 100`` over
        time. If the input is None/empty, a column is absent, fewer than two
        valid rows remain, no finite rate can be computed, or plotting raises,
        a text placeholder from ``create_placeholder_plot`` is returned
        instead.
    """
    title = f"Follower Growth Rate ({type_value})"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Explicit copy of the filtered rows: the column assignments below must
        # not target a slice of another frame (SettingWithCopyWarning) and must
        # not touch the caller's data.
        df_filtered = df[df[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        df_filtered = (
            df_filtered.dropna(subset=[date_column, count_column])
            .sort_values(by=date_column)
            .set_index(date_column)
        )
        # A rate needs a previous observation, so at least two rows.
        if len(df_filtered) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        # Growth rate: (current - previous) / previous * 100.
        growth_rate = df_filtered[count_column].pct_change() * 100
        # A zero previous value yields +/-inf (or NaN for 0 -> 0) and the first
        # row has no predecessor; none of these can be plotted, so drop them.
        growth_rate = growth_rate.replace([np.inf, -np.inf], np.nan).dropna()
        if growth_rate.empty:
            return create_placeholder_plot(title=title, message="No valid growth rate data after calculation.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(growth_rate.index, growth_rate, marker='o', linestyle='-', color='lightcoral')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Growth Rate (%)')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter())
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_followers_by_demographics_plot(df, category_col='category_name', count_column='follower_count_o', type_filter_column='follower_count_type', type_value=None, plot_title="Followers by Demographics", top_n=10):
    """Draw a bar chart of follower counts per category for one demographic type.

    Args:
        df: DataFrame of follower statistics; may be None.
        category_col: Column holding the category each row belongs to; counts
            are summed per category.
        count_column: Column holding the follower count. Values that cannot
            be parsed as numbers count as 0.
        type_filter_column: Column used to select the rows to use.
        type_value: Only rows where ``type_filter_column == type_value`` are
            used. Must be given; ``None`` yields a placeholder.
        plot_title: Title of the chart; `` (Top N)`` is appended when the
            categories are truncated.
        top_n: Maximum number of categories shown (largest first). Expected to
            be a positive integer, or ``None`` to show every category.

    Returns:
        A Matplotlib figure with categories sorted by descending total. If
        the input is None/empty, a column is absent, ``type_value`` is None,
        no rows match, or plotting raises, a text placeholder from
        ``create_placeholder_plot`` is returned instead.
    """
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {plot_title}. Category: '{category_col}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=plot_title, message="No follower data available.")

    required_cols = [category_col, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}.")

    if type_value is None:
        return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")

    try:
        # Explicit copy of the filtered rows: the column assignment below must
        # not target a slice of another frame (SettingWithCopyWarning) and must
        # not touch the caller's data.
        df_filtered = df[df[type_filter_column] == type_value].copy()
        if df_filtered.empty:
            return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")

        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce').fillna(0)

        # Total followers per category, largest first.
        demographics_data = df_filtered.groupby(category_col)[count_column].sum().sort_values(ascending=False)
        if demographics_data.empty:
            return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")

        # Keep only the largest categories so the axis stays readable.
        if top_n is not None and len(demographics_data) > top_n:
            demographics_data = demographics_data.head(top_n)
            plot_title += f" (Top {top_n})"

        # Use a larger canvas when more than five bars are drawn.
        fig_size = (10, 6) if len(demographics_data) > 5 else (8, 5)
        fig, ax = plt.subplots(figsize=fig_size)
        demographics_data.plot(kind='bar', ax=ax, color='teal')
        ax.set_title(plot_title)
        ax.set_xlabel(category_col.replace('_', ' ').title())
        ax.set_ylabel('Number of Followers')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """Plot the daily mean of an engagement-rate column over time.

    Args:
        df: DataFrame of posts; may be None.
        date_column: Column holding the post date (parsed with
            ``pd.to_datetime``).
        engagement_rate_col: Column holding the engagement value (parsed with
            ``pd.to_numeric``). NOTE(review): the original author assumes this
            is already a rate (e.g. 0.05 for 5%), not an absolute count —
            confirm against the data source.

    Returns:
        A Matplotlib figure with one point per day that has data. The y-axis
        is formatted as a percentage, scaled from 0-1 when the largest daily
        mean is at most 1 and from 0-100 otherwise. If the input is
        None/empty, a column is absent, nothing survives cleaning, or plotting
        raises, a text placeholder from ``create_placeholder_plot`` is
        returned instead.
    """
    title = "Engagement Rate Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for engagement rate.")

    absent = [name for name in (date_column, engagement_rate_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(title=title, message=f"Missing columns: {absent}. Available: {df.columns.tolist()}")

    try:
        working = df.copy()
        working[date_column] = pd.to_datetime(working[date_column], errors='coerce')
        working[engagement_rate_col] = pd.to_numeric(working[engagement_rate_col], errors='coerce')
        working = working.dropna(subset=[date_column, engagement_rate_col])
        working = working.set_index(date_column)
        if working.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Daily mean; days without any post produce NaN and are dropped.
        daily_rate = working.resample('D')[engagement_rate_col].mean().dropna()
        if daily_rate.empty:
            return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_rate.index, daily_rate.values, marker='.', linestyle='-', color='darkorange')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Engagement Rate')

        # Pick the value that represents 100% from the scale of the data.
        if daily_rate.max() <= 1:
            full_scale = 1.0
        else:
            full_scale = 100.0
        ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=full_scale))

        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
    """Plot the daily total of a reach column over time.

    The default ``reach_col`` is the click count, which the original author
    uses as a proxy for reach; the axis labels say "Clicks" accordingly.

    Args:
        df: DataFrame of posts; may be None.
        date_column: Column holding the post date (parsed with
            ``pd.to_datetime``).
        reach_col: Column holding the per-post value to sum per day (parsed
            with ``pd.to_numeric``).

    Returns:
        A Matplotlib figure of the daily sums; a series consisting only of
        zeros is still plotted. If the input is None/empty, a column is
        absent, nothing survives cleaning, or plotting raises, a text
        placeholder from ``create_placeholder_plot`` is returned instead.
    """
    title = "Reach Over Time (Clicks)"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for reach.")

    required_cols = [date_column, reach_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        df_copy = df.copy()
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Daily totals. No special-casing of all-zero results: a flat line at
        # zero is a valid plot.
        reach_over_time = df_copy.resample('D')[reach_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
    """Plot the daily total of an impressions column over time.

    Args:
        df: DataFrame of posts; may be None.
        date_column: Column holding the post date (parsed with
            ``pd.to_datetime``).
        impressions_col: Column holding the per-post impression count (parsed
            with ``pd.to_numeric``).

    Returns:
        A Matplotlib figure of the daily sums. If the input is None/empty, a
        column is absent, nothing survives cleaning, or plotting raises, a
        text placeholder from ``create_placeholder_plot`` is returned instead.
    """
    title = "Impressions Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for impressions.")

    absent = [name for name in (date_column, impressions_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(title=title, message=f"Missing columns: {absent}. Available: {df.columns.tolist()}")

    try:
        working = df.copy()
        working[date_column] = pd.to_datetime(working[date_column], errors='coerce')
        working[impressions_col] = pd.to_numeric(working[impressions_col], errors='coerce')
        working = working.dropna(subset=[date_column, impressions_col])
        working = working.set_index(date_column)
        if working.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Sum of impressions for each calendar day in the covered range.
        daily_impressions = working.resample('D')[impressions_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_impressions.index, daily_impressions.values, marker='.', linestyle='-', color='slateblue')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Impressions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block (including the returns above)
        # and closes all figures currently registered with pyplot.
        plt.close('all')
if __name__ == '__main__':
    # Manual smoke test: build small in-memory frames and run every plot
    # generator once, logging whether a figure came back.

    # Posts data (merged with stats). Every column must have the same length
    # (7 rows) or pd.DataFrame raises ValueError.
    posts_data = {
        'id': [f'post{i}' for i in range(1, 8)],
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
        'likeCount': [10, 5, 12, 8, 15, 3, 20],
        'commentCount': [2, 1, 3, 1, 4, 0, 5],
        'shareCount': [1, 0, 1, 1, 2, 0, 1],
        'clickCount': [20, 15, 30, 22, 40, 10, 50],
        'impressionCount': [200, 150, 300, 220, 400, 100, 500],
        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],  # engagement rate
    }
    sample_merged_posts_df = pd.DataFrame(posts_data)

    # Follower stats data: 5 time-series rows, 4 x 3 demographic snapshot rows,
    # and 3 rows for the total-followers plot (20 rows in total).
    follower_data = {
        'date': pd.to_datetime([
            '2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15', '2023-03-01',  # time series
            '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',  # demographics (snapshot)
            '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',
            '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',
        ]),
        'follower_count_type': [
            'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
            'follower_geo', 'follower_geo', 'follower_geo',  # location
            'follower_function', 'follower_function', 'follower_function',  # role
            'follower_industry', 'follower_industry', 'follower_industry',  # industry
            'follower_seniority', 'follower_seniority', 'follower_seniority',  # seniority
            'total_followers_snapshot', 'total_followers_snapshot', 'total_followers_snapshot',  # total growth
        ],
        'category_name': [
            'Jan', 'Jan-Mid', 'Feb', 'Feb-Mid', 'Mar',  # follower_gains_monthly
            'USA', 'Canada', 'UK',  # geo
            'Engineering', 'Sales', 'Marketing',  # function/role
            'Tech', 'Finance', 'Healthcare',  # industry
            'Senior', 'Junior', 'Manager',  # seniority
            'Overall1', 'Overall2', 'Overall3',  # total_followers_snapshot
        ],
        'follower_count_o': [
            100, 105, 115, 120, 130,  # monthly gains
            500, 300, 200,  # geo
            400, 350, 250,  # role
            600, 200, 200,  # industry
            300, 400, 300,  # seniority
            1000, 1010, 1025,  # total_followers_snapshot
        ],
        # Only the last three rows carry a value for the total growth plot.
        'total_followers': [None] * 17 + [100, 115, 130],
    }
    sample_follower_stats_df = pd.DataFrame(follower_data)

    # For the snapshot rows, take 'total_followers' from 'follower_count_o'
    # (the assignment aligns on the index, so only the selected rows change).
    snapshot_rows = sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot'
    sample_follower_stats_df.loc[snapshot_rows, 'total_followers'] = sample_follower_stats_df['follower_count_o']

    logging.info("--- Testing New Plot Generations ---")

    fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
    if fig_followers_count:
        logging.info("Followers Count Over Time (monthly) plot generated.")

    fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
    if fig_followers_rate:
        logging.info("Followers Growth Rate (monthly) plot generated.")

    fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
    if fig_geo:
        logging.info("Followers by Location plot generated.")

    fig_role = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_function', plot_title="Followers by Role")
    if fig_role:
        logging.info("Followers by Role plot generated.")

    fig_industry = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_industry', plot_title="Followers by Industry")
    if fig_industry:
        logging.info("Followers by Industry plot generated.")

    fig_seniority = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_seniority', plot_title="Followers by Seniority")
    if fig_seniority:
        logging.info("Followers by Seniority plot generated.")

    fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
    if fig_eng_rate:
        logging.info("Engagement Rate Over Time plot generated.")

    fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
    if fig_reach:
        logging.info("Reach Over Time (Clicks) plot generated.")

    fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
    if fig_impressions:
        logging.info("Impressions Over Time plot generated.")

    # Exercise the existing total follower growth plot with only its own rows.
    total_followers_df = sample_follower_stats_df[snapshot_rows].copy()
    total_followers_df['date'] = pd.to_datetime(total_followers_df['date'])  # ensure datetime dtype
    fig_total_growth = generate_total_follower_growth_plot(total_followers_df, date_column='date', count_column='total_followers')
    if fig_total_growth:
        logging.info("Total Follower Growth plot (existing function) generated.")

    logging.info("Test script finished. Review plots if displayed locally or saved.")