LinkedinMonitor / analytics_plot_generators.py
GuglielmoTor's picture
Create analytics_plot_generators.py
f20ee95 verified
raw
history blame
15.9 kB
import pandas as pd
import matplotlib.pyplot as plt
import logging
from io import BytesIO
import base64
import numpy as np
# Configure logging for this module
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """
    Creates a placeholder Matplotlib figure indicating no data or an error.

    The figure shows ``title`` and ``message`` centred on a blank canvas
    (the axes are hidden).

    Args:
        title: Text shown on the first line of the placeholder.
        message: Text shown on the line(s) below the title.

    Returns:
        The Matplotlib figure. If building the regular placeholder raises,
        the error is logged and a minimal figure reading
        "Plot generation error" is returned instead. The returned figure is
        not closed here; releasing it is left to the caller.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
        ax.axis('off')
        # Lay out this specific figure instead of pyplot's implicit "current" figure.
        fig.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}")
        # Drop the half-built figure so it does not stay registered with pyplot
        # once it is replaced by the fallback below.
        if fig is not None:
            plt.close(fig)
        fallback_fig, fallback_ax = plt.subplots()
        fallback_ax.text(0.5, 0.5, "Plot generation error", ha='center', va='center')
        fallback_ax.axis('off')
        return fallback_fig
def generate_posts_activity_plot(df, date_column='published_at'):  # Default changed as per common use
    """
    Builds a line chart of the number of posts per calendar day.

    The values in ``date_column`` are converted to datetimes when they are not
    already of a datetime dtype; rows whose date cannot be parsed are dropped.
    The remaining rows are counted per day via a daily resample.

    Args:
        df: DataFrame with one row per post (may be None or empty).
        date_column: Name of the column holding each post's date.

    Returns:
        A Matplotlib figure with the chart, or a placeholder figure when the
        input is missing/empty, the column is absent, no valid dates remain,
        or an exception occurs while plotting.

    Note:
        Once the plotting section has been entered, every pyplot figure is
        closed (``plt.close('all')``) before the function returns.
    """
    chart_title = "Posts Activity Over Time"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {row_count}")

    # Guard clauses: nothing to plot without rows or without the date column.
    if df is None or df.empty:
        logging.warning("Posts activity: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    if date_column not in df.columns:
        logging.warning(f"Posts activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return create_placeholder_plot(title=chart_title, message=f"Date column '{date_column}' not found.")

    try:
        # Work on a copy so the caller's frame is never modified.
        frame = df.copy()
        already_datetime = pd.api.types.is_datetime64_any_dtype(frame[date_column])
        if not already_datetime:
            frame[date_column] = pd.to_datetime(frame[date_column], errors='coerce')

        frame = frame.dropna(subset=[date_column])
        if frame.empty:
            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=chart_title, message="No valid date entries found.")

        # One bucket per calendar day; the bucket size is the post count.
        by_date = frame.set_index(date_column)
        daily_counts = by_date.resample('D').size()
        if daily_counts.empty:
            logging.info("Posts activity: No posts after resampling by day.")
            return create_placeholder_plot(title=chart_title, message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        daily_counts.plot(kind='line', ax=ax, marker='o', linestyle='-')
        ax.set_title('Posts Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated posts activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Posts Activity Error", message=str(e))
    finally:
        plt.close('all')
def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='comments_count', shares_col='shares_count'):
    """
    Builds a bar chart of total likes, comments and shares.

    Each of the three columns is coerced to numeric (unparseable or missing
    values count as 0) and summed over all rows. Input df is expected to be
    pre-filtered by date if necessary.

    Args:
        df: DataFrame with one row per post (may be None or empty).
        likes_col: Name of the column with like counts.
        comments_col: Name of the column with comment counts.
        shares_col: Name of the column with share counts.

    Returns:
        A Matplotlib figure with the chart, or a placeholder figure when the
        input is missing/empty, a column is absent, all totals are zero, or
        an exception occurs while plotting.

    Note:
        Once the plotting section has been entered, every pyplot figure is
        closed (``plt.close('all')``) before the function returns.
    """
    chart_title = "Post Engagement Types"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating engagement type plot. Input df rows: {row_count}")

    required_cols = [likes_col, comments_col, shares_col]

    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No data available for the selected period.")

    missing_cols = [name for name in required_cols if name not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=chart_title, message=msg)

    try:
        # Sum each column after coercing it to numeric; bad/missing cells become 0.
        bar_labels = ['Likes', 'Comments', 'Shares']
        engagement_data = {
            label: pd.to_numeric(df[column], errors='coerce').fillna(0).sum()
            for label, column in zip(bar_labels, required_cols)
        }

        if all(total == 0 for total in engagement_data.values()):
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(title=chart_title, message="No engagement data (likes, comments, shares) in the selected period.")

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(
            list(engagement_data.keys()),
            list(engagement_data.values()),
            color=['skyblue', 'lightgreen', 'salmon'],
        )
        ax.set_title('Total Post Engagement Types')
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Place each value label slightly above its bar (1% of the tallest bar).
        label_offset = 0.01 * max(engagement_data.values(), default=10)
        for bar in bars:
            height = bar.get_height()
            x_center = bar.get_x() + bar.get_width() / 2.0
            ax.text(x_center, height + label_offset, str(int(height)), ha='center', va='bottom')

        plt.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Engagement Type Error", message=str(e))
    finally:
        plt.close('all')
def generate_mentions_activity_plot(df, date_column='date'):  # Default changed as per common use
    """
    Builds a line chart of the number of mentions per calendar day.

    The values in ``date_column`` are converted to datetimes when they are not
    already of a datetime dtype; rows whose date cannot be parsed are dropped.
    The remaining rows are counted per day via a daily resample.

    Args:
        df: DataFrame with one row per mention (may be None or empty).
        date_column: Name of the column holding each mention's date.

    Returns:
        A Matplotlib figure with the chart, or a placeholder figure when the
        input is missing/empty, the column is absent, no valid dates remain,
        or an exception occurs while plotting.

    Note:
        Once the plotting section has been entered, every pyplot figure is
        closed (``plt.close('all')``) before the function returns.
    """
    placeholder_title = "Mentions Activity Over Time"
    n_rows = len(df) if df is not None else 'None'
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {n_rows}")

    has_rows = df is not None and not df.empty
    if not has_rows:
        logging.warning("Mentions activity: DataFrame is empty.")
        return create_placeholder_plot(title=placeholder_title, message="No data available for the selected period.")

    if date_column not in df.columns:
        logging.warning(f"Mentions activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return create_placeholder_plot(title=placeholder_title, message=f"Date column '{date_column}' not found.")

    try:
        # Operate on a copy; the caller's DataFrame stays untouched.
        mentions = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(mentions[date_column]):
            mentions[date_column] = pd.to_datetime(mentions[date_column], errors='coerce')

        mentions = mentions.dropna(subset=[date_column])
        if mentions.empty:
            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=placeholder_title, message="No valid date entries found.")

        # Daily buckets; the size of each bucket is that day's mention count.
        daily_resampler = mentions.set_index(date_column).resample('D')
        mentions_per_day = daily_resampler.size()
        if mentions_per_day.empty:
            logging.info("Mentions activity: No mentions after resampling by day.")
            return create_placeholder_plot(title=placeholder_title, message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        mentions_per_day.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
        ax.set_title('Mentions Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated mentions activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
    finally:
        plt.close('all')
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """
    Generates a pie chart for mention sentiment distribution.

    Input df is expected to be pre-filtered by date if necessary. Each
    distinct value of ``sentiment_column`` becomes one slice sized by how
    often it occurs.

    Slice colours are chosen by label, ignoring case and surrounding
    whitespace: positive -> light green, negative -> salmon,
    neutral -> light sky blue, mixed -> gold. Any other label is drawn in
    grey.

    Args:
        df: DataFrame with one row per mention (may be None or empty).
        sentiment_column: Name of the column holding the sentiment label.

    Returns:
        A Matplotlib figure with the chart, or a placeholder figure when the
        input is missing/empty, the column is absent, no labels are present,
        or an exception occurs while plotting.

    Note:
        Once the plotting section has been entered, every pyplot figure is
        closed (``plt.close('all')``) before the function returns.
    """
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")

    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)

    try:
        # value_counts() only reads the column, so no defensive copy is needed.
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))

        # Keys are lower-case; labels are normalised before lookup so that
        # 'positive', 'Positive' and ' POSITIVE ' all get the same colour.
        colors = {'positive': 'lightgreen', 'negative': 'salmon', 'neutral': 'lightskyblue', 'mixed': 'gold'}
        pie_colors = [
            colors.get(str(label).strip().lower(), '#cccccc')  # Default color for unknown sentiments
            for label in sentiment_counts.index
        ]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        ax.axis('equal')  # Equal aspect ratio keeps the pie circular.
        # Lay out this specific figure instead of pyplot's implicit "current" figure.
        fig.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
    finally:
        plt.close('all')
def generate_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """
    Builds a line chart of the follower count over time.

    This function receives the *unfiltered* follower DataFrame. Dates are
    converted to datetimes when not already of a datetime dtype, counts are
    coerced to numeric, rows where either value is missing or unparseable are
    dropped, and the rest is plotted in chronological order.

    Args:
        df: DataFrame with follower statistics (may be None or empty).
        date_column: Name of the column holding the date of each data point.
        count_column: Name of the column holding the follower count.

    Returns:
        A Matplotlib figure with the chart, or a placeholder figure when the
        input is missing/empty, a column is absent, no valid rows remain, or
        an exception occurs while plotting.

    Note:
        Once the plotting section has been entered, every pyplot figure is
        closed (``plt.close('all')``) before the function returns.
    """
    chart_title = "Follower Growth Over Time"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. Input df rows: {row_count}")

    if df is None or df.empty:
        logging.warning("Follower growth: DataFrame is empty.")
        return create_placeholder_plot(title=chart_title, message="No follower data available.")

    # Collect whichever of the two required columns is absent (date first).
    missing = [name for name in (date_column, count_column) if name not in df.columns]
    if missing:
        msg = f"Follower growth: Columns missing: {missing}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=chart_title, message=msg)

    try:
        # Work on a copy so the caller's frame is never modified.
        followers = df.copy()
        dates_are_datetime = pd.api.types.is_datetime64_any_dtype(followers[date_column])
        if not dates_are_datetime:
            followers[date_column] = pd.to_datetime(followers[date_column], errors='coerce')
        followers[count_column] = pd.to_numeric(followers[count_column], errors='coerce')

        followers = followers.dropna(subset=[date_column, count_column])
        if followers.empty:
            logging.info("Follower growth: DataFrame empty after NaNs dropped from date/count columns.")
            return create_placeholder_plot(title=chart_title, message="No valid data for follower growth.")

        followers = followers.sort_values(by=date_column)
        x_dates = followers[date_column]
        y_counts = followers[count_column]

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(x_dates, y_counts, marker='o', linestyle='-', color='green')
        ax.set_title('Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info("Successfully generated follower growth plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating follower growth plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Follower Growth Error", message=str(e))
    finally:
        plt.close('all')
if __name__ == '__main__':
    # Manual smoke test: build small sample frames, run every generator once,
    # then exercise the placeholder paths. Results are only logged; add
    # plt.show() locally to inspect a figure.

    # Create dummy data for testing
    sample_posts_df = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03']),
        'likes_count': [10, 5, 12, 8, 15, 3],
        'comments_count': [2, 1, 3, 1, 4, 0],
        'shares_count': [1, 0, 1, 1, 2, 0],
    })

    sample_mentions_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })

    sample_follower_stats_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']),
        'total_followers': [100, 105, 115, 120, 118],  # Example data
    })

    empty_df = pd.DataFrame(columns=['published_at'])  # Empty df with column
    df_no_col = pd.DataFrame({'some_other_date': pd.to_datetime(['2023-01-01'])})

    # Each entry pairs a zero-argument plot builder with the message logged
    # when it yields a figure. Entries run strictly in list order.
    generation_checks = [
        (lambda: generate_posts_activity_plot(sample_posts_df.copy(), date_column='published_at'),
         "Posts activity plot generated."),
        (lambda: generate_engagement_type_plot(sample_posts_df.copy()),
         "Engagement type plot generated."),
        (lambda: generate_mentions_activity_plot(sample_mentions_df.copy(), date_column='date'),
         "Mentions activity plot generated."),
        (lambda: generate_mention_sentiment_plot(sample_mentions_df.copy()),
         "Mention sentiment plot generated."),
        (lambda: generate_follower_growth_plot(sample_follower_stats_df.copy(), date_column='date', count_column='total_followers'),
         "Follower growth plot generated."),
    ]

    placeholder_checks = [
        (lambda: create_placeholder_plot(),
         "Placeholder plot generated."),
        (lambda: generate_posts_activity_plot(empty_df, date_column='published_at'),
         "Empty posts activity plot (placeholder) generated."),
        (lambda: generate_posts_activity_plot(df_no_col, date_column='published_at'),
         "Posts activity with missing column (placeholder) generated."),
    ]

    logging.info("--- Testing Plot Generations ---")
    for build_plot, success_message in generation_checks:
        if build_plot():
            logging.info(success_message)

    logging.info("--- Testing Placeholders ---")
    for build_plot, success_message in placeholder_checks:
        if build_plot():
            logging.info(success_message)

    logging.info("Test script finished.")