Update analytics_plot_generator.py
analytics_plot_generator.py  (+275 −202)
CHANGED
@@ -5,6 +5,7 @@ from io import BytesIO
5 |  import base64
6 |  import numpy as np
7 |  import matplotlib.ticker as mticker
8 |
9 |  # Configure logging for this module
10 |  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -184,13 +185,9 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
184 |  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
185 |
186 |  fig, ax = plt.subplots(figsize=(8, 5))
187 | - # Using a try/except to pick a colormap
188 | - try:
189 | -     colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
190 | - except ValueError:  # Fallback if Pastel1 doesn't have enough colors or isn't available
191 | -     colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
192 |  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
193 | -
194 |  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
195 |  ax.set_title('Mention Sentiment Distribution')
196 |  ax.axis('equal')
@@ -449,10 +446,13 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
449 |  ax.set_xlabel('Date')
450 |  ax.set_ylabel('Engagement Rate')
451 |  # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
452 | - max_rate_val = engagement_over_time.max()
453 | - formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val >= 0 else 100.0
454 | - if max_rate_val > 100:
455 |      formatter_xmax = max_rate_val  # Or some other sensible upper bound for formatting
456 |  ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
457 |  ax.grid(True, linestyle='--', alpha=0.7)
458 |  plt.xticks(rotation=45)
@@ -525,7 +525,7 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
525 |  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
526 |
527 |  if df_copy.empty:  # After dropping NaNs for essential columns
528 | -
529 |
530 |  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
531 |
@@ -544,34 +544,27 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
544 |  finally:
545 |      plt.close('all')
546 |
547 | - # ---
548 | -
549 |  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
550 | -     """Generates a plot for likes over time."""
551 |      title = "Reactions (Likes) Over Time"
552 |      logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
553 | -
554 |      if df is None or df.empty:
555 |          return create_placeholder_plot(title=title, message="No post data for likes.")
556 | -
557 |      required_cols = [date_column, likes_col]
558 | -     missing_cols = [col for col in required_cols if col not in df.columns]
559 | -     if missing_cols:
560 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
561 | -
562 |      try:
563 |          df_copy = df.copy()
564 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
565 |          df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce')
566 |          df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
567 | -
568 |          if df_copy.empty:
569 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for likes plot.")
570 | -
571 | -         likes_over_time = df_copy.resample('D')[likes_col].sum()
572 | -
573 |          fig, ax = plt.subplots(figsize=(10, 5))
574 | -         ax.plot(likes_over_time.index, likes_over_time.values, marker='.', linestyle='-')
575 |          ax.set_title(title)
576 |          ax.set_xlabel('Date')
577 |          ax.set_ylabel('Total Likes')
@@ -585,73 +578,36 @@ def generate_likes_over_time_plot(df, date_column='published_at', likes_col='lik
585 |  finally:
586 |      plt.close('all')
587 |
588 | -
589 |  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
590 | -     """Generates a plot for clicks over time (…)"""
591 |      title = "Clicks Over Time"
592 |      logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
593 | -
594 | -     if df is None or df.empty:
595 | -         return create_placeholder_plot(title=title, message="No post data for clicks.")
596 | -
597 | -     required_cols = [date_column, clicks_col]
598 | -     missing_cols = [col for col in required_cols if col not in df.columns]
599 | -     if missing_cols:
600 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
601 | -
602 | -     try:
603 | -         df_copy = df.copy()
604 | -         df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
605 | -         df_copy[clicks_col] = pd.to_numeric(df_copy[clicks_col], errors='coerce')
606 | -         df_copy = df_copy.dropna(subset=[date_column, clicks_col]).set_index(date_column)
607 | -
608 | -         if df_copy.empty:
609 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for clicks plot.")
610 | -
611 | -         clicks_over_time = df_copy.resample('D')[clicks_col].sum()
612 | -
613 | -         fig, ax = plt.subplots(figsize=(10, 5))
614 | -         ax.plot(clicks_over_time.index, clicks_over_time.values, marker='.', linestyle='-', color='teal')
615 | -         ax.set_title(title)
616 | -         ax.set_xlabel('Date')
617 | -         ax.set_ylabel('Total Clicks')
618 | -         ax.grid(True, linestyle='--', alpha=0.7)
619 | -         plt.xticks(rotation=45)
620 | -         plt.tight_layout()
621 | -         return fig
622 | -     except Exception as e:
623 | -         logging.error(f"Error generating {title}: {e}", exc_info=True)
624 | -         return create_placeholder_plot(title=f"{title} Error", message=str(e))
625 | -     finally:
626 | -         plt.close('all')
627 |
628 |
629 |  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
630 |      """Generates a plot for shares over time."""
631 |      title = "Shares Over Time"
632 |      logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
633 | -
634 |      if df is None or df.empty:
635 |          return create_placeholder_plot(title=title, message="No post data for shares.")
636 | -
637 |      required_cols = [date_column, shares_col]
638 | -     missing_cols = [col for col in required_cols if col not in df.columns]
639 | -     if missing_cols:
640 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
641 | -
642 |      try:
643 |          df_copy = df.copy()
644 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
645 |          df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce')
646 |          df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column)
647 | -
648 |          if df_copy.empty:
649 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for shares plot.")
650 | -
651 | -         shares_over_time = df_copy.resample('D')[shares_col].sum()
652 |
653 |          fig, ax = plt.subplots(figsize=(10, 5))
654 | -         ax.plot(shares_over_time.index, shares_over_time.values, marker='.', linestyle='-')
655 |          ax.set_title(title)
656 |          ax.set_xlabel('Date')
657 |          ax.set_ylabel('Total Shares')
@@ -665,33 +621,26 @@ def generate_shares_over_time_plot(df, date_column='published_at', shares_col='s
665 |  finally:
666 |      plt.close('all')
667 |
668 | -
669 |  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
670 |      """Generates a plot for comments over time."""
671 |      title = "Comments Over Time"
672 |      logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
673 | -
674 |      if df is None or df.empty:
675 |          return create_placeholder_plot(title=title, message="No post data for comments.")
676 | -
677 |      required_cols = [date_column, comments_col]
678 | -     missing_cols = [col for col in required_cols if col not in df.columns]
679 | -     if missing_cols:
680 | -         return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
681 | -
682 |      try:
683 |          df_copy = df.copy()
684 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
685 |          df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce')
686 |          df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column)
687 | -
688 |          if df_copy.empty:
689 | -             return create_placeholder_plot(title=title, message="No valid data after cleaning for comments plot.")
690 | -
691 | -         comments_over_time = df_copy.resample('D')[comments_col].sum()
692 |
693 |          fig, ax = plt.subplots(figsize=(10, 5))
694 | -         ax.plot(comments_over_time.index, comments_over_time.values, marker='.', linestyle='-')
695 |          ax.set_title(title)
696 |          ax.set_xlabel('Date')
697 |          ax.set_ylabel('Total Comments')
@@ -705,53 +654,190 @@ def generate_comments_over_time_plot(df, date_column='published_at', comments_co
705 |  finally:
706 |      plt.close('all')
707 |
708 | -
709 | - def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', text_column_for_check='comment_text'):
710 |      """
711 |      Generates a pie chart for comment sentiment distribution.
712 | -
713 | -
714 |      """
715 |      title = "Breakdown of Comments by Sentiment"
716 |      logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
717 |
718 |      if df is None or df.empty:
719 | -         return create_placeholder_plot(title=title, message="No data …")
720 | -
721 | -     # Check if the expected sentiment column exists.
722 | -     # Also check for a text column as a heuristic, as sentiment often comes with text.
723 |      if sentiment_column not in df.columns:
724 | -
725 | -
726 | -
727 | -
728 | -
729 |
730 | -     #
731 | -
732 | -
733 | -
734 |
735 |      try:
736 |          df_copy = df.copy()
737 | -         # Ensure sentiment column is treated as categorical
738 |          df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
739 | -         sentiment_counts = df_copy[sentiment_column].value_counts()
740 |
741 |          if sentiment_counts.empty or sentiment_counts.sum() == 0:
742 |              return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
743 |
744 |          fig, ax = plt.subplots(figsize=(8, 5))
745 | -         try:
746 | -             colors_map = plt.cm.get_cmap('Pastel2', len(sentiment_counts))
747 | -         except ValueError:
748 | -             colors_map = plt.cm.get_cmap('Accent', len(sentiment_counts))
749 | -
750 |          pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
751 |
752 | -         ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
753 |          ax.set_title(title)
754 |          ax.axis('equal')
755 |  plt.tight_layout()
756 |  logging.info(f"Successfully generated {title} plot.")
757 |  return fig
@@ -765,27 +851,28 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
765 |  if __name__ == '__main__':
766 |      # Create dummy data for testing
767 |      posts_data = {
768 | -        'id': [f'post{i}' for i in range(1, 8)],  #
769 |          'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
770 |          'likeCount': [10, 5, 12, 8, 15, 3, 20],
771 |          'commentCount': [2, 1, 3, 1, 4, 0, 5],
772 |          'shareCount': [1, 0, 1, 1, 2, 0, 1],
773 |          'clickCount': [20, 15, 30, 22, 40, 10, 50],
774 |          'impressionCount': [200, 150, 300, 220, 400, 100, 500],
775 | -        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
776 |      }
777 |      sample_merged_posts_df = pd.DataFrame(posts_data)
778 |
779 | -    # Dummy data for comments with sentiment (hypothetical)
780 | -    comments_sentiment_data = {
781 | -        'comment_id': range(10),
782 | -        'post_id': ['post1']*3 + ['post2']*2 + ['post3']*5,
783 | -        'comment_text': ['Great post!', 'I disagree.', 'Nice work.', 'Interesting.', 'Could be better.', 'Loved it!', 'Not sure.', 'Thanks!', 'Helpful.', 'Okay.'],
784 | -        'comment_sentiment': ['Positive', 'Negative', 'Positive', 'Neutral', 'Negative', 'Positive', 'Neutral', 'Positive', 'Positive', 'Neutral']
785 | -    }
786 | -    sample_comments_with_sentiment_df = pd.DataFrame(comments_sentiment_data)
787 | -
788 | -
789 |      # Updated Follower Stats Data
790 |      follower_data = {
791 |          'follower_count_type': [
@@ -795,118 +882,104 @@ if __name__ == '__main__':
795 |              'follower_industry', 'follower_industry',
796 |              'follower_seniority', 'follower_seniority'
797 |          ],
798 | -        # 'category_name' now holds dates for time-series, and actual categories for demographics
799 |          'category_name': [
800 |              '2024-01-01', '2024-02-01', '2024-03-01',  # Dates for monthly gains
801 | -            'USA', 'Canada', 'UK',
802 | -            'Engineering', 'Sales',
803 | -            'Tech', 'Finance',
804 | -            'Senior', 'Junior'
805 |          ],
806 |          'follower_count_organic': [
807 | -            100, 110, 125,
808 | -            500, 300, 150,
809 | -            400, 200,
810 | -            250, 180,
811 | -            300, 220
812 |          ],
813 |          'follower_count_paid': [
814 | -            20, 30, 25,
815 | -            50, 40, 60,
816 | -            30, 20,
817 | -            45, 35,
818 | -            60, 40
819 |          ]
820 |      }
821 |      sample_follower_stats_df = pd.DataFrame(follower_data)
822 |
823 | -    logging.info("--- Testing … ---")
824 | -
825 | -    fig_followers_count = generate_followers_count_over_time_plot(
826 | -        sample_follower_stats_df.copy(),
827 | -        type_value='follower_gains_monthly'  # date_info_column defaults to 'category_name'
828 | -    )
829 | -    if fig_followers_count and not isinstance(fig_followers_count, str): logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
830 | -
831 | -    fig_followers_rate = generate_followers_growth_rate_plot(
832 | -        sample_follower_stats_df.copy(),
833 | -        type_value='follower_gains_monthly'  # date_info_column defaults to 'category_name'
834 | -    )
835 | -    if fig_followers_rate and not isinstance(fig_followers_rate, str): logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
836 | -
837 | -    fig_geo = generate_followers_by_demographics_plot(
838 | -        sample_follower_stats_df.copy(),
839 | -        type_value='follower_geo',  # category_col defaults to 'category_name'
840 | -        plot_title="Followers by Location (Organic/Paid)"
841 | -    )
842 | -    if fig_geo and not isinstance(fig_geo, str): logging.info("Followers by Location (grouped organic/paid) plot generated.")
843 | -
844 | -    fig_role = generate_followers_by_demographics_plot(
845 | -        sample_follower_stats_df.copy(),
846 | -        type_value='follower_function',
847 | -        plot_title="Followers by Role (Organic/Paid)"
848 | -    )
849 | -    if fig_role and not isinstance(fig_role, str): logging.info("Followers by Role (grouped organic/paid) plot generated.")
850 | -
851 | -    fig_industry = generate_followers_by_demographics_plot(
852 | -        sample_follower_stats_df.copy(),
853 | -        type_value='follower_industry',
854 | -        plot_title="Followers by Industry (Organic/Paid)"
855 | -    )
856 | -    if fig_industry and not isinstance(fig_industry, str): logging.info("Followers by Industry (grouped organic/paid) plot generated.")
857 | -
858 | -    fig_seniority = generate_followers_by_demographics_plot(
859 | -        sample_follower_stats_df.copy(),
860 | -        type_value='follower_seniority',
861 | -        plot_title="Followers by Seniority (Organic/Paid)"
862 | -    )
863 | -    if fig_seniority and not isinstance(fig_seniority, str): logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
864 | -
865 | -    logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
866 |      fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
867 | -    if fig_posts_activity and not isinstance(fig_posts_activity, str): logging.info("Posts activity plot generated.")
868 |
869 |      fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
870 | -    if fig_engagement_type and not isinstance(fig_engagement_type, str): logging.info("Engagement type plot generated.")
871 |
872 | -    # Dummy mentions for testing
873 |      mentions_data = {
874 |          'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
875 |          'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
876 |      }
877 |      sample_mentions_df = pd.DataFrame(mentions_data)
878 |      fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
879 | -    if fig_mentions_activity and not isinstance(fig_mentions_activity, str): logging.info("Mentions activity plot generated.")
880 |      fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
881 | -    if fig_mention_sentiment and not isinstance(fig_mention_sentiment, str): logging.info("Mention sentiment plot generated.")
882 |
883 | -
884 | -    if …
885 |
886 |      fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
887 | -    if fig_reach and not isinstance(fig_reach, str): logging.info("Reach Over Time plot generated.")
888 | -
889 |      fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
890 | -    if fig_impressions and not isinstance(fig_impressions, str): logging.info("Impressions Over Time plot generated.")
891 | -
892 | -    logging.info("--- Testing NEW Plot Generations ---")
893 |      fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
894 | -    if fig_likes_time and not isinstance(fig_likes_time, str): logging.info("Likes Over Time plot generated.")
895 |
896 | -    fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
897 | -    if fig_clicks_time and not isinstance(fig_clicks_time, str): logging.info("Clicks Over Time plot generated.")
898 |
899 | -
900 | -
901 |
902 | -
903 | -    if …
904 |
905 | -
906 | -    if …
907 |
908 | -
909 | -    if …
910 |
911 |
912 |      logging.info("Test script finished. Review plots if displayed locally or saved.")
5 |  import base64
6 |  import numpy as np
7 |  import matplotlib.ticker as mticker
8 | + import ast  # For safely evaluating string representations of lists
9 |
10 |  # Configure logging for this module
11 |  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
185 |  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
186 |
187 |  fig, ax = plt.subplots(figsize=(8, 5))
188 | + # Using a qualitative colormap like 'Pastel1' or 'Set3' can be good for categorical data
189 | + colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
190 |  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
191 |  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
192 |  ax.set_title('Mention Sentiment Distribution')
193 |  ax.axis('equal')
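Side note on the colormap call above: requesting plt.cm.get_cmap('Pastel1', n) yields a colormap discretized to n entries, so indexing it 0..n-1 gives one distinct RGBA colour per sentiment class. A minimal sketch (assumes a Matplotlib version where plt.cm.get_cmap is still available; newer releases expose the same thing as matplotlib.colormaps['Pastel1'].resampled(n)):

    import matplotlib.pyplot as plt

    n = 3  # e.g. Positive / Neutral / Negative
    cmap = plt.cm.get_cmap('Pastel1', n)        # colormap discretized to n colours
    pie_colors = [cmap(i) for i in range(n)]    # RGBA tuples, ready for ax.pie(colors=...)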
446 |  ax.set_xlabel('Date')
447 |  ax.set_ylabel('Engagement Rate')
448 |  # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
449 | + max_rate_val = engagement_over_time.max()
450 | + formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val >= 0 else 100.0  # Heuristic for 0-1 vs 0-100 scale
451 | + if max_rate_val > 1.5 and formatter_xmax == 1.0:  # If data seems to be percentage but formatted as decimal
452 | +     formatter_xmax = 100.0
453 | + elif max_rate_val > 100 and formatter_xmax == 1.0:  # If data is clearly > 100 but we assumed 0-1
454 |       formatter_xmax = max_rate_val  # Or some other sensible upper bound for formatting
455 | +
456 |  ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
457 |  ax.grid(True, linestyle='--', alpha=0.7)
458 |  plt.xticks(rotation=45)
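For readers unfamiliar with the formatter used here: mticker.PercentFormatter(xmax=...) labels a tick value v as v / xmax * 100 with a percent sign, which is why the hunk picks xmax=1.0 when the engagement rate is on a 0-1 scale and a larger bound when the data is already expressed in percent. A minimal, self-contained sketch (not part of the commit; the sample values are made up):

    import matplotlib
    matplotlib.use("Agg")  # headless backend, just for the sketch
    import matplotlib.pyplot as plt
    import matplotlib.ticker as mticker

    fig, ax = plt.subplots()
    ax.plot([1, 2, 3], [0.02, 0.05, 0.08])  # rates on a 0-1 scale
    ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0))  # 0.05 is labelled "5%"
    # If the column already held 2.0, 5.0, 8.0 (percent), xmax=100.0 would give the same labels.
    fig.savefig("engagement_rate_axis_demo.png")
    plt.close(fig)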
525 |  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
526 |
527 |  if df_copy.empty:  # After dropping NaNs for essential columns
528 | +     return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
529 |
530 |  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
531 |
544 |  finally:
545 |      plt.close('all')
546 |
547 | + # --- New Plot Functions from User Request ---
548 |  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
549 | +     """Generates a plot for likes over time."""
550 |      title = "Reactions (Likes) Over Time"
551 |      logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
552 |      if df is None or df.empty:
553 |          return create_placeholder_plot(title=title, message="No post data for likes.")
554 |      required_cols = [date_column, likes_col]
555 | +     if any(col not in df.columns for col in required_cols):
556 | +         return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
557 |      try:
558 |          df_copy = df.copy()
559 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
560 |          df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce')
561 |          df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
562 |          if df_copy.empty:
563 | +             return create_placeholder_plot(title=title, message="No valid data after cleaning.")
564 | +
565 | +         data_over_time = df_copy.resample('D')[likes_col].sum()
566 |          fig, ax = plt.subplots(figsize=(10, 5))
567 | +         ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson')
568 |          ax.set_title(title)
569 |          ax.set_xlabel('Date')
570 |          ax.set_ylabel('Total Likes')
578 |      finally:
579 |          plt.close('all')
580 |
581 |  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
582 | +     """Generates a plot for clicks over time (can be same as reach if clicks are primary reach metric)."""
583 | +     # This is essentially the same as generate_reach_over_time_plot if reach_col is 'clickCount'.
584 | +     # For clarity, keeping it separate if user wants to distinguish or use a different column later.
585 |      title = "Clicks Over Time"
586 |      logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
587 | +     # Reusing logic from generate_reach_over_time_plot
588 | +     return generate_reach_over_time_plot(df, date_column, clicks_col)
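Because the rewritten generate_clicks_over_time_plot now just forwards its arguments, the two calls below should yield equivalent figures (a sketch, assuming the dummy sample_merged_posts_df built in the __main__ block further down):

    fig_a = generate_clicks_over_time_plot(sample_merged_posts_df)  # delegates internally
    fig_b = generate_reach_over_time_plot(sample_merged_posts_df, 'published_at', 'clickCount')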
589 |
590 |
591 |  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
592 |      """Generates a plot for shares over time."""
593 |      title = "Shares Over Time"
594 |      logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
595 |      if df is None or df.empty:
596 |          return create_placeholder_plot(title=title, message="No post data for shares.")
597 |      required_cols = [date_column, shares_col]
598 | +     if any(col not in df.columns for col in required_cols):
599 | +         return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
600 |      try:
601 |          df_copy = df.copy()
602 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
603 |          df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce')
604 |          df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column)
605 |          if df_copy.empty:
606 | +             return create_placeholder_plot(title=title, message="No valid data after cleaning.")
607 |
608 | +         data_over_time = df_copy.resample('D')[shares_col].sum()
609 |          fig, ax = plt.subplots(figsize=(10, 5))
610 | +         ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal')
611 |          ax.set_title(title)
612 |          ax.set_xlabel('Date')
613 |          ax.set_ylabel('Total Shares')
621 |      finally:
622 |          plt.close('all')
623 |
624 |  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
625 |      """Generates a plot for comments over time."""
626 |      title = "Comments Over Time"
627 |      logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
628 |      if df is None or df.empty:
629 |          return create_placeholder_plot(title=title, message="No post data for comments.")
630 |      required_cols = [date_column, comments_col]
631 | +     if any(col not in df.columns for col in required_cols):
632 | +         return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
633 |      try:
634 |          df_copy = df.copy()
635 |          df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
636 |          df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce')
637 |          df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column)
638 |          if df_copy.empty:
639 | +             return create_placeholder_plot(title=title, message="No valid data after cleaning.")
640 |
641 | +         data_over_time = df_copy.resample('D')[comments_col].sum()
642 |          fig, ax = plt.subplots(figsize=(10, 5))
643 | +         ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold')
644 |          ax.set_title(title)
645 |          ax.set_xlabel('Date')
646 |          ax.set_ylabel('Total Comments')
654 |      finally:
655 |          plt.close('all')
656 |
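All of these time-series helpers lean on the same daily aggregation: index by the timestamp column, then resample('D') and sum, which also fills days with no posts with 0. A small sketch with made-up numbers:

    import pandas as pd

    df = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-03']),
        'shareCount': [1, 2, 4],
    })
    daily = df.set_index('published_at').resample('D')['shareCount'].sum()
    print(daily)  # 2023-01-01 -> 3, 2023-01-02 -> 0, 2023-01-03 -> 4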
657 | + def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
658 |      """
659 |      Generates a pie chart for comment sentiment distribution.
660 | +     Assumes df might be post-level with an aggregated or example sentiment,
661 | +     or ideally, a comment-level df with sentiment per comment.
662 | +     If date_column is provided, it's for logging/context but not directly used for filtering here.
663 |      """
664 |      title = "Breakdown of Comments by Sentiment"
665 |      logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
666 |
667 |      if df is None or df.empty:
668 | +         return create_placeholder_plot(title=title, message="No data for comment sentiment.")
669 |      if sentiment_column not in df.columns:
670 | +         # Check for a common alternative if the primary is missing (e.g. from post-level data)
671 | +         if 'sentiment' in df.columns and sentiment_column != 'sentiment':
672 | +             logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
673 | +             sentiment_column = 'sentiment'  # Use fallback
674 | +         else:
675 | +             return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")
676 |
677 | +     # If the sentiment column has no valid data (all NaNs, or not convertible)
678 | +     if df[sentiment_column].isnull().all():
679 | +         return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")
680 |
681 |      try:
682 |          df_copy = df.copy()
683 | +         # Ensure the sentiment column is treated as categorical (string)
684 |          df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
685 | +         sentiment_counts = df_copy[sentiment_column].value_counts().dropna()  # Dropna for safety
686 |
687 |          if sentiment_counts.empty or sentiment_counts.sum() == 0:
688 |              return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
689 |
690 |          fig, ax = plt.subplots(figsize=(8, 5))
691 | +         colors_map = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
692 |          pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
693 |
694 | +         ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
695 |          ax.set_title(title)
696 |          ax.axis('equal')
697 | +         plt.tight_layout()
698 | +         return fig
699 | +     except Exception as e:
700 | +         logging.error(f"Error generating {title}: {e}", exc_info=True)
701 | +         return create_placeholder_plot(title=f"{title} Error", message=str(e))
702 | +     finally:
703 | +         plt.close('all')
704 | +
705 | + # --- NEW PLOT FUNCTIONS FOR CONTENT STRATEGY ---
706 | + def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
707 | +     """Generates a plot for post frequency over time (e.g., daily, weekly, monthly)."""
708 | +     title = f"Post Frequency Over Time ({resample_period})"
709 | +     logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
710 | +
711 | +     if df is None or df.empty:
712 | +         return create_placeholder_plot(title=title, message="No data available.")
713 | +     if date_column not in df.columns:
714 | +         return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")
715 | +
716 | +     try:
717 | +         df_copy = df.copy()
718 | +         if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
719 | +             df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
720 | +
721 | +         df_copy = df_copy.dropna(subset=[date_column])
722 | +         if df_copy.empty:
723 | +             return create_placeholder_plot(title=title, message="No valid date entries found.")
724 | +
725 | +         post_frequency = df_copy.set_index(date_column).resample(resample_period).size()
726 | +
727 | +         if post_frequency.empty:
728 | +             return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")
729 | +
730 | +         fig, ax = plt.subplots(figsize=(10, 5))
731 | +         post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None)
732 | +         ax.set_title(title)
733 | +         ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
734 | +         ax.set_ylabel('Number of Posts')
735 | +         ax.grid(True, linestyle='--', alpha=0.7)
736 | +         plt.xticks(rotation=45)
737 | +         plt.tight_layout()
738 | +         logging.info(f"Successfully generated {title} plot.")
739 | +         return fig
740 | +     except Exception as e:
741 | +         logging.error(f"Error generating {title}: {e}", exc_info=True)
742 | +         return create_placeholder_plot(title=f"{title} Error", message=str(e))
743 | +     finally:
744 | +         plt.close('all')
745 | +
746 | + def generate_content_format_breakdown_plot(df, format_col='media_type'):
747 | +     """Generates a bar chart for breakdown of content by format."""
748 | +     title = "Breakdown of Content by Format"
749 | +     logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")
750 | +
751 | +     if df is None or df.empty:
752 | +         return create_placeholder_plot(title=title, message="No data available.")
753 | +     if format_col not in df.columns:
754 | +         return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")
755 | +
756 | +     try:
757 | +         df_copy = df.copy()
758 | +         format_counts = df_copy[format_col].value_counts().dropna()
759 | +
760 | +         if format_counts.empty:
761 | +             return create_placeholder_plot(title=title, message="No content format data available.")
762 | +
763 | +         fig, ax = plt.subplots(figsize=(8, 6))
764 | +         format_counts.plot(kind='bar', ax=ax, color='skyblue')
765 | +         ax.set_title(title)
766 | +         ax.set_xlabel('Media Type')
767 | +         ax.set_ylabel('Number of Posts')
768 | +         ax.grid(axis='y', linestyle='--', alpha=0.7)
769 | +         plt.xticks(rotation=45, ha="right")
770 | +         plt.tight_layout()
771 | +
772 | +         # Add counts on top of bars
773 | +         for i, v in enumerate(format_counts):
774 | +             ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom')
775 | +
776 | +         logging.info(f"Successfully generated {title} plot.")
777 | +         return fig
778 | +     except Exception as e:
779 | +         logging.error(f"Error generating {title}: {e}", exc_info=True)
780 | +         return create_placeholder_plot(title=f"{title} Error", message=str(e))
781 | +     finally:
782 | +         plt.close('all')
783 | +
784 | + def _parse_eb_label(label_data):
785 | +     """Helper to parse eb_labels which might be lists or string representations of lists."""
786 | +     if isinstance(label_data, list):
787 | +         return label_data
788 | +     if isinstance(label_data, str):
789 | +         try:
790 | +             # Try to evaluate as a list
791 | +             parsed = ast.literal_eval(label_data)
792 | +             if isinstance(parsed, list):
793 | +                 return parsed
794 | +             # If it's a single string not in list format, treat as a single label
795 | +             return [str(parsed)]
796 | +         except (ValueError, SyntaxError):
797 | +             # If not a list string, treat the whole string as one label
798 | +             return [label_data] if label_data.strip() else []
799 | +     if pd.isna(label_data):
800 | +         return []
801 | +     return []  # Default for other types
802 | +
803 | + def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
804 | +     """Generates a horizontal bar chart for breakdown of content by topics."""
805 | +     title = f"Breakdown of Content by Topics (Top {top_n})"
806 | +     logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")
807 | +
808 | +     if df is None or df.empty:
809 | +         return create_placeholder_plot(title=title, message="No data available.")
810 | +     if topics_col not in df.columns:
811 | +         return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
812 | +
813 | +     try:
814 | +         df_copy = df.copy()
815 | +
816 | +         # Apply parsing and explode
817 | +         parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
818 | +         exploded_labels = parsed_labels.explode().dropna()
819 | +
820 | +         if exploded_labels.empty:
821 | +             return create_placeholder_plot(title=title, message="No topic data found after processing labels.")
822 | +
823 | +         topic_counts = exploded_labels.value_counts()
824 | +
825 | +         if topic_counts.empty:
826 | +             return create_placeholder_plot(title=title, message="No topics to display after counting.")
827 | +
828 | +         # Take top N and sort for plotting (descending for horizontal bar)
829 | +         top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
830 | +
831 | +         fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
832 | +         top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
833 | +         ax.set_title(title)
834 | +         ax.set_xlabel('Number of Posts')
835 | +         ax.set_ylabel('Topic')
836 | +
837 | +         # Add counts next to bars
838 | +         for i, (topic, count) in enumerate(top_topics.items()):
839 | +             ax.text(count + (0.01 * top_topics.max()), i, str(count), va='center')
840 | +
841 |          plt.tight_layout()
842 |          logging.info(f"Successfully generated {title} plot.")
843 |          return fig
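A quick check of how the _parse_eb_label helper above normalizes the mixed formats used by the eb_labels test column (stringified list, real list, bare string, missing value); a sketch for illustration only, assuming the helper is imported from this module:

    samples = ["['AI', 'Tech']", ['Innovation'], 'General', None]
    print([_parse_eb_label(s) for s in samples])
    # -> [['AI', 'Tech'], ['Innovation'], ['General'], []]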
851 |  if __name__ == '__main__':
852 |      # Create dummy data for testing
853 |      posts_data = {
854 | +         'id': [f'post{i}' for i in range(1, 8)],  # Increased to 7 for more data
855 |          'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
856 |          'likeCount': [10, 5, 12, 8, 15, 3, 20],
857 |          'commentCount': [2, 1, 3, 1, 4, 0, 5],
858 |          'shareCount': [1, 0, 1, 1, 2, 0, 1],
859 |          'clickCount': [20, 15, 30, 22, 40, 10, 50],
860 |          'impressionCount': [200, 150, 300, 220, 400, 100, 500],
861 | +         'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
862 | +         'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],  # New column
863 | +         'eb_labels': [  # New column with various formats
864 | +             "['AI', 'Tech']",
865 | +             ['Innovation'],
866 | +             'General',
867 | +             None,
868 | +             ['Tech', 'Future'],
869 | +             "['AI', 'Development']",
870 | +             ['Tech']
871 | +         ],
872 | +         'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive']  # For comment sentiment plot
873 |      }
874 |      sample_merged_posts_df = pd.DataFrame(posts_data)
875 |
876 |      # Updated Follower Stats Data
877 |      follower_data = {
878 |          'follower_count_type': [
882 |              'follower_industry', 'follower_industry',
883 |              'follower_seniority', 'follower_seniority'
884 |          ],
885 |          'category_name': [
886 |              '2024-01-01', '2024-02-01', '2024-03-01',  # Dates for monthly gains
887 | +             'USA', 'Canada', 'UK',  # Geo
888 | +             'Engineering', 'Sales',  # Function/Role
889 | +             'Tech', 'Finance',  # Industry
890 | +             'Senior', 'Junior'  # Seniority
891 |          ],
892 |          'follower_count_organic': [
893 | +             100, 110, 125,  # Organic monthly gains
894 | +             500, 300, 150,  # Organic Geo counts
895 | +             400, 200,  # Organic Role counts
896 | +             250, 180,  # Organic Industry counts
897 | +             300, 220  # Organic Seniority counts
898 |          ],
899 |          'follower_count_paid': [
900 | +             20, 30, 25,  # Paid monthly gains
901 | +             50, 40, 60,  # Paid Geo counts
902 | +             30, 20,  # Paid Role counts
903 | +             45, 35,  # Paid Industry counts
904 | +             60, 40  # Paid Seniority counts
905 |          ]
906 |      }
907 |      sample_follower_stats_df = pd.DataFrame(follower_data)
908 |
909 | +     logging.info("--- Testing Existing Plot Generations ---")
910 | +     # ... (keep existing tests for older plots) ...
911 |      fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
912 | +     if fig_posts_activity: logging.info("Posts activity plot generated.")
913 |
914 |      fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
915 | +     if fig_engagement_type: logging.info("Engagement type plot generated.")
916 |
917 |      mentions_data = {
918 |          'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
919 |          'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
920 |      }
921 |      sample_mentions_df = pd.DataFrame(mentions_data)
922 |      fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
923 | +     if fig_mentions_activity: logging.info("Mentions activity plot generated.")
924 |      fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
925 | +     if fig_mention_sentiment: logging.info("Mention sentiment plot generated.")
926 |
927 | +     fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly')
928 | +     if fig_followers_count: logging.info("Followers Count Over Time plot generated.")
929 | +     fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly')
930 | +     if fig_followers_rate: logging.info("Followers Growth Rate plot generated.")
931 | +     fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
932 | +     if fig_geo: logging.info("Followers by Location plot generated.")
933 | +     # ... add other follower demographic tests ...
934 |
935 | +     fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
936 | +     if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
937 |      fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
938 | +     if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
939 |      fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
940 | +     if fig_impressions: logging.info("Impressions Over Time plot generated.")
941 | +
942 |      fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
943 | +     if fig_likes_time: logging.info("Likes Over Time plot generated.")
944 | +     fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())  # Uses reach logic
945 | +     if fig_clicks_time: logging.info("Clicks Over Time plot generated.")
946 | +     fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
947 | +     if fig_shares_time: logging.info("Shares Over Time plot generated.")
948 | +     fig_comments_time = generate_comments_over_time_plot(sample_merged_posts_df.copy())
949 | +     if fig_comments_time: logging.info("Comments Over Time plot generated.")
950 | +     fig_comments_sentiment = generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy(), sentiment_column='comment_sentiment')
951 | +     if fig_comments_sentiment: logging.info("Comments Sentiment Breakdown plot generated.")
952 |
953 |
954 | +     logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
955 | +     fig_post_freq = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D')
956 | +     if fig_post_freq: logging.info("Post Frequency (Daily) plot generated.")
957 | +
958 | +     fig_post_freq_w = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W')
959 | +     if fig_post_freq_w: logging.info("Post Frequency (Weekly) plot generated.")
960 |
961 | +     fig_content_format = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='media_type')
962 | +     if fig_content_format: logging.info("Content Format Breakdown plot generated.")
963 |
964 | +     fig_content_topics = generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5)
965 | +     if fig_content_topics: logging.info("Content Topic Breakdown plot generated.")
966 | +
967 | +     # Test with missing columns / empty data for new plots
968 | +     logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
969 | +     empty_df = pd.DataFrame()
970 | +     fig_post_freq_empty = generate_post_frequency_plot(empty_df.copy())
971 | +     if fig_post_freq_empty: logging.info("Post Frequency (empty df) placeholder generated.")
972 |
973 | +     fig_content_format_missing_col = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col')
974 | +     if fig_content_format_missing_col: logging.info("Content Format (missing col) placeholder generated.")
975 | +
976 | +     fig_content_topics_no_labels = generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels')  # eb_labels won't exist
977 | +     if fig_content_topics_no_labels: logging.info("Content Topic (missing col) placeholder generated.")
978 | +
979 | +     df_no_topics_data = sample_merged_posts_df.copy()
980 | +     df_no_topics_data['eb_labels'] = None
981 | +     fig_content_topics_all_none = generate_content_topic_breakdown_plot(df_no_topics_data, topics_col='eb_labels')
982 | +     if fig_content_topics_all_none: logging.info("Content Topic (all None labels) placeholder generated.")
983 |
984 |
985 |      logging.info("Test script finished. Review plots if displayed locally or saved.")