GuglielmoTor committed on
Commit
617c2c1
·
verified ·
1 Parent(s): deb2291

Update analytics_plot_generator.py

Browse files
Files changed (1) hide show
  1. analytics_plot_generator.py +275 -202
analytics_plot_generator.py CHANGED
@@ -5,6 +5,7 @@ from io import BytesIO
5
  import base64
6
  import numpy as np
7
  import matplotlib.ticker as mticker
 
8
 
9
  # Configure logging for this module
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -184,13 +185,9 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
184
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
185
 
186
  fig, ax = plt.subplots(figsize=(8, 5))
187
- # Using a more distinct color map if available, or fallback
188
- try:
189
- colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
190
- except ValueError: # Fallback if Pastel1 doesn't have enough colors or isn't available
191
- colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
192
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
193
-
194
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
195
  ax.set_title('Mention Sentiment Distribution')
196
  ax.axis('equal')
@@ -449,10 +446,13 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
449
  ax.set_xlabel('Date')
450
  ax.set_ylabel('Engagement Rate')
451
  # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
452
- max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0
453
- formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val !=0 else 100.0 # Heuristic: if max is small, assume 0-1 scale
454
- if max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
 
 
455
  formatter_xmax = max_rate_val # Or some other sensible upper bound for formatting
 
456
  ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
457
  ax.grid(True, linestyle='--', alpha=0.7)
458
  plt.xticks(rotation=45)
@@ -525,7 +525,7 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
525
  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
526
 
527
  if df_copy.empty: # After dropping NaNs for essential columns
528
- return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
529
 
530
  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
531
 
@@ -544,34 +544,27 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
544
  finally:
545
  plt.close('all')
546
 
547
- # --- NEW PLOT FUNCTIONS ---
548
-
549
  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
550
- """Generates a plot for likes (reactions) over time."""
551
  title = "Reactions (Likes) Over Time"
552
  logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
553
-
554
  if df is None or df.empty:
555
  return create_placeholder_plot(title=title, message="No post data for likes.")
556
-
557
  required_cols = [date_column, likes_col]
558
- missing_cols = [col for col in required_cols if col not in df.columns]
559
- if missing_cols:
560
- return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
561
-
562
  try:
563
  df_copy = df.copy()
564
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
565
  df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce')
566
  df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
567
-
568
  if df_copy.empty:
569
- return create_placeholder_plot(title=title, message="No valid data after cleaning for likes plot.")
570
-
571
- likes_over_time = df_copy.resample('D')[likes_col].sum()
572
-
573
  fig, ax = plt.subplots(figsize=(10, 5))
574
- ax.plot(likes_over_time.index, likes_over_time.values, marker='.', linestyle='-', color='crimson')
575
  ax.set_title(title)
576
  ax.set_xlabel('Date')
577
  ax.set_ylabel('Total Likes')
@@ -585,73 +578,36 @@ def generate_likes_over_time_plot(df, date_column='published_at', likes_col='lik
585
  finally:
586
  plt.close('all')
587
 
588
-
589
  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
590
- """Generates a plot for clicks over time (distinct from general reach if needed)."""
 
 
591
  title = "Clicks Over Time"
592
  logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
593
-
594
- if df is None or df.empty:
595
- return create_placeholder_plot(title=title, message="No post data for clicks.")
596
-
597
- required_cols = [date_column, clicks_col]
598
- missing_cols = [col for col in required_cols if col not in df.columns]
599
- if missing_cols:
600
- return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
601
-
602
- try:
603
- df_copy = df.copy()
604
- df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
605
- df_copy[clicks_col] = pd.to_numeric(df_copy[clicks_col], errors='coerce')
606
- df_copy = df_copy.dropna(subset=[date_column, clicks_col]).set_index(date_column)
607
-
608
- if df_copy.empty:
609
- return create_placeholder_plot(title=title, message="No valid data after cleaning for clicks plot.")
610
-
611
- clicks_over_time = df_copy.resample('D')[clicks_col].sum()
612
-
613
- fig, ax = plt.subplots(figsize=(10, 5))
614
- ax.plot(clicks_over_time.index, clicks_over_time.values, marker='.', linestyle='-', color='teal')
615
- ax.set_title(title)
616
- ax.set_xlabel('Date')
617
- ax.set_ylabel('Total Clicks')
618
- ax.grid(True, linestyle='--', alpha=0.7)
619
- plt.xticks(rotation=45)
620
- plt.tight_layout()
621
- return fig
622
- except Exception as e:
623
- logging.error(f"Error generating {title}: {e}", exc_info=True)
624
- return create_placeholder_plot(title=f"{title} Error", message=str(e))
625
- finally:
626
- plt.close('all')
627
 
628
 
629
  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
630
  """Generates a plot for shares over time."""
631
  title = "Shares Over Time"
632
  logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
633
-
634
  if df is None or df.empty:
635
  return create_placeholder_plot(title=title, message="No post data for shares.")
636
-
637
  required_cols = [date_column, shares_col]
638
- missing_cols = [col for col in required_cols if col not in df.columns]
639
- if missing_cols:
640
- return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
641
-
642
  try:
643
  df_copy = df.copy()
644
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
645
  df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce')
646
  df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column)
647
-
648
  if df_copy.empty:
649
- return create_placeholder_plot(title=title, message="No valid data after cleaning for shares plot.")
650
-
651
- shares_over_time = df_copy.resample('D')[shares_col].sum()
652
 
 
653
  fig, ax = plt.subplots(figsize=(10, 5))
654
- ax.plot(shares_over_time.index, shares_over_time.values, marker='.', linestyle='-', color='goldenrod')
655
  ax.set_title(title)
656
  ax.set_xlabel('Date')
657
  ax.set_ylabel('Total Shares')
@@ -665,33 +621,26 @@ def generate_shares_over_time_plot(df, date_column='published_at', shares_col='s
665
  finally:
666
  plt.close('all')
667
 
668
-
669
  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
670
  """Generates a plot for comments over time."""
671
  title = "Comments Over Time"
672
  logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
673
-
674
  if df is None or df.empty:
675
  return create_placeholder_plot(title=title, message="No post data for comments.")
676
-
677
  required_cols = [date_column, comments_col]
678
- missing_cols = [col for col in required_cols if col not in df.columns]
679
- if missing_cols:
680
- return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
681
-
682
  try:
683
  df_copy = df.copy()
684
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
685
  df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce')
686
  df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column)
687
-
688
  if df_copy.empty:
689
- return create_placeholder_plot(title=title, message="No valid data after cleaning for comments plot.")
690
-
691
- comments_over_time = df_copy.resample('D')[comments_col].sum()
692
 
 
693
  fig, ax = plt.subplots(figsize=(10, 5))
694
- ax.plot(comments_over_time.index, comments_over_time.values, marker='.', linestyle='-', color='forestgreen')
695
  ax.set_title(title)
696
  ax.set_xlabel('Date')
697
  ax.set_ylabel('Total Comments')
@@ -705,53 +654,190 @@ def generate_comments_over_time_plot(df, date_column='published_at', comments_co
705
  finally:
706
  plt.close('all')
707
 
708
-
709
- def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', text_column_for_check='comment_text'):
710
  """
711
  Generates a pie chart for comment sentiment distribution.
712
- Expects a DataFrame with a 'sentiment_column' for sentiment labels of individual comments.
713
- 'text_column_for_check' is used to infer if the DataFrame might contain comment-level data.
 
714
  """
715
  title = "Breakdown of Comments by Sentiment"
716
  logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
717
 
718
  if df is None or df.empty:
719
- return create_placeholder_plot(title=title, message="No data available for comment sentiment analysis.")
720
-
721
- # Check if the expected sentiment column exists.
722
- # Also check for a text column as a heuristic, as sentiment often comes with text.
723
  if sentiment_column not in df.columns:
724
- msg = (f"Column '{sentiment_column}' for comment sentiment not found. "
725
- f"This plot requires a DataFrame with pre-analyzed comment sentiments. "
726
- f"Available columns: {df.columns.tolist()}")
727
- logging.warning(msg)
728
- return create_placeholder_plot(title=title, message=msg)
 
729
 
730
- # Optional: A light check if it seems like post-level data instead of comment-level
731
- # if text_column_for_check not in df.columns and 'commentCount' in df.columns:
732
- # logging.warning(f"'{sentiment_column}' found, but '{text_column_for_check}' is missing. Ensure '{sentiment_column}' refers to individual comment sentiments, not post sentiment.")
733
-
734
 
735
  try:
736
  df_copy = df.copy()
737
- # Ensure sentiment column is treated as categorical/string
738
  df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
739
- sentiment_counts = df_copy[sentiment_column].value_counts()
740
 
741
  if sentiment_counts.empty or sentiment_counts.sum() == 0:
742
  return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
743
 
744
  fig, ax = plt.subplots(figsize=(8, 5))
745
- try:
746
- colors_map = plt.cm.get_cmap('Pastel2', len(sentiment_counts))
747
- except ValueError:
748
- colors_map = plt.cm.get_cmap('Accent', len(sentiment_counts))
749
-
750
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
751
 
752
- ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140, colors=pie_colors)
753
  ax.set_title(title)
754
  ax.axis('equal')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
  plt.tight_layout()
756
  logging.info(f"Successfully generated {title} plot.")
757
  return fig
@@ -765,27 +851,28 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
765
  if __name__ == '__main__':
766
  # Create dummy data for testing
767
  posts_data = {
768
- 'id': [f'post{i}' for i in range(1, 8)], # Extended to 7 for more data points
769
  'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
770
  'likeCount': [10, 5, 12, 8, 15, 3, 20],
771
  'commentCount': [2, 1, 3, 1, 4, 0, 5],
772
  'shareCount': [1, 0, 1, 1, 2, 0, 1],
773
  'clickCount': [20, 15, 30, 22, 40, 10, 50],
774
  'impressionCount': [200, 150, 300, 220, 400, 100, 500],
775
- 'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
 
 
 
 
 
 
 
 
 
 
 
776
  }
777
  sample_merged_posts_df = pd.DataFrame(posts_data)
778
 
779
- # Dummy data for comments with sentiment (hypothetical)
780
- comments_sentiment_data = {
781
- 'comment_id': range(10),
782
- 'post_id': ['post1']*3 + ['post2']*2 + ['post3']*5,
783
- 'comment_text': ['Great post!', 'I disagree.', 'Nice work.', 'Interesting.', 'Could be better.', 'Loved it!', 'Not sure.', 'Thanks!', 'Helpful.', 'Okay.'],
784
- 'comment_sentiment': ['Positive', 'Negative', 'Positive', 'Neutral', 'Negative', 'Positive', 'Neutral', 'Positive', 'Positive', 'Neutral']
785
- }
786
- sample_comments_with_sentiment_df = pd.DataFrame(comments_sentiment_data)
787
-
788
-
789
  # Updated Follower Stats Data
790
  follower_data = {
791
  'follower_count_type': [
@@ -795,118 +882,104 @@ if __name__ == '__main__':
795
  'follower_industry', 'follower_industry',
796
  'follower_seniority', 'follower_seniority'
797
  ],
798
- # 'category_name' now holds dates for time-series, and actual categories for demographics
799
  'category_name': [
800
  '2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
801
- 'USA', 'Canada', 'UK', # Geo
802
- 'Engineering', 'Sales', # Function/Role
803
- 'Tech', 'Finance', # Industry
804
- 'Senior', 'Junior' # Seniority
805
  ],
806
  'follower_count_organic': [
807
- 100, 110, 125, # Organic monthly gains
808
- 500, 300, 150, # Organic Geo counts
809
- 400, 200, # Organic Role counts
810
- 250, 180, # Organic Industry counts
811
- 300, 220 # Organic Seniority counts
812
  ],
813
  'follower_count_paid': [
814
- 20, 30, 25, # Paid monthly gains
815
- 50, 40, 60, # Paid Geo counts
816
- 30, 20, # Paid Role counts
817
- 45, 35, # Paid Industry counts
818
- 60, 40 # Paid Seniority counts
819
  ]
820
  }
821
  sample_follower_stats_df = pd.DataFrame(follower_data)
822
 
823
- logging.info("--- Testing Updated Follower Plot Generations ---")
824
-
825
- fig_followers_count = generate_followers_count_over_time_plot(
826
- sample_follower_stats_df.copy(),
827
- type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
828
- )
829
- if fig_followers_count and not isinstance(fig_followers_count, str): logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
830
-
831
- fig_followers_rate = generate_followers_growth_rate_plot(
832
- sample_follower_stats_df.copy(),
833
- type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
834
- )
835
- if fig_followers_rate and not isinstance(fig_followers_rate, str): logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
836
-
837
- fig_geo = generate_followers_by_demographics_plot(
838
- sample_follower_stats_df.copy(),
839
- type_value='follower_geo', # category_col defaults to 'category_name'
840
- plot_title="Followers by Location (Organic/Paid)"
841
- )
842
- if fig_geo and not isinstance(fig_geo, str): logging.info("Followers by Location (grouped organic/paid) plot generated.")
843
-
844
- fig_role = generate_followers_by_demographics_plot(
845
- sample_follower_stats_df.copy(),
846
- type_value='follower_function',
847
- plot_title="Followers by Role (Organic/Paid)"
848
- )
849
- if fig_role and not isinstance(fig_role, str): logging.info("Followers by Role (grouped organic/paid) plot generated.")
850
-
851
- fig_industry = generate_followers_by_demographics_plot(
852
- sample_follower_stats_df.copy(),
853
- type_value='follower_industry',
854
- plot_title="Followers by Industry (Organic/Paid)"
855
- )
856
- if fig_industry and not isinstance(fig_industry, str): logging.info("Followers by Industry (grouped organic/paid) plot generated.")
857
-
858
- fig_seniority = generate_followers_by_demographics_plot(
859
- sample_follower_stats_df.copy(),
860
- type_value='follower_seniority',
861
- plot_title="Followers by Seniority (Organic/Paid)"
862
- )
863
- if fig_seniority and not isinstance(fig_seniority, str): logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
864
-
865
- logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
866
  fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
867
- if fig_posts_activity and not isinstance(fig_posts_activity, str): logging.info("Posts activity plot generated.")
868
 
869
  fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
870
- if fig_engagement_type and not isinstance(fig_engagement_type, str): logging.info("Engagement type plot generated.")
871
 
872
- # Dummy mentions for testing
873
  mentions_data = {
874
  'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
875
  'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
876
  }
877
  sample_mentions_df = pd.DataFrame(mentions_data)
878
  fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
879
- if fig_mentions_activity and not isinstance(fig_mentions_activity, str): logging.info("Mentions activity plot generated.")
880
  fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
881
- if fig_mention_sentiment and not isinstance(fig_mention_sentiment, str): logging.info("Mention sentiment plot generated.")
882
 
883
- fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
884
- if fig_eng_rate and not isinstance(fig_eng_rate, str): logging.info("Engagement Rate Over Time plot generated.")
 
 
 
 
 
885
 
 
 
886
  fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
887
- if fig_reach and not isinstance(fig_reach, str): logging.info("Reach Over Time (Clicks) plot generated.")
888
-
889
  fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
890
- if fig_impressions and not isinstance(fig_impressions, str): logging.info("Impressions Over Time plot generated.")
891
-
892
- logging.info("--- Testing NEW Plot Generations ---")
893
  fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
894
- if fig_likes_time and not isinstance(fig_likes_time, str): logging.info("Likes Over Time plot generated.")
 
 
 
 
 
 
 
 
895
 
896
- fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
897
- if fig_clicks_time and not isinstance(fig_clicks_time, str): logging.info("Clicks Over Time plot generated.")
898
 
899
- fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
900
- if fig_shares_time and not isinstance(fig_shares_time, str): logging.info("Shares Over Time plot generated.")
 
 
 
 
901
 
902
- fig_comments_time = generate_comments_over_time_plot(sample_merged_posts_df.copy())
903
- if fig_comments_time and not isinstance(fig_comments_time, str): logging.info("Comments Over Time plot generated.")
904
 
905
- fig_comments_sentiment = generate_comments_sentiment_breakdown_plot(sample_comments_with_sentiment_df.copy())
906
- if fig_comments_sentiment and not isinstance(fig_comments_sentiment, str): logging.info("Comments Sentiment Breakdown plot generated (with dummy comment sentiment data).")
 
 
 
 
 
 
907
 
908
- fig_comments_sentiment_no_data = generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy()) # Test with df lacking the sentiment col
909
- if fig_comments_sentiment_no_data and not isinstance(fig_comments_sentiment_no_data, str) : logging.info("Comments Sentiment Breakdown plot generated (placeholder, as expected).")
 
 
 
 
 
 
 
 
910
 
911
 
912
  logging.info("Test script finished. Review plots if displayed locally or saved.")
 
5
  import base64
6
  import numpy as np
7
  import matplotlib.ticker as mticker
8
+ import ast # For safely evaluating string representations of lists
9
 
10
  # Configure logging for this module
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
 
185
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
186
 
187
  fig, ax = plt.subplots(figsize=(8, 5))
188
+ # Using a qualitative colormap like 'Pastel1' or 'Set3' can be good for categorical data
189
+ colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
 
 
 
190
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
 
191
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
192
  ax.set_title('Mention Sentiment Distribution')
193
  ax.axis('equal')
 
446
  ax.set_xlabel('Date')
447
  ax.set_ylabel('Engagement Rate')
448
  # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
449
+ max_rate_val = engagement_over_time.max()
450
+ formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val >=0 else 100.0 # Heuristic for 0-1 vs 0-100 scale
451
+ if max_rate_val > 1.5 and formatter_xmax == 1.0: # If data seems to be percentage but formatted as decimal
452
+ formatter_xmax = 100.0
453
+ elif max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
454
  formatter_xmax = max_rate_val # Or some other sensible upper bound for formatting
455
+
456
  ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
457
  ax.grid(True, linestyle='--', alpha=0.7)
458
  plt.xticks(rotation=45)
 
525
  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
526
 
527
  if df_copy.empty: # After dropping NaNs for essential columns
528
+ return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
529
 
530
  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
531
 
 
544
  finally:
545
  plt.close('all')
546
 
547
+ # --- New Plot Functions from User Request ---
 
548
  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
549
+ """Generates a plot for likes over time."""
550
  title = "Reactions (Likes) Over Time"
551
  logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
 
552
  if df is None or df.empty:
553
  return create_placeholder_plot(title=title, message="No post data for likes.")
 
554
  required_cols = [date_column, likes_col]
555
+ if any(col not in df.columns for col in required_cols):
556
+ return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
 
 
557
  try:
558
  df_copy = df.copy()
559
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
560
  df_copy[likes_col] = pd.to_numeric(df_copy[likes_col], errors='coerce')
561
  df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
 
562
  if df_copy.empty:
563
+ return create_placeholder_plot(title=title, message="No valid data after cleaning.")
564
+
565
+ data_over_time = df_copy.resample('D')[likes_col].sum()
 
566
  fig, ax = plt.subplots(figsize=(10, 5))
567
+ ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson')
568
  ax.set_title(title)
569
  ax.set_xlabel('Date')
570
  ax.set_ylabel('Total Likes')
 
578
  finally:
579
  plt.close('all')
580
 
 
581
  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
582
+ """Generates a plot for clicks over time (can be same as reach if clicks are primary reach metric)."""
583
+ # This is essentially the same as generate_reach_over_time_plot if reach_col is 'clickCount'.
584
+ # For clarity, keeping it separate if user wants to distinguish or use a different column later.
585
  title = "Clicks Over Time"
586
  logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
587
+ # Reusing logic from generate_reach_over_time_plot
588
+ return generate_reach_over_time_plot(df, date_column, clicks_col)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
 
590
 
591
  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
592
  """Generates a plot for shares over time."""
593
  title = "Shares Over Time"
594
  logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
 
595
  if df is None or df.empty:
596
  return create_placeholder_plot(title=title, message="No post data for shares.")
 
597
  required_cols = [date_column, shares_col]
598
+ if any(col not in df.columns for col in required_cols):
599
+ return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
 
 
600
  try:
601
  df_copy = df.copy()
602
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
603
  df_copy[shares_col] = pd.to_numeric(df_copy[shares_col], errors='coerce')
604
  df_copy = df_copy.dropna(subset=[date_column, shares_col]).set_index(date_column)
 
605
  if df_copy.empty:
606
+ return create_placeholder_plot(title=title, message="No valid data after cleaning.")
 
 
607
 
608
+ data_over_time = df_copy.resample('D')[shares_col].sum()
609
  fig, ax = plt.subplots(figsize=(10, 5))
610
+ ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal')
611
  ax.set_title(title)
612
  ax.set_xlabel('Date')
613
  ax.set_ylabel('Total Shares')
 
621
  finally:
622
  plt.close('all')
623
 
 
624
  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
625
  """Generates a plot for comments over time."""
626
  title = "Comments Over Time"
627
  logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
 
628
  if df is None or df.empty:
629
  return create_placeholder_plot(title=title, message="No post data for comments.")
 
630
  required_cols = [date_column, comments_col]
631
+ if any(col not in df.columns for col in required_cols):
632
+ return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
 
 
633
  try:
634
  df_copy = df.copy()
635
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
636
  df_copy[comments_col] = pd.to_numeric(df_copy[comments_col], errors='coerce')
637
  df_copy = df_copy.dropna(subset=[date_column, comments_col]).set_index(date_column)
 
638
  if df_copy.empty:
639
+ return create_placeholder_plot(title=title, message="No valid data after cleaning.")
 
 
640
 
641
+ data_over_time = df_copy.resample('D')[comments_col].sum()
642
  fig, ax = plt.subplots(figsize=(10, 5))
643
+ ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold')
644
  ax.set_title(title)
645
  ax.set_xlabel('Date')
646
  ax.set_ylabel('Total Comments')
 
654
  finally:
655
  plt.close('all')
656
 
657
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
    """
    Generates a pie chart for comment sentiment distribution.

    Args:
        df: DataFrame holding one sentiment label per row. Ideally this is
            comment-level data; post-level data with a sentiment column is
            also accepted.
        sentiment_column: Name of the column with the sentiment labels. If it
            is missing and a column named 'sentiment' exists, that column is
            used as a fallback (a warning is logged).
        date_column: Accepted for signature compatibility only; it is not
            read anywhere in this function.

    Returns:
        The matplotlib figure with the pie chart, or the result of
        ``create_placeholder_plot`` when the input is missing/empty, the
        sentiment column cannot be found or holds no values, or an error
        occurs while plotting.
    """
    title = "Breakdown of Comments by Sentiment"
    logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data for comment sentiment.")

    if sentiment_column not in df.columns:
        # Check for a common alternative if the primary is missing (e.g. from post-level data)
        if 'sentiment' in df.columns and sentiment_column != 'sentiment':
            logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
            sentiment_column = 'sentiment'  # Use fallback
        else:
            return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")

    # If the sentiment column has no valid data (all NaNs)
    if df[sentiment_column].isnull().all():
        return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")

    try:
        # Drop missing values BEFORE casting to str: astype(str) would turn
        # NaN/None into the literal labels "nan"/"None", which would then show
        # up as a bogus slice in the pie chart.
        sentiment_labels = df[sentiment_column].dropna().astype(str)
        sentiment_counts = sentiment_labels.value_counts()

        if sentiment_counts.empty or sentiment_counts.sum() == 0:
            return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")

        fig, ax = plt.subplots(figsize=(8, 5))
        # plt.get_cmap is used instead of plt.cm.get_cmap, which was removed
        # from matplotlib.cm in Matplotlib 3.9.
        colors_map = plt.get_cmap('coolwarm', len(sentiment_counts))
        pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title(title)
        ax.axis('equal')
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Close every pyplot-managed figure on the way out, matching the other
        # plot generators in this module.
        plt.close('all')
704
+
705
+ # --- NEW PLOT FUNCTIONS FOR CONTENT STRATEGY ---
706
def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
    """Generates a plot for post frequency over time (e.g., daily, weekly, monthly).

    Args:
        df: DataFrame with one row per post.
        date_column: Name of the column holding the post timestamps. Values
            that are not already datetimes are parsed with ``pd.to_datetime``;
            unparseable entries are dropped.
        resample_period: pandas resampling alias used to bucket the posts.
            'W' and 'M' (or its newer spelling 'ME') are drawn as bars; every
            other alias is drawn as a line, with point markers for 'D'.

    Returns:
        The matplotlib figure, or the result of ``create_placeholder_plot``
        when there is no usable data or an error occurs while plotting.
    """
    title = f"Post Frequency Over Time ({resample_period})"
    logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if date_column not in df.columns:
        return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")

    try:
        df_copy = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid date entries found.")

        # Number of rows (posts) falling into each resampling bucket.
        post_frequency = df_copy.set_index(date_column).resample(resample_period).size()

        if post_frequency.empty:
            return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")

        fig, ax = plt.subplots(figsize=(10, 5))
        # Build the plot keyword arguments conditionally. 'marker' is a line
        # property: forwarding it (even as None) to a bar plot makes matplotlib
        # reject it as an unknown Rectangle property, which previously sent
        # every weekly/monthly chart down the error path.
        plot_kwargs = {'kind': 'bar' if resample_period in ('M', 'ME', 'W') else 'line'}
        if resample_period == 'D':
            plot_kwargs['marker'] = 'o'
        post_frequency.plot(ax=ax, **plot_kwargs)
        ax.set_title(title)
        ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Close every pyplot-managed figure on the way out, matching the other
        # plot generators in this module.
        plt.close('all')
745
+
746
def generate_content_format_breakdown_plot(df, format_col='media_type'):
    """Draw a bar chart with the number of posts per content format.

    Args:
        df: DataFrame with one row per post. ``None`` or an empty frame yields
            a placeholder figure.
        format_col: Name of the column holding the content format of each
            post. Missing values are not counted.

    Returns:
        The matplotlib figure with one annotated bar per format, or the
        result of ``create_placeholder_plot`` when there is nothing to draw
        or an error occurs.
    """
    title = "Breakdown of Content by Format"
    logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available.")
    if format_col not in df.columns:
        return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")

    try:
        # value_counts() skips NaN by default and never mutates its input, so
        # neither a defensive copy of the frame nor a dropna() is required.
        format_counts = df[format_col].value_counts()

        if format_counts.empty:
            return create_placeholder_plot(title=title, message="No content format data available.")

        fig, ax = plt.subplots(figsize=(8, 6))
        format_counts.plot(kind='bar', ax=ax, color='skyblue')
        ax.set_title(title)
        ax.set_xlabel('Media Type')
        ax.set_ylabel('Number of Posts')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45, ha="right")

        # Add counts on top of bars
        peak = format_counts.max()
        for position, count in enumerate(format_counts):
            ax.text(position, count + (0.01 * peak), str(count), ha='center', va='bottom')

        # Text artists do not take part in autoscaling, so reserve headroom
        # above the tallest bar to keep its label inside the axes.
        ax.set_ylim(top=peak * 1.1)

        # Lay out only after every artist exists so the labels are accounted for.
        plt.tight_layout()

        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Deregister figures from pyplot so repeated calls do not accumulate
        # open figures; the returned Figure object itself stays usable.
        plt.close('all')
783
+
784
+ def _parse_eb_label(label_data):
785
+ """Helper to parse eb_labels which might be lists or string representations of lists."""
786
+ if isinstance(label_data, list):
787
+ return label_data
788
+ if isinstance(label_data, str):
789
+ try:
790
+ # Try to evaluate as a list
791
+ parsed = ast.literal_eval(label_data)
792
+ if isinstance(parsed, list):
793
+ return parsed
794
+ # If it's a single string not in list format, treat as a single label
795
+ return [str(parsed)]
796
+ except (ValueError, SyntaxError):
797
+ # If not a list string, treat the whole string as one label
798
+ return [label_data] if label_data.strip() else []
799
+ if pd.isna(label_data):
800
+ return []
801
+ return [] # Default for other types
802
+
803
+ def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
804
+ """Generates a horizontal bar chart for breakdown of content by topics."""
805
+ title = f"Breakdown of Content by Topics (Top {top_n})"
806
+ logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")
807
+
808
+ if df is None or df.empty:
809
+ return create_placeholder_plot(title=title, message="No data available.")
810
+ if topics_col not in df.columns:
811
+ return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
812
+
813
+ try:
814
+ df_copy = df.copy()
815
+
816
+ # Apply parsing and explode
817
+ parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
818
+ exploded_labels = parsed_labels.explode().dropna()
819
+
820
+ if exploded_labels.empty:
821
+ return create_placeholder_plot(title=title, message="No topic data found after processing labels.")
822
+
823
+ topic_counts = exploded_labels.value_counts()
824
+
825
+ if topic_counts.empty:
826
+ return create_placeholder_plot(title=title, message="No topics to display after counting.")
827
+
828
+ # Take top N, then sort ascending so the largest count ends up as the top bar of the horizontal chart
829
+ top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
830
+
831
+ fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
832
+ top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
833
+ ax.set_title(title)
834
+ ax.set_xlabel('Number of Posts')
835
+ ax.set_ylabel('Topic')
836
+
837
+ # Add counts next to bars
838
+ for i, (topic, count) in enumerate(top_topics.items()):
839
+ ax.text(count + (0.01 * top_topics.max()), i, str(count), va='center')
840
+
841
  plt.tight_layout()
842
  logging.info(f"Successfully generated {title} plot.")
843
  return fig
 
851
  if __name__ == '__main__':
852
  # Create dummy data for testing
853
  posts_data = {
854
+ 'id': [f'post{i}' for i in range(1, 8)], # Increased to 7 for more data
855
  'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
856
  'likeCount': [10, 5, 12, 8, 15, 3, 20],
857
  'commentCount': [2, 1, 3, 1, 4, 0, 5],
858
  'shareCount': [1, 0, 1, 1, 2, 0, 1],
859
  'clickCount': [20, 15, 30, 22, 40, 10, 50],
860
  'impressionCount': [200, 150, 300, 220, 400, 100, 500],
861
+ 'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
862
+ 'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'], # New column
863
+ 'eb_labels': [ # New column with various formats
864
+ "['AI', 'Tech']",
865
+ ['Innovation'],
866
+ 'General',
867
+ None,
868
+ ['Tech', 'Future'],
869
+ "['AI', 'Development']",
870
+ ['Tech']
871
+ ],
872
+ 'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive'] # For comment sentiment plot
873
  }
874
  sample_merged_posts_df = pd.DataFrame(posts_data)
875
 
 
 
 
 
 
 
 
 
 
 
876
  # Updated Follower Stats Data
877
  follower_data = {
878
  'follower_count_type': [
 
882
  'follower_industry', 'follower_industry',
883
  'follower_seniority', 'follower_seniority'
884
  ],
 
885
  'category_name': [
886
  '2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
887
+ 'USA', 'Canada', 'UK', # Geo
888
+ 'Engineering', 'Sales', # Function/Role
889
+ 'Tech', 'Finance', # Industry
890
+ 'Senior', 'Junior' # Seniority
891
  ],
892
  'follower_count_organic': [
893
+ 100, 110, 125, # Organic monthly gains
894
+ 500, 300, 150, # Organic Geo counts
895
+ 400, 200, # Organic Role counts
896
+ 250, 180, # Organic Industry counts
897
+ 300, 220 # Organic Seniority counts
898
  ],
899
  'follower_count_paid': [
900
+ 20, 30, 25, # Paid monthly gains
901
+ 50, 40, 60, # Paid Geo counts
902
+ 30, 20, # Paid Role counts
903
+ 45, 35, # Paid Industry counts
904
+ 60, 40 # Paid Seniority counts
905
  ]
906
  }
907
  sample_follower_stats_df = pd.DataFrame(follower_data)
908
 
909
+ logging.info("--- Testing Existing Plot Generations ---")
910
+ # ... (keep existing tests for older plots) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
911
  fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
912
+ if fig_posts_activity: logging.info("Posts activity plot generated.")
913
 
914
  fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
915
+ if fig_engagement_type: logging.info("Engagement type plot generated.")
916
 
 
917
  mentions_data = {
918
  'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
919
  'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
920
  }
921
  sample_mentions_df = pd.DataFrame(mentions_data)
922
  fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
923
+ if fig_mentions_activity: logging.info("Mentions activity plot generated.")
924
  fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
925
+ if fig_mention_sentiment: logging.info("Mention sentiment plot generated.")
926
 
927
+ fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly')
928
+ if fig_followers_count: logging.info("Followers Count Over Time plot generated.")
929
+ fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), type_value='follower_gains_monthly')
930
+ if fig_followers_rate: logging.info("Followers Growth Rate plot generated.")
931
+ fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
932
+ if fig_geo: logging.info("Followers by Location plot generated.")
933
+ # ... add other follower demographic tests ...
934
 
935
+ fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
936
+ if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
937
  fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
938
+ if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
 
939
  fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
940
+ if fig_impressions: logging.info("Impressions Over Time plot generated.")
941
+
 
942
  fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
943
+ if fig_likes_time: logging.info("Likes Over Time plot generated.")
944
+ fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy()) # Uses reach logic
945
+ if fig_clicks_time: logging.info("Clicks Over Time plot generated.")
946
+ fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
947
+ if fig_shares_time: logging.info("Shares Over Time plot generated.")
948
+ fig_comments_time = generate_comments_over_time_plot(sample_merged_posts_df.copy())
949
+ if fig_comments_time: logging.info("Comments Over Time plot generated.")
950
+ fig_comments_sentiment = generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy(), sentiment_column='comment_sentiment')
951
+ if fig_comments_sentiment: logging.info("Comments Sentiment Breakdown plot generated.")
952
 
 
 
953
 
954
+ logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
955
+ fig_post_freq = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D')
956
+ if fig_post_freq: logging.info("Post Frequency (Daily) plot generated.")
957
+
958
+ fig_post_freq_w = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W')
959
+ if fig_post_freq_w: logging.info("Post Frequency (Weekly) plot generated.")
960
 
961
+ fig_content_format = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='media_type')
962
+ if fig_content_format: logging.info("Content Format Breakdown plot generated.")
963
 
964
+ fig_content_topics = generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5)
965
+ if fig_content_topics: logging.info("Content Topic Breakdown plot generated.")
966
+
967
+ # Test with missing columns / empty data for new plots
968
+ logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
969
+ empty_df = pd.DataFrame()
970
+ fig_post_freq_empty = generate_post_frequency_plot(empty_df.copy())
971
+ if fig_post_freq_empty: logging.info("Post Frequency (empty df) placeholder generated.")
972
 
973
+ fig_content_format_missing_col = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col')
974
+ if fig_content_format_missing_col: logging.info("Content Format (missing col) placeholder generated.")
975
+
976
+ fig_content_topics_no_labels = generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels') # eb_labels won't exist
977
+ if fig_content_topics_no_labels: logging.info("Content Topic (missing col) placeholder generated.")
978
+
979
+ df_no_topics_data = sample_merged_posts_df.copy()
980
+ df_no_topics_data['eb_labels'] = None
981
+ fig_content_topics_all_none = generate_content_topic_breakdown_plot(df_no_topics_data, topics_col='eb_labels')
982
+ if fig_content_topics_all_none: logging.info("Content Topic (all None labels) placeholder generated.")
983
 
984
 
985
  logging.info("Test script finished. Review plots if displayed locally or saved.")