Spaces:
Running
Running
Update analytics_plot_generator.py
Browse files- analytics_plot_generator.py +283 -31
analytics_plot_generator.py
CHANGED
@@ -184,8 +184,13 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
184 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
185 |
|
186 |
fig, ax = plt.subplots(figsize=(8, 5))
|
187 |
-
|
|
|
|
|
|
|
|
|
188 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
|
|
189 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
190 |
ax.set_title('Mention Sentiment Distribution')
|
191 |
ax.axis('equal')
|
@@ -444,10 +449,10 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
|
|
444 |
ax.set_xlabel('Date')
|
445 |
ax.set_ylabel('Engagement Rate')
|
446 |
# Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
|
447 |
-
max_rate_val = engagement_over_time.max()
|
448 |
-
formatter_xmax = 1.0 if max_rate_val <= 1.5 else 100.0 # Heuristic: if max is small, assume 0-1 scale
|
449 |
if max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
|
450 |
-
|
451 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
452 |
ax.grid(True, linestyle='--', alpha=0.7)
|
453 |
plt.xticks(rotation=45)
|
@@ -520,7 +525,7 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
|
|
520 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
521 |
|
522 |
if df_copy.empty: # After dropping NaNs for essential columns
|
523 |
-
|
524 |
|
525 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
526 |
|
@@ -539,21 +544,248 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
|
|
539 |
finally:
|
540 |
plt.close('all')
|
541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
542 |
|
543 |
if __name__ == '__main__':
|
544 |
# Create dummy data for testing
|
545 |
posts_data = {
|
546 |
-
'id': [f'post{i}' for i in range(1,
|
547 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
548 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
549 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
550 |
-
'shareCount': [1, 0, 1, 1, 2, 0, 1],
|
551 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
552 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
553 |
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
|
554 |
}
|
555 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
556 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
# Updated Follower Stats Data
|
558 |
follower_data = {
|
559 |
'follower_count_type': [
|
@@ -566,24 +798,24 @@ if __name__ == '__main__':
|
|
566 |
# 'category_name' now holds dates for time-series, and actual categories for demographics
|
567 |
'category_name': [
|
568 |
'2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
|
569 |
-
'USA', 'Canada', 'UK',
|
570 |
'Engineering', 'Sales', # Function/Role
|
571 |
'Tech', 'Finance', # Industry
|
572 |
'Senior', 'Junior' # Seniority
|
573 |
],
|
574 |
'follower_count_organic': [
|
575 |
-
100, 110, 125,
|
576 |
-
500, 300, 150,
|
577 |
-
400, 200,
|
578 |
-
250, 180,
|
579 |
-
300, 220
|
580 |
],
|
581 |
'follower_count_paid': [
|
582 |
-
20, 30, 25,
|
583 |
-
50, 40, 60,
|
584 |
-
30, 20,
|
585 |
-
45, 35,
|
586 |
-
60, 40
|
587 |
]
|
588 |
}
|
589 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
@@ -594,48 +826,48 @@ if __name__ == '__main__':
|
|
594 |
sample_follower_stats_df.copy(),
|
595 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
596 |
)
|
597 |
-
if fig_followers_count: logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
|
598 |
|
599 |
fig_followers_rate = generate_followers_growth_rate_plot(
|
600 |
sample_follower_stats_df.copy(),
|
601 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
602 |
)
|
603 |
-
if fig_followers_rate: logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
|
604 |
|
605 |
fig_geo = generate_followers_by_demographics_plot(
|
606 |
sample_follower_stats_df.copy(),
|
607 |
type_value='follower_geo', # category_col defaults to 'category_name'
|
608 |
plot_title="Followers by Location (Organic/Paid)"
|
609 |
)
|
610 |
-
if fig_geo: logging.info("Followers by Location (grouped organic/paid) plot generated.")
|
611 |
|
612 |
fig_role = generate_followers_by_demographics_plot(
|
613 |
sample_follower_stats_df.copy(),
|
614 |
type_value='follower_function',
|
615 |
plot_title="Followers by Role (Organic/Paid)"
|
616 |
)
|
617 |
-
if fig_role: logging.info("Followers by Role (grouped organic/paid) plot generated.")
|
618 |
|
619 |
fig_industry = generate_followers_by_demographics_plot(
|
620 |
sample_follower_stats_df.copy(),
|
621 |
type_value='follower_industry',
|
622 |
plot_title="Followers by Industry (Organic/Paid)"
|
623 |
)
|
624 |
-
if fig_industry: logging.info("Followers by Industry (grouped organic/paid) plot generated.")
|
625 |
|
626 |
fig_seniority = generate_followers_by_demographics_plot(
|
627 |
sample_follower_stats_df.copy(),
|
628 |
type_value='follower_seniority',
|
629 |
plot_title="Followers by Seniority (Organic/Paid)"
|
630 |
)
|
631 |
-
if fig_seniority: logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
|
632 |
|
633 |
logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
|
634 |
fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
|
635 |
-
if fig_posts_activity: logging.info("Posts activity plot generated.")
|
636 |
|
637 |
fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
|
638 |
-
if fig_engagement_type: logging.info("Engagement type plot generated.")
|
639 |
|
640 |
# Dummy mentions for testing
|
641 |
mentions_data = {
|
@@ -644,17 +876,37 @@ if __name__ == '__main__':
|
|
644 |
}
|
645 |
sample_mentions_df = pd.DataFrame(mentions_data)
|
646 |
fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
|
647 |
-
if fig_mentions_activity: logging.info("Mentions activity plot generated.")
|
648 |
fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
|
649 |
-
if fig_mention_sentiment: logging.info("Mention sentiment plot generated.")
|
650 |
|
651 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
652 |
-
if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
|
653 |
|
654 |
fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
|
655 |
-
if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
|
656 |
|
657 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
658 |
-
if fig_impressions: logging.info("Impressions Over Time plot generated.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
659 |
|
660 |
logging.info("Test script finished. Review plots if displayed locally or saved.")
|
|
|
184 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
185 |
|
186 |
fig, ax = plt.subplots(figsize=(8, 5))
|
187 |
+
# Using a more distinct color map if available, or fallback
|
188 |
+
try:
|
189 |
+
colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
|
190 |
+
except ValueError: # Fallback if Pastel1 doesn't have enough colors or isn't available
|
191 |
+
colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
|
192 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
193 |
+
|
194 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
195 |
ax.set_title('Mention Sentiment Distribution')
|
196 |
ax.axis('equal')
|
|
|
449 |
ax.set_xlabel('Date')
|
450 |
ax.set_ylabel('Engagement Rate')
|
451 |
# Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
|
452 |
+
max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0
|
453 |
+
formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val !=0 else 100.0 # Heuristic: if max is small, assume 0-1 scale
|
454 |
if max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
|
455 |
+
formatter_xmax = max_rate_val # Or some other sensible upper bound for formatting
|
456 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
457 |
ax.grid(True, linestyle='--', alpha=0.7)
|
458 |
plt.xticks(rotation=45)
|
|
|
525 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
526 |
|
527 |
if df_copy.empty: # After dropping NaNs for essential columns
|
528 |
+
return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
|
529 |
|
530 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
531 |
|
|
|
544 |
finally:
|
545 |
plt.close('all')
|
546 |
|
547 |
+
# --- NEW PLOT FUNCTIONS ---
|
548 |
+
|
549 |
+
def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
    """Build a line chart of likes (reactions) summed per calendar day.

    Args:
        df: DataFrame of posts; may be None or empty.
        date_column: Name of the column holding the post timestamp.
        likes_col: Name of the column holding the per-post like count.

    Returns:
        The matplotlib figure on success. When the input is missing, lacks a
        required column, is empty after cleaning, or an exception is raised,
        the result of ``create_placeholder_plot`` is returned instead.
    """
    title = "Reactions (Likes) Over Time"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot without rows and both required columns.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for likes.")

    absent = [name for name in (date_column, likes_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(
            title=title,
            message=f"Missing columns: {absent}. Available: {df.columns.tolist()}",
        )

    try:
        # Work on a copy so the caller's frame is never mutated.
        cleaned = df.copy()
        # Unparseable dates / non-numeric counts become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[likes_col] = pd.to_numeric(cleaned[likes_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, likes_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for likes plot.")

        # Daily buckets; resample('D') also emits the days between observations.
        daily_likes = cleaned.resample('D')[likes_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_likes.index, daily_likes.values, marker='.', linestyle='-', color='crimson')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Likes')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the successful return.
        plt.close('all')
|
587 |
+
|
588 |
+
|
589 |
+
def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
    """Build a line chart of clicks summed per calendar day.

    Args:
        df: DataFrame of posts; may be None or empty.
        date_column: Name of the column holding the post timestamp.
        clicks_col: Name of the column holding the per-post click count.

    Returns:
        The matplotlib figure on success. When the input is missing, lacks a
        required column, is empty after cleaning, or an exception is raised,
        the result of ``create_placeholder_plot`` is returned instead.
    """
    title = "Clicks Over Time"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot without rows and both required columns.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for clicks.")

    absent = [name for name in (date_column, clicks_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(
            title=title,
            message=f"Missing columns: {absent}. Available: {df.columns.tolist()}",
        )

    try:
        # Work on a copy so the caller's frame is never mutated.
        cleaned = df.copy()
        # Unparseable dates / non-numeric counts become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[clicks_col] = pd.to_numeric(cleaned[clicks_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, clicks_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for clicks plot.")

        # Daily buckets; resample('D') also emits the days between observations.
        daily_clicks = cleaned.resample('D')[clicks_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_clicks.index, daily_clicks.values, marker='.', linestyle='-', color='teal')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the successful return.
        plt.close('all')
|
627 |
+
|
628 |
+
|
629 |
+
def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
    """Build a line chart of shares summed per calendar day.

    Args:
        df: DataFrame of posts; may be None or empty.
        date_column: Name of the column holding the post timestamp.
        shares_col: Name of the column holding the per-post share count.

    Returns:
        The matplotlib figure on success. When the input is missing, lacks a
        required column, is empty after cleaning, or an exception is raised,
        the result of ``create_placeholder_plot`` is returned instead.
    """
    title = "Shares Over Time"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot without rows and both required columns.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for shares.")

    absent = [name for name in (date_column, shares_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(
            title=title,
            message=f"Missing columns: {absent}. Available: {df.columns.tolist()}",
        )

    try:
        # Work on a copy so the caller's frame is never mutated.
        cleaned = df.copy()
        # Unparseable dates / non-numeric counts become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[shares_col] = pd.to_numeric(cleaned[shares_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, shares_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for shares plot.")

        # Daily buckets; resample('D') also emits the days between observations.
        daily_shares = cleaned.resample('D')[shares_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_shares.index, daily_shares.values, marker='.', linestyle='-', color='goldenrod')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Shares')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the successful return.
        plt.close('all')
|
667 |
+
|
668 |
+
|
669 |
+
def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
    """Build a line chart of comments summed per calendar day.

    Args:
        df: DataFrame of posts; may be None or empty.
        date_column: Name of the column holding the post timestamp.
        comments_col: Name of the column holding the per-post comment count.

    Returns:
        The matplotlib figure on success. When the input is missing, lacks a
        required column, is empty after cleaning, or an exception is raised,
        the result of ``create_placeholder_plot`` is returned instead.
    """
    title = "Comments Over Time"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot without rows and both required columns.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for comments.")

    absent = [name for name in (date_column, comments_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(
            title=title,
            message=f"Missing columns: {absent}. Available: {df.columns.tolist()}",
        )

    try:
        # Work on a copy so the caller's frame is never mutated.
        cleaned = df.copy()
        # Unparseable dates / non-numeric counts become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[comments_col] = pd.to_numeric(cleaned[comments_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, comments_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for comments plot.")

        # Daily buckets; resample('D') also emits the days between observations.
        daily_comments = cleaned.resample('D')[comments_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_comments.index, daily_comments.values, marker='.', linestyle='-', color='forestgreen')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Comments')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the successful return.
        plt.close('all')
|
707 |
+
|
708 |
+
|
709 |
+
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', text_column_for_check='comment_text'):
    """Generate a pie chart of how comments are distributed across sentiment labels.

    Args:
        df: DataFrame with one row per comment; may be None or empty.
        sentiment_column: Name of the column holding each comment's sentiment
            label. Rows where this value is missing are excluded from the chart.
        text_column_for_check: Currently unused; kept so existing callers that
            pass it keep working.

    Returns:
        The matplotlib figure on success. When the input is missing, the
        sentiment column is absent, no labels remain after dropping missing
        values, or an exception is raised, the result of
        ``create_placeholder_plot`` is returned instead.
    """
    title = "Breakdown of Comments by Sentiment"
    logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available for comment sentiment analysis.")

    if sentiment_column not in df.columns:
        msg = (f"Column '{sentiment_column}' for comment sentiment not found. "
               f"This plot requires a DataFrame with pre-analyzed comment sentiments. "
               f"Available columns: {df.columns.tolist()}")
        logging.warning(msg)
        return create_placeholder_plot(title=title, message=msg)

    try:
        # Drop missing values BEFORE the string cast: astype(str) would turn
        # NaN/None into the literal labels "nan"/"None" and give them a slice.
        labels = df[sentiment_column].dropna().astype(str)
        sentiment_counts = labels.value_counts()

        if sentiment_counts.empty:
            return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")

        fig, ax = plt.subplots(figsize=(8, 5))
        n_labels = len(sentiment_counts)
        # plt.get_cmap is used because matplotlib.cm.get_cmap (plt.cm.get_cmap)
        # was deprecated in Matplotlib 3.7 and removed in 3.9.
        try:
            colors_map = plt.get_cmap('Pastel2', n_labels)
        except ValueError:
            colors_map = plt.get_cmap('Accent', n_labels)

        pie_colors = [colors_map(i) for i in range(n_labels)]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140, colors=pie_colors)
        ax.set_title(title)
        ax.axis('equal')  # keep the pie circular
        plt.tight_layout()
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit from the try block, including the successful return.
        plt.close('all')
|
763 |
+
|
764 |
|
765 |
if __name__ == '__main__':
|
766 |
# Create dummy data for testing
|
767 |
posts_data = {
|
768 |
+
'id': [f'post{i}' for i in range(1, 8)], # Extended to 7 for more data points
|
769 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
770 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
771 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
772 |
+
'shareCount': [1, 0, 1, 1, 2, 0, 1],
|
773 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
774 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
775 |
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
|
776 |
}
|
777 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
778 |
|
779 |
+
# Dummy data for comments with sentiment (hypothetical)
|
780 |
+
comments_sentiment_data = {
|
781 |
+
'comment_id': range(10),
|
782 |
+
'post_id': ['post1']*3 + ['post2']*2 + ['post3']*5,
|
783 |
+
'comment_text': ['Great post!', 'I disagree.', 'Nice work.', 'Interesting.', 'Could be better.', 'Loved it!', 'Not sure.', 'Thanks!', 'Helpful.', 'Okay.'],
|
784 |
+
'comment_sentiment': ['Positive', 'Negative', 'Positive', 'Neutral', 'Negative', 'Positive', 'Neutral', 'Positive', 'Positive', 'Neutral']
|
785 |
+
}
|
786 |
+
sample_comments_with_sentiment_df = pd.DataFrame(comments_sentiment_data)
|
787 |
+
|
788 |
+
|
789 |
# Updated Follower Stats Data
|
790 |
follower_data = {
|
791 |
'follower_count_type': [
|
|
|
798 |
# 'category_name' now holds dates for time-series, and actual categories for demographics
|
799 |
'category_name': [
|
800 |
'2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
|
801 |
+
'USA', 'Canada', 'UK', # Geo
|
802 |
'Engineering', 'Sales', # Function/Role
|
803 |
'Tech', 'Finance', # Industry
|
804 |
'Senior', 'Junior' # Seniority
|
805 |
],
|
806 |
'follower_count_organic': [
|
807 |
+
100, 110, 125, # Organic monthly gains
|
808 |
+
500, 300, 150, # Organic Geo counts
|
809 |
+
400, 200, # Organic Role counts
|
810 |
+
250, 180, # Organic Industry counts
|
811 |
+
300, 220 # Organic Seniority counts
|
812 |
],
|
813 |
'follower_count_paid': [
|
814 |
+
20, 30, 25, # Paid monthly gains
|
815 |
+
50, 40, 60, # Paid Geo counts
|
816 |
+
30, 20, # Paid Role counts
|
817 |
+
45, 35, # Paid Industry counts
|
818 |
+
60, 40 # Paid Seniority counts
|
819 |
]
|
820 |
}
|
821 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
|
|
826 |
sample_follower_stats_df.copy(),
|
827 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
828 |
)
|
829 |
+
if fig_followers_count and not isinstance(fig_followers_count, str): logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
|
830 |
|
831 |
fig_followers_rate = generate_followers_growth_rate_plot(
|
832 |
sample_follower_stats_df.copy(),
|
833 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
834 |
)
|
835 |
+
if fig_followers_rate and not isinstance(fig_followers_rate, str): logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
|
836 |
|
837 |
fig_geo = generate_followers_by_demographics_plot(
|
838 |
sample_follower_stats_df.copy(),
|
839 |
type_value='follower_geo', # category_col defaults to 'category_name'
|
840 |
plot_title="Followers by Location (Organic/Paid)"
|
841 |
)
|
842 |
+
if fig_geo and not isinstance(fig_geo, str): logging.info("Followers by Location (grouped organic/paid) plot generated.")
|
843 |
|
844 |
fig_role = generate_followers_by_demographics_plot(
|
845 |
sample_follower_stats_df.copy(),
|
846 |
type_value='follower_function',
|
847 |
plot_title="Followers by Role (Organic/Paid)"
|
848 |
)
|
849 |
+
if fig_role and not isinstance(fig_role, str): logging.info("Followers by Role (grouped organic/paid) plot generated.")
|
850 |
|
851 |
fig_industry = generate_followers_by_demographics_plot(
|
852 |
sample_follower_stats_df.copy(),
|
853 |
type_value='follower_industry',
|
854 |
plot_title="Followers by Industry (Organic/Paid)"
|
855 |
)
|
856 |
+
if fig_industry and not isinstance(fig_industry, str): logging.info("Followers by Industry (grouped organic/paid) plot generated.")
|
857 |
|
858 |
fig_seniority = generate_followers_by_demographics_plot(
|
859 |
sample_follower_stats_df.copy(),
|
860 |
type_value='follower_seniority',
|
861 |
plot_title="Followers by Seniority (Organic/Paid)"
|
862 |
)
|
863 |
+
if fig_seniority and not isinstance(fig_seniority, str): logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
|
864 |
|
865 |
logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
|
866 |
fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
|
867 |
+
if fig_posts_activity and not isinstance(fig_posts_activity, str): logging.info("Posts activity plot generated.")
|
868 |
|
869 |
fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
|
870 |
+
if fig_engagement_type and not isinstance(fig_engagement_type, str): logging.info("Engagement type plot generated.")
|
871 |
|
872 |
# Dummy mentions for testing
|
873 |
mentions_data = {
|
|
|
876 |
}
|
877 |
sample_mentions_df = pd.DataFrame(mentions_data)
|
878 |
fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
|
879 |
+
if fig_mentions_activity and not isinstance(fig_mentions_activity, str): logging.info("Mentions activity plot generated.")
|
880 |
fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
|
881 |
+
if fig_mention_sentiment and not isinstance(fig_mention_sentiment, str): logging.info("Mention sentiment plot generated.")
|
882 |
|
883 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
884 |
+
if fig_eng_rate and not isinstance(fig_eng_rate, str): logging.info("Engagement Rate Over Time plot generated.")
|
885 |
|
886 |
fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
|
887 |
+
if fig_reach and not isinstance(fig_reach, str): logging.info("Reach Over Time (Clicks) plot generated.")
|
888 |
|
889 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
890 |
+
if fig_impressions and not isinstance(fig_impressions, str): logging.info("Impressions Over Time plot generated.")
|
891 |
+
|
892 |
+
logging.info("--- Testing NEW Plot Generations ---")
|
893 |
+
fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
|
894 |
+
if fig_likes_time and not isinstance(fig_likes_time, str): logging.info("Likes Over Time plot generated.")
|
895 |
+
|
896 |
+
fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
|
897 |
+
if fig_clicks_time and not isinstance(fig_clicks_time, str): logging.info("Clicks Over Time plot generated.")
|
898 |
+
|
899 |
+
fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
|
900 |
+
if fig_shares_time and not isinstance(fig_shares_time, str): logging.info("Shares Over Time plot generated.")
|
901 |
+
|
902 |
+
fig_comments_time = generate_comments_over_time_plot(sample_merged_posts_df.copy())
|
903 |
+
if fig_comments_time and not isinstance(fig_comments_time, str): logging.info("Comments Over Time plot generated.")
|
904 |
+
|
905 |
+
fig_comments_sentiment = generate_comments_sentiment_breakdown_plot(sample_comments_with_sentiment_df.copy())
|
906 |
+
if fig_comments_sentiment and not isinstance(fig_comments_sentiment, str): logging.info("Comments Sentiment Breakdown plot generated (with dummy comment sentiment data).")
|
907 |
+
|
908 |
+
fig_comments_sentiment_no_data = generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy()) # Test with df lacking the sentiment col
|
909 |
+
if fig_comments_sentiment_no_data and not isinstance(fig_comments_sentiment_no_data, str) : logging.info("Comments Sentiment Breakdown plot generated (placeholder, as expected).")
|
910 |
+
|
911 |
|
912 |
logging.info("Test script finished. Review plots if displayed locally or saved.")
|