Spaces:
Running
Running
Update analytics_plot_generator.py
Browse files- analytics_plot_generator.py +283 -31
analytics_plot_generator.py
CHANGED
|
@@ -184,8 +184,13 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
| 184 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
| 185 |
|
| 186 |
fig, ax = plt.subplots(figsize=(8, 5))
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
|
|
|
| 189 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
| 190 |
ax.set_title('Mention Sentiment Distribution')
|
| 191 |
ax.axis('equal')
|
|
@@ -444,10 +449,10 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
|
|
| 444 |
ax.set_xlabel('Date')
|
| 445 |
ax.set_ylabel('Engagement Rate')
|
| 446 |
# Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
|
| 447 |
-
max_rate_val = engagement_over_time.max()
|
| 448 |
-
formatter_xmax = 1.0 if max_rate_val <= 1.5 else 100.0 # Heuristic: if max is small, assume 0-1 scale
|
| 449 |
if max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
|
| 450 |
-
|
| 451 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
| 452 |
ax.grid(True, linestyle='--', alpha=0.7)
|
| 453 |
plt.xticks(rotation=45)
|
|
@@ -520,7 +525,7 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
|
|
| 520 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
| 521 |
|
| 522 |
if df_copy.empty: # After dropping NaNs for essential columns
|
| 523 |
-
|
| 524 |
|
| 525 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
| 526 |
|
|
@@ -539,21 +544,248 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
|
|
| 539 |
finally:
|
| 540 |
plt.close('all')
|
| 541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
if __name__ == '__main__':
|
| 544 |
# Create dummy data for testing
|
| 545 |
posts_data = {
|
| 546 |
-
'id': [f'post{i}' for i in range(1,
|
| 547 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
| 548 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
| 549 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
| 550 |
-
'shareCount': [1, 0, 1, 1, 2, 0, 1],
|
| 551 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
| 552 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
| 553 |
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
|
| 554 |
}
|
| 555 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
| 556 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 557 |
# Updated Follower Stats Data
|
| 558 |
follower_data = {
|
| 559 |
'follower_count_type': [
|
|
@@ -566,24 +798,24 @@ if __name__ == '__main__':
|
|
| 566 |
# 'category_name' now holds dates for time-series, and actual categories for demographics
|
| 567 |
'category_name': [
|
| 568 |
'2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
|
| 569 |
-
'USA', 'Canada', 'UK',
|
| 570 |
'Engineering', 'Sales', # Function/Role
|
| 571 |
'Tech', 'Finance', # Industry
|
| 572 |
'Senior', 'Junior' # Seniority
|
| 573 |
],
|
| 574 |
'follower_count_organic': [
|
| 575 |
-
100, 110, 125,
|
| 576 |
-
500, 300, 150,
|
| 577 |
-
400, 200,
|
| 578 |
-
250, 180,
|
| 579 |
-
300, 220
|
| 580 |
],
|
| 581 |
'follower_count_paid': [
|
| 582 |
-
20, 30, 25,
|
| 583 |
-
50, 40, 60,
|
| 584 |
-
30, 20,
|
| 585 |
-
45, 35,
|
| 586 |
-
60, 40
|
| 587 |
]
|
| 588 |
}
|
| 589 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
|
@@ -594,48 +826,48 @@ if __name__ == '__main__':
|
|
| 594 |
sample_follower_stats_df.copy(),
|
| 595 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
| 596 |
)
|
| 597 |
-
if fig_followers_count: logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
|
| 598 |
|
| 599 |
fig_followers_rate = generate_followers_growth_rate_plot(
|
| 600 |
sample_follower_stats_df.copy(),
|
| 601 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
| 602 |
)
|
| 603 |
-
if fig_followers_rate: logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
|
| 604 |
|
| 605 |
fig_geo = generate_followers_by_demographics_plot(
|
| 606 |
sample_follower_stats_df.copy(),
|
| 607 |
type_value='follower_geo', # category_col defaults to 'category_name'
|
| 608 |
plot_title="Followers by Location (Organic/Paid)"
|
| 609 |
)
|
| 610 |
-
if fig_geo: logging.info("Followers by Location (grouped organic/paid) plot generated.")
|
| 611 |
|
| 612 |
fig_role = generate_followers_by_demographics_plot(
|
| 613 |
sample_follower_stats_df.copy(),
|
| 614 |
type_value='follower_function',
|
| 615 |
plot_title="Followers by Role (Organic/Paid)"
|
| 616 |
)
|
| 617 |
-
if fig_role: logging.info("Followers by Role (grouped organic/paid) plot generated.")
|
| 618 |
|
| 619 |
fig_industry = generate_followers_by_demographics_plot(
|
| 620 |
sample_follower_stats_df.copy(),
|
| 621 |
type_value='follower_industry',
|
| 622 |
plot_title="Followers by Industry (Organic/Paid)"
|
| 623 |
)
|
| 624 |
-
if fig_industry: logging.info("Followers by Industry (grouped organic/paid) plot generated.")
|
| 625 |
|
| 626 |
fig_seniority = generate_followers_by_demographics_plot(
|
| 627 |
sample_follower_stats_df.copy(),
|
| 628 |
type_value='follower_seniority',
|
| 629 |
plot_title="Followers by Seniority (Organic/Paid)"
|
| 630 |
)
|
| 631 |
-
if fig_seniority: logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
|
| 632 |
|
| 633 |
logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
|
| 634 |
fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
|
| 635 |
-
if fig_posts_activity: logging.info("Posts activity plot generated.")
|
| 636 |
|
| 637 |
fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
|
| 638 |
-
if fig_engagement_type: logging.info("Engagement type plot generated.")
|
| 639 |
|
| 640 |
# Dummy mentions for testing
|
| 641 |
mentions_data = {
|
|
@@ -644,17 +876,37 @@ if __name__ == '__main__':
|
|
| 644 |
}
|
| 645 |
sample_mentions_df = pd.DataFrame(mentions_data)
|
| 646 |
fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
|
| 647 |
-
if fig_mentions_activity: logging.info("Mentions activity plot generated.")
|
| 648 |
fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
|
| 649 |
-
if fig_mention_sentiment: logging.info("Mention sentiment plot generated.")
|
| 650 |
|
| 651 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
| 652 |
-
if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
|
| 653 |
|
| 654 |
fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
|
| 655 |
-
if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
|
| 656 |
|
| 657 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
| 658 |
-
if fig_impressions: logging.info("Impressions Over Time plot generated.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
|
| 660 |
logging.info("Test script finished. Review plots if displayed locally or saved.")
|
|
|
|
| 184 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
| 185 |
|
| 186 |
fig, ax = plt.subplots(figsize=(8, 5))
|
| 187 |
+
# Using a more distinct color map if available, or fallback
|
| 188 |
+
try:
|
| 189 |
+
colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
|
| 190 |
+
except ValueError: # Fallback if Pastel1 doesn't have enough colors or isn't available
|
| 191 |
+
colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
|
| 192 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
| 193 |
+
|
| 194 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
| 195 |
ax.set_title('Mention Sentiment Distribution')
|
| 196 |
ax.axis('equal')
|
|
|
|
| 449 |
ax.set_xlabel('Date')
|
| 450 |
ax.set_ylabel('Engagement Rate')
|
| 451 |
# Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
|
| 452 |
+
max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0
|
| 453 |
+
formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val !=0 else 100.0 # Heuristic: if max is small, assume 0-1 scale
|
| 454 |
if max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
|
| 455 |
+
formatter_xmax = max_rate_val # Or some other sensible upper bound for formatting
|
| 456 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
| 457 |
ax.grid(True, linestyle='--', alpha=0.7)
|
| 458 |
plt.xticks(rotation=45)
|
|
|
|
| 525 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
| 526 |
|
| 527 |
if df_copy.empty: # After dropping NaNs for essential columns
|
| 528 |
+
return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
|
| 529 |
|
| 530 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
| 531 |
|
|
|
|
| 544 |
finally:
|
| 545 |
plt.close('all')
|
| 546 |
|
| 547 |
+
# --- NEW PLOT FUNCTIONS ---
|
| 548 |
+
|
| 549 |
+
def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
    """Draw a line chart of the daily sum of likes (reactions).

    Args:
        df: DataFrame of posts; ``None`` or an empty frame yields a placeholder.
        date_column: Column holding the timestamp used for daily bucketing.
        likes_col: Column holding the per-row like count.

    Returns:
        The matplotlib figure on success. For empty input, missing columns,
        no rows left after cleaning, or any exception, the result of
        ``create_placeholder_plot`` is returned instead.
    """
    title = "Reactions (Likes) Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot, or the frame lacks a needed column.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for likes.")

    missing_cols = [col for col in (date_column, likes_col) if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        # Unparseable dates / counts are coerced to NaT / NaN and then dropped.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[likes_col] = pd.to_numeric(cleaned[likes_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, likes_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for likes plot.")

        daily_likes = cleaned.resample('D')[likes_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_likes.index, daily_likes.values, marker='.', linestyle='-', color='crimson')
        ax.set(title=title, xlabel='Date', ylabel='Total Likes')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the returns above.
        plt.close('all')
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
    """Draw a line chart of the daily sum of clicks.

    Args:
        df: DataFrame of posts; ``None`` or an empty frame yields a placeholder.
        date_column: Column holding the timestamp used for daily bucketing.
        clicks_col: Column holding the per-row click count.

    Returns:
        The matplotlib figure on success. For empty input, missing columns,
        no rows left after cleaning, or any exception, the result of
        ``create_placeholder_plot`` is returned instead.
    """
    title = "Clicks Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot, or the frame lacks a needed column.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for clicks.")

    missing_cols = [col for col in (date_column, clicks_col) if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        # Unparseable dates / counts are coerced to NaT / NaN and then dropped.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[clicks_col] = pd.to_numeric(cleaned[clicks_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, clicks_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for clicks plot.")

        daily_clicks = cleaned.resample('D')[clicks_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_clicks.index, daily_clicks.values, marker='.', linestyle='-', color='teal')
        ax.set(title=title, xlabel='Date', ylabel='Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the returns above.
        plt.close('all')
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
    """Draw a line chart of the daily sum of shares.

    Args:
        df: DataFrame of posts; ``None`` or an empty frame yields a placeholder.
        date_column: Column holding the timestamp used for daily bucketing.
        shares_col: Column holding the per-row share count.

    Returns:
        The matplotlib figure on success. For empty input, missing columns,
        no rows left after cleaning, or any exception, the result of
        ``create_placeholder_plot`` is returned instead.
    """
    title = "Shares Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot, or the frame lacks a needed column.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for shares.")

    missing_cols = [col for col in (date_column, shares_col) if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        # Unparseable dates / counts are coerced to NaT / NaN and then dropped.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[shares_col] = pd.to_numeric(cleaned[shares_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, shares_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for shares plot.")

        daily_shares = cleaned.resample('D')[shares_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_shares.index, daily_shares.values, marker='.', linestyle='-', color='goldenrod')
        ax.set(title=title, xlabel='Date', ylabel='Total Shares')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the returns above.
        plt.close('all')
|
| 667 |
+
|
| 668 |
+
|
| 669 |
+
def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
    """Draw a line chart of the daily sum of comments.

    Args:
        df: DataFrame of posts; ``None`` or an empty frame yields a placeholder.
        date_column: Column holding the timestamp used for daily bucketing.
        comments_col: Column holding the per-row comment count.

    Returns:
        The matplotlib figure on success. For empty input, missing columns,
        no rows left after cleaning, or any exception, the result of
        ``create_placeholder_plot`` is returned instead.
    """
    title = "Comments Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {row_count}")

    # Guard clauses: nothing to plot, or the frame lacks a needed column.
    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for comments.")

    missing_cols = [col for col in (date_column, comments_col) if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        # Unparseable dates / counts are coerced to NaT / NaN and then dropped.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[comments_col] = pd.to_numeric(cleaned[comments_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, comments_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning for comments plot.")

        daily_comments = cleaned.resample('D')[comments_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_comments.index, daily_comments.values, marker='.', linestyle='-', color='forestgreen')
        ax.set(title=title, xlabel='Date', ylabel='Total Comments')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as exc:
        logging.error(f"Error generating {title}: {exc}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(exc))
    finally:
        # Runs on every exit from the try block, including the returns above.
        plt.close('all')
|
| 707 |
+
|
| 708 |
+
|
| 709 |
+
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', text_column_for_check='comment_text'):
    """Generate a pie chart of how comments are distributed across sentiment labels.

    Args:
        df: DataFrame expected to hold one row per comment; ``None`` or an
            empty frame yields a placeholder.
        sentiment_column: Column holding the sentiment label of each comment.
        text_column_for_check: Currently unused. Kept in the signature so
            existing callers that pass it keep working.

    Returns:
        The matplotlib figure on success. When the input is empty, the
        sentiment column is missing, no non-null labels remain, or an
        exception occurs, the result of ``create_placeholder_plot`` is
        returned instead.
    """
    title = "Breakdown of Comments by Sentiment"
    logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data available for comment sentiment analysis.")

    if sentiment_column not in df.columns:
        msg = (f"Column '{sentiment_column}' for comment sentiment not found. "
               f"This plot requires a DataFrame with pre-analyzed comment sentiments. "
               f"Available columns: {df.columns.tolist()}")
        logging.warning(msg)
        return create_placeholder_plot(title=title, message=msg)

    try:
        # Drop missing labels BEFORE casting to str; otherwise NaN/None turn
        # into literal "nan"/"None" categories and show up as pie slices.
        labels = df[sentiment_column].dropna().astype(str)
        sentiment_counts = labels.value_counts()

        if sentiment_counts.empty or sentiment_counts.sum() == 0:
            return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")

        fig, ax = plt.subplots(figsize=(8, 5))
        n_slices = len(sentiment_counts)
        # plt.get_cmap is used instead of plt.cm.get_cmap: the latter was
        # deprecated in Matplotlib 3.7 and removed in 3.9, where it raises
        # AttributeError (which the ValueError fallback below would not catch).
        try:
            colors_map = plt.get_cmap('Pastel2', n_slices)
        except ValueError:
            colors_map = plt.get_cmap('Accent', n_slices)

        pie_colors = [colors_map(i) for i in range(n_slices)]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140, colors=pie_colors)
        ax.set_title(title)
        ax.axis('equal')  # keep the pie circular
        plt.tight_layout()
        logging.info(f"Successfully generated {title} plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit from the try block, including the returns above.
        plt.close('all')
|
| 763 |
+
|
| 764 |
|
| 765 |
if __name__ == '__main__':
|
| 766 |
# Create dummy data for testing
|
| 767 |
posts_data = {
|
| 768 |
+
'id': [f'post{i}' for i in range(1, 8)], # Extended to 7 for more data points
|
| 769 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
| 770 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
| 771 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
| 772 |
+
'shareCount': [1, 0, 1, 1, 2, 0, 1],
|
| 773 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
| 774 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
| 775 |
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]
|
| 776 |
}
|
| 777 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
| 778 |
|
| 779 |
+
# Dummy data for comments with sentiment (hypothetical)
|
| 780 |
+
comments_sentiment_data = {
|
| 781 |
+
'comment_id': range(10),
|
| 782 |
+
'post_id': ['post1']*3 + ['post2']*2 + ['post3']*5,
|
| 783 |
+
'comment_text': ['Great post!', 'I disagree.', 'Nice work.', 'Interesting.', 'Could be better.', 'Loved it!', 'Not sure.', 'Thanks!', 'Helpful.', 'Okay.'],
|
| 784 |
+
'comment_sentiment': ['Positive', 'Negative', 'Positive', 'Neutral', 'Negative', 'Positive', 'Neutral', 'Positive', 'Positive', 'Neutral']
|
| 785 |
+
}
|
| 786 |
+
sample_comments_with_sentiment_df = pd.DataFrame(comments_sentiment_data)
|
| 787 |
+
|
| 788 |
+
|
| 789 |
# Updated Follower Stats Data
|
| 790 |
follower_data = {
|
| 791 |
'follower_count_type': [
|
|
|
|
| 798 |
# 'category_name' now holds dates for time-series, and actual categories for demographics
|
| 799 |
'category_name': [
|
| 800 |
'2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
|
| 801 |
+
'USA', 'Canada', 'UK', # Geo
|
| 802 |
'Engineering', 'Sales', # Function/Role
|
| 803 |
'Tech', 'Finance', # Industry
|
| 804 |
'Senior', 'Junior' # Seniority
|
| 805 |
],
|
| 806 |
'follower_count_organic': [
|
| 807 |
+
100, 110, 125, # Organic monthly gains
|
| 808 |
+
500, 300, 150, # Organic Geo counts
|
| 809 |
+
400, 200, # Organic Role counts
|
| 810 |
+
250, 180, # Organic Industry counts
|
| 811 |
+
300, 220 # Organic Seniority counts
|
| 812 |
],
|
| 813 |
'follower_count_paid': [
|
| 814 |
+
20, 30, 25, # Paid monthly gains
|
| 815 |
+
50, 40, 60, # Paid Geo counts
|
| 816 |
+
30, 20, # Paid Role counts
|
| 817 |
+
45, 35, # Paid Industry counts
|
| 818 |
+
60, 40 # Paid Seniority counts
|
| 819 |
]
|
| 820 |
}
|
| 821 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
|
|
|
| 826 |
sample_follower_stats_df.copy(),
|
| 827 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
| 828 |
)
|
| 829 |
+
if fig_followers_count and not isinstance(fig_followers_count, str): logging.info("Followers Count Over Time (monthly, organic/paid) plot generated.")
|
| 830 |
|
| 831 |
fig_followers_rate = generate_followers_growth_rate_plot(
|
| 832 |
sample_follower_stats_df.copy(),
|
| 833 |
type_value='follower_gains_monthly' # date_info_column defaults to 'category_name'
|
| 834 |
)
|
| 835 |
+
if fig_followers_rate and not isinstance(fig_followers_rate, str): logging.info("Followers Growth Rate (monthly, organic/paid) plot generated.")
|
| 836 |
|
| 837 |
fig_geo = generate_followers_by_demographics_plot(
|
| 838 |
sample_follower_stats_df.copy(),
|
| 839 |
type_value='follower_geo', # category_col defaults to 'category_name'
|
| 840 |
plot_title="Followers by Location (Organic/Paid)"
|
| 841 |
)
|
| 842 |
+
if fig_geo and not isinstance(fig_geo, str): logging.info("Followers by Location (grouped organic/paid) plot generated.")
|
| 843 |
|
| 844 |
fig_role = generate_followers_by_demographics_plot(
|
| 845 |
sample_follower_stats_df.copy(),
|
| 846 |
type_value='follower_function',
|
| 847 |
plot_title="Followers by Role (Organic/Paid)"
|
| 848 |
)
|
| 849 |
+
if fig_role and not isinstance(fig_role, str): logging.info("Followers by Role (grouped organic/paid) plot generated.")
|
| 850 |
|
| 851 |
fig_industry = generate_followers_by_demographics_plot(
|
| 852 |
sample_follower_stats_df.copy(),
|
| 853 |
type_value='follower_industry',
|
| 854 |
plot_title="Followers by Industry (Organic/Paid)"
|
| 855 |
)
|
| 856 |
+
if fig_industry and not isinstance(fig_industry, str): logging.info("Followers by Industry (grouped organic/paid) plot generated.")
|
| 857 |
|
| 858 |
fig_seniority = generate_followers_by_demographics_plot(
|
| 859 |
sample_follower_stats_df.copy(),
|
| 860 |
type_value='follower_seniority',
|
| 861 |
plot_title="Followers by Seniority (Organic/Paid)"
|
| 862 |
)
|
| 863 |
+
if fig_seniority and not isinstance(fig_seniority, str): logging.info("Followers by Seniority (grouped organic/paid) plot generated.")
|
| 864 |
|
| 865 |
logging.info("--- Testing Other Plot Generations (No Changes to these) ---")
|
| 866 |
fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
|
| 867 |
+
if fig_posts_activity and not isinstance(fig_posts_activity, str): logging.info("Posts activity plot generated.")
|
| 868 |
|
| 869 |
fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
|
| 870 |
+
if fig_engagement_type and not isinstance(fig_engagement_type, str): logging.info("Engagement type plot generated.")
|
| 871 |
|
| 872 |
# Dummy mentions for testing
|
| 873 |
mentions_data = {
|
|
|
|
| 876 |
}
|
| 877 |
sample_mentions_df = pd.DataFrame(mentions_data)
|
| 878 |
fig_mentions_activity = generate_mentions_activity_plot(sample_mentions_df.copy())
|
| 879 |
+
if fig_mentions_activity and not isinstance(fig_mentions_activity, str): logging.info("Mentions activity plot generated.")
|
| 880 |
fig_mention_sentiment = generate_mention_sentiment_plot(sample_mentions_df.copy())
|
| 881 |
+
if fig_mention_sentiment and not isinstance(fig_mention_sentiment, str): logging.info("Mention sentiment plot generated.")
|
| 882 |
|
| 883 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
| 884 |
+
if fig_eng_rate and not isinstance(fig_eng_rate, str): logging.info("Engagement Rate Over Time plot generated.")
|
| 885 |
|
| 886 |
fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
|
| 887 |
+
if fig_reach and not isinstance(fig_reach, str): logging.info("Reach Over Time (Clicks) plot generated.")
|
| 888 |
|
| 889 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
| 890 |
+
if fig_impressions and not isinstance(fig_impressions, str): logging.info("Impressions Over Time plot generated.")
|
| 891 |
+
|
| 892 |
+
logging.info("--- Testing NEW Plot Generations ---")
|
| 893 |
+
fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
|
| 894 |
+
if fig_likes_time and not isinstance(fig_likes_time, str): logging.info("Likes Over Time plot generated.")
|
| 895 |
+
|
| 896 |
+
fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
|
| 897 |
+
if fig_clicks_time and not isinstance(fig_clicks_time, str): logging.info("Clicks Over Time plot generated.")
|
| 898 |
+
|
| 899 |
+
fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
|
| 900 |
+
if fig_shares_time and not isinstance(fig_shares_time, str): logging.info("Shares Over Time plot generated.")
|
| 901 |
+
|
| 902 |
+
fig_comments_time = generate_comments_over_time_plot(sample_merged_posts_df.copy())
|
| 903 |
+
if fig_comments_time and not isinstance(fig_comments_time, str): logging.info("Comments Over Time plot generated.")
|
| 904 |
+
|
| 905 |
+
fig_comments_sentiment = generate_comments_sentiment_breakdown_plot(sample_comments_with_sentiment_df.copy())
|
| 906 |
+
if fig_comments_sentiment and not isinstance(fig_comments_sentiment, str): logging.info("Comments Sentiment Breakdown plot generated (with dummy comment sentiment data).")
|
| 907 |
+
|
| 908 |
+
fig_comments_sentiment_no_data = generate_comments_sentiment_breakdown_plot(sample_merged_posts_df.copy()) # Test with df lacking the sentiment col
|
| 909 |
+
if fig_comments_sentiment_no_data and not isinstance(fig_comments_sentiment_no_data, str) : logging.info("Comments Sentiment Breakdown plot generated (placeholder, as expected).")
|
| 910 |
+
|
| 911 |
|
| 912 |
logging.info("Test script finished. Review plots if displayed locally or saved.")
|