Spaces:
Running
Running
Update analytics_plot_generator.py
Browse files- analytics_plot_generator.py +59 -29
analytics_plot_generator.py
CHANGED
@@ -243,8 +243,15 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
243 |
# Define a list of distinct colors for the pie slices
|
244 |
pie_slice_colors = plt.cm.get_cmap('Pastel2', len(sentiment_counts))
|
245 |
|
246 |
-
|
247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
|
249 |
# fig.tight_layout(pad=0.5) # tight_layout can sometimes mess with pie charts if labels are long
|
250 |
fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95) # Give pie chart space
|
@@ -294,7 +301,8 @@ def generate_followers_count_over_time_plot(df, date_info_column='category_name'
|
|
294 |
ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers', zorder=1)
|
295 |
ax.set_xlabel('Date')
|
296 |
ax.set_ylabel('Follower Count')
|
297 |
-
ax.legend(zorder
|
|
|
298 |
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
299 |
plt.xticks(rotation=45)
|
300 |
fig.tight_layout(pad=0.5)
|
@@ -359,7 +367,8 @@ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
|
|
359 |
ax.set_xlabel('Date')
|
360 |
ax.set_ylabel('Growth Rate (%)')
|
361 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter())
|
362 |
-
ax.legend(zorder
|
|
|
363 |
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
364 |
plt.xticks(rotation=45)
|
365 |
fig.tight_layout(pad=0.5)
|
@@ -415,9 +424,6 @@ def generate_followers_by_demographics_plot(df, category_col='category_name',
|
|
415 |
bar_width = 0.35
|
416 |
index = np.arange(len(demographics_data.index))
|
417 |
|
418 |
-
# Using tab10 colormap for distinct colors for organic and paid
|
419 |
-
# If you wanted each CATEGORY (e.g., Italy, UK) to have different colors for its pair of bars,
|
420 |
-
# that would require a more complex color mapping. Current setup is distinct for Organic vs Paid.
|
421 |
color_organic = plt.cm.get_cmap('tab10')(0)
|
422 |
color_paid = plt.cm.get_cmap('tab10')(1)
|
423 |
|
@@ -428,7 +434,8 @@ def generate_followers_by_demographics_plot(df, category_col='category_name',
|
|
428 |
ax.set_ylabel('Number of Followers')
|
429 |
ax.set_xticks(index)
|
430 |
ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
|
431 |
-
ax.legend(zorder
|
|
|
432 |
ax.grid(axis='y', linestyle='--', alpha=0.6, zorder=0)
|
433 |
|
434 |
for bar_group in [bars1, bars2]:
|
@@ -694,11 +701,14 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
|
|
694 |
if df is None or df.empty:
|
695 |
return create_placeholder_plot(title=title, message="No data for comment sentiment.")
|
696 |
if sentiment_column not in df.columns:
|
697 |
-
if 'sentiment' in df.columns and sentiment_column != 'sentiment':
|
698 |
logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
|
699 |
-
sentiment_column = 'sentiment'
|
700 |
-
|
701 |
-
|
|
|
|
|
|
|
702 |
|
703 |
if df[sentiment_column].isnull().all():
|
704 |
return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")
|
@@ -716,8 +726,14 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
|
|
716 |
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
717 |
|
718 |
pie_slice_colors = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
|
719 |
-
|
720 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
721 |
ax.axis('equal')
|
722 |
# fig.tight_layout(pad=0.5)
|
723 |
fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
|
@@ -907,9 +923,16 @@ if __name__ == '__main__':
|
|
907 |
"['AI', 'Development']",
|
908 |
['Tech']
|
909 |
],
|
|
|
|
|
|
|
910 |
'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive']
|
911 |
}
|
912 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
|
|
|
|
|
|
|
|
913 |
|
914 |
follower_data = {
|
915 |
'follower_count_type': [
|
@@ -942,37 +965,45 @@ if __name__ == '__main__':
|
|
942 |
]
|
943 |
}
|
944 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
945 |
|
946 |
# --- Test individual plots ---
|
|
|
947 |
plot_functions_to_test = {
|
948 |
-
"Posts Activity": (generate_posts_activity_plot, [sample_merged_posts_df.copy()]),
|
949 |
-
"Engagement Type": (generate_engagement_type_plot, [sample_merged_posts_df.copy()]),
|
950 |
-
"Mentions Activity": (generate_mentions_activity_plot, [sample_mentions_df.copy()]),
|
951 |
-
"Mention Sentiment": (generate_mention_sentiment_plot, [sample_mentions_df.copy()]),
|
952 |
"Followers Count": (generate_followers_count_over_time_plot, [sample_follower_stats_df.copy()], {'type_value':'follower_gains_monthly'}),
|
953 |
"Followers Growth": (generate_followers_growth_rate_plot, [sample_follower_stats_df.copy()], {'type_value':'follower_gains_monthly'}),
|
954 |
"Followers Geo": (generate_followers_by_demographics_plot, [sample_follower_stats_df.copy()], {'type_value':'follower_geo', 'plot_title':"Followers by Location"}),
|
955 |
-
"Engagement Rate": (generate_engagement_rate_over_time_plot, [sample_merged_posts_df.copy()]),
|
956 |
-
"Reach (Clicks)": (generate_reach_over_time_plot, [sample_merged_posts_df.copy()]),
|
957 |
-
"Impressions": (generate_impressions_over_time_plot, [sample_merged_posts_df.copy()]),
|
958 |
-
"Likes Over Time": (generate_likes_over_time_plot, [sample_merged_posts_df.copy()]),
|
959 |
-
"Shares Over Time": (generate_shares_over_time_plot, [sample_merged_posts_df.copy()]),
|
960 |
-
"Comments Over Time": (generate_comments_over_time_plot, [sample_merged_posts_df.copy()]),
|
961 |
"Comments Sentiment": (generate_comments_sentiment_breakdown_plot, [sample_merged_posts_df.copy()], {'sentiment_column':'comment_sentiment'}),
|
962 |
"Post Frequency Daily": (generate_post_frequency_plot, [sample_merged_posts_df.copy()], {'resample_period':'D'}),
|
963 |
"Post Frequency Weekly": (generate_post_frequency_plot, [sample_merged_posts_df.copy()], {'resample_period':'W'}),
|
964 |
-
"Content Format": (generate_content_format_breakdown_plot, [sample_merged_posts_df.copy()]),
|
965 |
"Content Topics": (generate_content_topic_breakdown_plot, [sample_merged_posts_df.copy()], {'top_n':5}),
|
966 |
}
|
967 |
|
968 |
# Create a directory to save plots if it doesn't exist
|
969 |
# import os
|
970 |
-
# output_dir = "
|
971 |
# os.makedirs(output_dir, exist_ok=True)
|
972 |
|
973 |
-
for name, (func, args,
|
974 |
logging.info(f"--- Testing: {name} ---")
|
975 |
-
fig = func(*args, **
|
976 |
if fig:
|
977 |
logging.info(f"{name} plot generated.")
|
978 |
# fig.savefig(os.path.join(output_dir, f"{name.lower().replace(' ', '_')}_test.png"))
|
@@ -981,4 +1012,3 @@ if __name__ == '__main__':
|
|
981 |
logging.warning(f"{name} plot generation failed or returned None.")
|
982 |
|
983 |
logging.info("Test script finished. Review plots if saved locally.")
|
984 |
-
|
|
|
243 |
# Define a list of distinct colors for the pie slices
|
244 |
pie_slice_colors = plt.cm.get_cmap('Pastel2', len(sentiment_counts))
|
245 |
|
246 |
+
# ax.pie() is called without a zorder argument; zorder is instead set on the returned wedges and text artists below
|
247 |
+
wedges, texts, autotexts = ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90,
|
248 |
+
colors=[pie_slice_colors(i) for i in range(len(sentiment_counts))])
|
249 |
+
# Set zorder for pie elements if needed, though usually not necessary as they draw on top of the background patch
|
250 |
+
for wedge in wedges:
|
251 |
+
wedge.set_zorder(1)
|
252 |
+
for text_item in texts + autotexts:
|
253 |
+
text_item.set_zorder(2)
|
254 |
+
|
255 |
ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
|
256 |
# fig.tight_layout(pad=0.5) # tight_layout can sometimes mess with pie charts if labels are long
|
257 |
fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95) # Give pie chart space
|
|
|
301 |
ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers', zorder=1)
|
302 |
ax.set_xlabel('Date')
|
303 |
ax.set_ylabel('Follower Count')
|
304 |
+
legend = ax.legend() # Removed zorder from legend call
|
305 |
+
if legend: legend.set_zorder(2) # Set zorder on the legend object itself
|
306 |
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
307 |
plt.xticks(rotation=45)
|
308 |
fig.tight_layout(pad=0.5)
|
|
|
367 |
ax.set_xlabel('Date')
|
368 |
ax.set_ylabel('Growth Rate (%)')
|
369 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter())
|
370 |
+
legend = ax.legend() # Removed zorder from legend call
|
371 |
+
if legend: legend.set_zorder(2) # Set zorder on the legend object itself
|
372 |
ax.grid(True, linestyle='--', alpha=0.6, zorder=0)
|
373 |
plt.xticks(rotation=45)
|
374 |
fig.tight_layout(pad=0.5)
|
|
|
424 |
bar_width = 0.35
|
425 |
index = np.arange(len(demographics_data.index))
|
426 |
|
|
|
|
|
|
|
427 |
color_organic = plt.cm.get_cmap('tab10')(0)
|
428 |
color_paid = plt.cm.get_cmap('tab10')(1)
|
429 |
|
|
|
434 |
ax.set_ylabel('Number of Followers')
|
435 |
ax.set_xticks(index)
|
436 |
ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
|
437 |
+
legend = ax.legend() # Removed zorder from legend call
|
438 |
+
if legend: legend.set_zorder(2) # Set zorder on the legend object itself
|
439 |
ax.grid(axis='y', linestyle='--', alpha=0.6, zorder=0)
|
440 |
|
441 |
for bar_group in [bars1, bars2]:
|
|
|
701 |
if df is None or df.empty:
|
702 |
return create_placeholder_plot(title=title, message="No data for comment sentiment.")
|
703 |
if sentiment_column not in df.columns:
|
704 |
+
if 'sentiment' in df.columns and sentiment_column != 'sentiment': # Check for a common alternative name
|
705 |
logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
|
706 |
+
sentiment_column = 'sentiment'
|
707 |
+
if sentiment_column not in df.columns: # If fallback also not found
|
708 |
+
return create_placeholder_plot(title=title, message=f"Fallback sentiment column 'sentiment' also not found. Available: {df.columns.tolist()}")
|
709 |
+
else: # If original and 'sentiment' fallback are not found
|
710 |
+
return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' not found. Available: {df.columns.tolist()}")
|
711 |
+
|
712 |
|
713 |
if df[sentiment_column].isnull().all():
|
714 |
return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")
|
|
|
726 |
_apply_rounded_corners_and_transparent_bg(fig, ax)
|
727 |
|
728 |
pie_slice_colors = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
|
729 |
+
# ax.pie() is called without a zorder argument; zorder is instead set on the returned wedges and text artists below
|
730 |
+
wedges, texts, autotexts = ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90,
|
731 |
+
colors=[pie_slice_colors(i) for i in range(len(sentiment_counts))])
|
732 |
+
for wedge in wedges:
|
733 |
+
wedge.set_zorder(1)
|
734 |
+
for text_item in texts + autotexts:
|
735 |
+
text_item.set_zorder(2)
|
736 |
+
|
737 |
ax.axis('equal')
|
738 |
# fig.tight_layout(pad=0.5)
|
739 |
fig.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
|
|
|
923 |
"['AI', 'Development']",
|
924 |
['Tech']
|
925 |
],
|
926 |
+
# NOTE: the key below intentionally stays 'comment_sentiment' (the primary column name); the comment sentiment plot only looks for a 'sentiment' column when the primary one is missing.
|
927 |
+
# For testing, let's assume 'comment_sentiment' is the primary and 'sentiment' is a fallback.
|
928 |
+
# To test the fallback, you could rename 'comment_sentiment' to something else in one run.
|
929 |
'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive']
|
930 |
}
|
931 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
932 |
+
# Example: To test fallback for comment sentiment, you could do:
|
933 |
+
# sample_merged_posts_df_test_fallback = sample_merged_posts_df.rename(columns={'comment_sentiment': 'actual_comment_sentiment'})
|
934 |
+
# sample_merged_posts_df_test_fallback['sentiment'] = sample_merged_posts_df_test_fallback['actual_comment_sentiment']
|
935 |
+
|
936 |
|
937 |
follower_data = {
|
938 |
'follower_count_type': [
|
|
|
965 |
]
|
966 |
}
|
967 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
968 |
+
|
969 |
+
mentions_data = { # Added for mentions plot testing
|
970 |
+
'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
|
971 |
+
'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
|
972 |
+
}
|
973 |
+
sample_mentions_df = pd.DataFrame(mentions_data)
|
974 |
+
|
975 |
|
976 |
# --- Test individual plots ---
|
977 |
+
# (Using a dictionary for easier management and selective testing)
|
978 |
plot_functions_to_test = {
|
979 |
+
"Posts Activity": (generate_posts_activity_plot, [sample_merged_posts_df.copy()], {}),
|
980 |
+
"Engagement Type": (generate_engagement_type_plot, [sample_merged_posts_df.copy()], {}),
|
981 |
+
"Mentions Activity": (generate_mentions_activity_plot, [sample_mentions_df.copy()], {}), # Added sample_mentions_df
|
982 |
+
"Mention Sentiment": (generate_mention_sentiment_plot, [sample_mentions_df.copy()], {}), # Added sample_mentions_df
|
983 |
"Followers Count": (generate_followers_count_over_time_plot, [sample_follower_stats_df.copy()], {'type_value':'follower_gains_monthly'}),
|
984 |
"Followers Growth": (generate_followers_growth_rate_plot, [sample_follower_stats_df.copy()], {'type_value':'follower_gains_monthly'}),
|
985 |
"Followers Geo": (generate_followers_by_demographics_plot, [sample_follower_stats_df.copy()], {'type_value':'follower_geo', 'plot_title':"Followers by Location"}),
|
986 |
+
"Engagement Rate": (generate_engagement_rate_over_time_plot, [sample_merged_posts_df.copy()], {}),
|
987 |
+
"Reach (Clicks)": (generate_reach_over_time_plot, [sample_merged_posts_df.copy()], {}),
|
988 |
+
"Impressions": (generate_impressions_over_time_plot, [sample_merged_posts_df.copy()], {}),
|
989 |
+
"Likes Over Time": (generate_likes_over_time_plot, [sample_merged_posts_df.copy()], {}),
|
990 |
+
"Shares Over Time": (generate_shares_over_time_plot, [sample_merged_posts_df.copy()], {}),
|
991 |
+
"Comments Over Time": (generate_comments_over_time_plot, [sample_merged_posts_df.copy()], {}),
|
992 |
"Comments Sentiment": (generate_comments_sentiment_breakdown_plot, [sample_merged_posts_df.copy()], {'sentiment_column':'comment_sentiment'}),
|
993 |
"Post Frequency Daily": (generate_post_frequency_plot, [sample_merged_posts_df.copy()], {'resample_period':'D'}),
|
994 |
"Post Frequency Weekly": (generate_post_frequency_plot, [sample_merged_posts_df.copy()], {'resample_period':'W'}),
|
995 |
+
"Content Format": (generate_content_format_breakdown_plot, [sample_merged_posts_df.copy()], {}),
|
996 |
"Content Topics": (generate_content_topic_breakdown_plot, [sample_merged_posts_df.copy()], {'top_n':5}),
|
997 |
}
|
998 |
|
999 |
# Create a directory to save plots if it doesn't exist
|
1000 |
# import os
|
1001 |
+
# output_dir = "test_plots_updated" # Changed dir name
|
1002 |
# os.makedirs(output_dir, exist_ok=True)
|
1003 |
|
1004 |
+
for name, (func, args, kwargs_dict) in plot_functions_to_test.items(): # Renamed kwargs to kwargs_dict
|
1005 |
logging.info(f"--- Testing: {name} ---")
|
1006 |
+
fig = func(*args, **kwargs_dict) # Use kwargs_dict
|
1007 |
if fig:
|
1008 |
logging.info(f"{name} plot generated.")
|
1009 |
# fig.savefig(os.path.join(output_dir, f"{name.lower().replace(' ', '_')}_test.png"))
|
|
|
1012 |
logging.warning(f"{name} plot generation failed or returned None.")
|
1013 |
|
1014 |
logging.info("Test script finished. Review plots if saved locally.")
|
|