GuglielmoTor committed on
Commit
8019346
·
verified ·
1 Parent(s): a11780d

Update analytics_plot_generators.py

Browse files
Files changed (1) hide show
  1. analytics_plot_generators.py +393 -103
analytics_plot_generators.py CHANGED
@@ -4,6 +4,7 @@ import logging
4
  from io import BytesIO
5
  import base64
6
  import numpy as np
 
7
 
8
  # Configure logging for this module
9
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
@@ -18,28 +19,21 @@ def create_placeholder_plot(title="No Data or Plot Error", message="Data might b
18
  return fig
19
  except Exception as e:
20
  logging.error(f"Error creating placeholder plot: {e}")
21
- fig, ax = plt.subplots()
22
- ax.text(0.5, 0.5, "Plot generation error", ha='center', va='center')
23
- ax.axis('off')
24
- return fig
25
- finally:
26
- # plt.close(fig) # Close the specific figure to free memory
27
- # More robustly, Gradio handles figure objects, explicit close might not always be needed here
28
- # but plt.close('all') in calling functions or after a block of plot generations is safer.
29
- pass
30
-
31
-
32
- def generate_posts_activity_plot(df, date_column='published_at'): # Default changed as per common use
33
- """
34
- Generates a plot for posts activity over time.
35
- Assumes df has a date_column (e.g., 'published_at') and groups by date to count posts.
36
- """
37
  logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
38
  if df is None or df.empty:
39
  logging.warning(f"Posts activity: DataFrame is empty.")
40
  return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.")
41
  if date_column not in df.columns:
42
- logging.warning(f"Posts activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
43
  return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
44
 
45
  try:
@@ -55,8 +49,8 @@ def generate_posts_activity_plot(df, date_column='published_at'): # Default chan
55
  posts_over_time = df_copy.set_index(date_column).resample('D').size()
56
 
57
  if posts_over_time.empty:
58
- logging.info("Posts activity: No posts after resampling by day.")
59
- return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
60
 
61
  fig, ax = plt.subplots(figsize=(10, 5))
62
  posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
@@ -74,11 +68,8 @@ def generate_posts_activity_plot(df, date_column='published_at'): # Default chan
74
  finally:
75
  plt.close('all')
76
 
77
- def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='comments_count', shares_col='shares_count'):
78
- """
79
- Generates a bar plot for total engagement types (likes, comments, shares).
80
- Input df is expected to be pre-filtered by date if necessary.
81
- """
82
  logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
83
 
84
  required_cols = [likes_col, comments_col, shares_col]
@@ -93,8 +84,8 @@ def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='com
93
  return create_placeholder_plot(title="Post Engagement Types", message=msg)
94
 
95
  try:
96
- df_copy = df.copy() # Work on a copy
97
- for col in required_cols: # Ensure numeric, fill NaNs with 0
98
  df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
99
 
100
  total_likes = df_copy[likes_col].sum()
@@ -131,19 +122,16 @@ def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='com
131
  finally:
132
  plt.close('all')
133
 
134
- def generate_mentions_activity_plot(df, date_column='date'): # Default changed as per common use
135
- """
136
- Generates a plot for mentions activity over time.
137
- Assumes df has a date_column (e.g., 'date') and groups by date to count mentions.
138
- """
139
  logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
140
  if df is None or df.empty:
141
  logging.warning(f"Mentions activity: DataFrame is empty.")
142
  return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.")
143
  if date_column not in df.columns:
144
- logging.warning(f"Mentions activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
145
  return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
146
-
147
  try:
148
  df_copy = df.copy()
149
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
@@ -177,10 +165,7 @@ def generate_mentions_activity_plot(df, date_column='date'): # Default changed a
177
  plt.close('all')
178
 
179
  def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
180
- """
181
- Generates a pie chart for mention sentiment distribution.
182
- Input df is expected to be pre-filtered by date if necessary.
183
- """
184
  logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
185
 
186
  if df is None or df.empty:
@@ -199,8 +184,12 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
199
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
200
 
201
  fig, ax = plt.subplots(figsize=(8, 5))
202
- colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
203
- pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index] # Default color for unknown sentiments
 
 
 
 
204
 
205
 
206
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
@@ -215,104 +204,405 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
215
  finally:
216
  plt.close('all')
217
 
218
- def generate_follower_growth_plot(df, date_column='date', count_column='total_followers'):
219
- """
220
- Generates a plot for follower growth over time.
221
- This function receives the *unfiltered* follower DataFrame.
222
- """
223
- logging.info(f"Generating follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. Input df rows: {len(df) if df is not None else 'None'}")
224
-
225
  if df is None or df.empty:
226
- logging.warning("Follower growth: DataFrame is empty.")
227
- return create_placeholder_plot(title="Follower Growth Over Time", message="No follower data available.")
228
  if date_column not in df.columns or count_column not in df.columns:
229
- missing = []
230
- if date_column not in df.columns: missing.append(date_column)
231
- if count_column not in df.columns: missing.append(count_column)
232
- msg = f"Follower growth: Columns missing: {missing}. Available: {df.columns.tolist()}"
233
- logging.warning(msg)
234
- return create_placeholder_plot(title="Follower Growth Over Time", message=msg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  try:
237
  df_copy = df.copy()
238
- if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
239
- df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
- df_copy[count_column] = pd.to_numeric(df_copy[count_column], errors='coerce')
242
- df_copy = df_copy.dropna(subset=[date_column, count_column])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  if df_copy.empty:
245
- logging.info("Follower growth: DataFrame empty after NaNs dropped from date/count columns.")
246
- return create_placeholder_plot(title="Follower Growth Over Time", message="No valid data for follower growth.")
247
 
248
- df_copy = df_copy.sort_values(by=date_column)
249
 
 
 
 
 
 
 
250
  fig, ax = plt.subplots(figsize=(10, 5))
251
- ax.plot(df_copy[date_column], df_copy[count_column], marker='o', linestyle='-', color='green')
252
- ax.set_title('Follower Growth Over Time')
253
  ax.set_xlabel('Date')
254
- ax.set_ylabel('Total Followers')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  ax.grid(True, linestyle='--', alpha=0.7)
256
  plt.xticks(rotation=45)
257
  plt.tight_layout()
258
- logging.info("Successfully generated follower growth plot.")
259
  return fig
260
  except Exception as e:
261
- logging.error(f"Error generating follower growth plot: {e}", exc_info=True)
262
- return create_placeholder_plot(title="Follower Growth Error", message=str(e))
263
  finally:
264
  plt.close('all')
265
 
 
266
  if __name__ == '__main__':
267
  # Create dummy data for testing
 
268
  posts_data = {
269
- 'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03']),
270
- 'likes_count': [10, 5, 12, 8, 15, 3],
271
- 'comments_count': [2, 1, 3, 1, 4, 0],
272
- 'shares_count': [1, 0, 1, 1, 2, 0]
273
- }
274
- sample_posts_df = pd.DataFrame(posts_data)
275
-
276
- mentions_data = {
277
- 'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
278
- 'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
279
  }
280
- sample_mentions_df = pd.DataFrame(mentions_data)
281
 
 
282
  follower_data = {
283
- 'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']),
284
- 'total_followers': [100, 105, 115, 120, 118] # Example data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  }
286
  sample_follower_stats_df = pd.DataFrame(follower_data)
 
 
287
 
288
- logging.info("--- Testing Plot Generations ---")
289
 
290
- fig1 = generate_posts_activity_plot(sample_posts_df.copy(), date_column='published_at')
291
- if fig1: logging.info("Posts activity plot generated.") # plt.show() for local test
292
 
293
- fig2 = generate_engagement_type_plot(sample_posts_df.copy())
294
- if fig2: logging.info("Engagement type plot generated.")
295
 
296
- fig3 = generate_mentions_activity_plot(sample_mentions_df.copy(), date_column='date')
297
- if fig3: logging.info("Mentions activity plot generated.")
298
 
299
- fig4 = generate_mention_sentiment_plot(sample_mentions_df.copy())
300
- if fig4: logging.info("Mention sentiment plot generated.")
301
-
302
- fig5 = generate_follower_growth_plot(sample_follower_stats_df.copy(), date_column='date', count_column='total_followers')
303
- if fig5: logging.info("Follower growth plot generated.")
304
-
305
- logging.info("--- Testing Placeholders ---")
306
- fig_placeholder = create_placeholder_plot()
307
- if fig_placeholder: logging.info("Placeholder plot generated.")
308
 
309
- empty_df = pd.DataFrame(columns=['published_at']) # Empty df with column
310
- fig_empty_posts = generate_posts_activity_plot(empty_df, date_column='published_at')
311
- if fig_empty_posts: logging.info("Empty posts activity plot (placeholder) generated.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
- df_no_col = pd.DataFrame({'some_other_date': pd.to_datetime(['2023-01-01'])})
314
- fig_no_col_posts = generate_posts_activity_plot(df_no_col, date_column='published_at')
315
- if fig_no_col_posts: logging.info("Posts activity with missing column (placeholder) generated.")
 
 
316
 
317
 
318
- logging.info("Test script finished.")
 
4
  from io import BytesIO
5
  import base64
6
  import numpy as np
7
+ import matplotlib.ticker as mticker
8
 
9
  # Configure logging for this module
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
 
19
  return fig
20
  except Exception as e:
21
  logging.error(f"Error creating placeholder plot: {e}")
22
+ # Fallback placeholder if the above fails
23
+ fig_err, ax_err = plt.subplots()
24
+ ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
25
+ ax_err.axis('off')
26
+ return fig_err
27
+ # No plt.close(fig) here as Gradio handles the figure object.
28
+
29
+ def generate_posts_activity_plot(df, date_column='published_at'):
30
+ """Generates a plot for posts activity over time."""
 
 
 
 
 
 
 
31
  logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
32
  if df is None or df.empty:
33
  logging.warning(f"Posts activity: DataFrame is empty.")
34
  return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.")
35
  if date_column not in df.columns:
36
+ logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
37
  return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
38
 
39
  try:
 
49
  posts_over_time = df_copy.set_index(date_column).resample('D').size()
50
 
51
  if posts_over_time.empty:
52
+ logging.info("Posts activity: No posts after resampling by day.")
53
+ return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
54
 
55
  fig, ax = plt.subplots(figsize=(10, 5))
56
  posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
 
68
  finally:
69
  plt.close('all')
70
 
71
+ def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'): # Updated col names
72
+ """Generates a bar plot for total engagement types (likes, comments, shares)."""
 
 
 
73
  logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
74
 
75
  required_cols = [likes_col, comments_col, shares_col]
 
84
  return create_placeholder_plot(title="Post Engagement Types", message=msg)
85
 
86
  try:
87
+ df_copy = df.copy()
88
+ for col in required_cols:
89
  df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
90
 
91
  total_likes = df_copy[likes_col].sum()
 
122
  finally:
123
  plt.close('all')
124
 
125
+ def generate_mentions_activity_plot(df, date_column='date'):
126
+ """Generates a plot for mentions activity over time."""
 
 
 
127
  logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
128
  if df is None or df.empty:
129
  logging.warning(f"Mentions activity: DataFrame is empty.")
130
  return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.")
131
  if date_column not in df.columns:
132
+ logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
133
  return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
134
+
135
  try:
136
  df_copy = df.copy()
137
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
 
165
  plt.close('all')
166
 
167
  def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
168
+ """Generates a pie chart for mention sentiment distribution."""
 
 
 
169
  logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
170
 
171
  if df is None or df.empty:
 
184
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
185
 
186
  fig, ax = plt.subplots(figsize=(8, 5))
187
+ # Define a broader range of colors or a colormap for more sentiment types
188
+ colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
189
+ pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
190
+ # Or keep your specific colors if sentiment labels are fixed:
191
+ # colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
192
+ # pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index]
193
 
194
 
195
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
 
204
  finally:
205
  plt.close('all')
206
 
207
+ # --- Existing Follower Growth Plot (can be reused or adapted) ---
208
def generate_total_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """Draw the total follower count against time as a line chart.

    Args:
        df: DataFrame containing the date and count columns.
        date_column: Column converted with ``pd.to_datetime``; values that
            cannot be parsed are dropped.
        count_column: Column converted with ``pd.to_numeric``; values that
            cannot be parsed are dropped.

    Returns:
        The figure created by ``plt.subplots`` on success, otherwise the
        result of ``create_placeholder_plot`` explaining why nothing was drawn.
    """
    placeholder_title = "Total Follower Growth"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating total follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=placeholder_title, message="No follower data.")

    if any(col not in df.columns for col in (date_column, count_column)):
        return create_placeholder_plot(
            title=placeholder_title,
            message=f"Missing columns: {date_column} or {count_column}.",
        )

    try:
        # Work on a private copy so the caller's frame is never modified.
        growth = df.copy()
        growth[date_column] = pd.to_datetime(growth[date_column], errors='coerce')
        growth[count_column] = pd.to_numeric(growth[count_column], errors='coerce')

        # Coercion turns bad values into NaT/NaN; discard those rows, then order by date.
        growth = growth.dropna(subset=[date_column, count_column])
        growth = growth.sort_values(by=date_column)
        if growth.empty:
            return create_placeholder_plot(title=placeholder_title, message="No valid data after cleaning.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(
            growth[date_column],
            growth[count_column],
            marker='o',
            linestyle='-',
            color='green',
        )
        ax.set_title('Total Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error in generate_total_follower_growth_plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Total Follower Growth Error", message=str(e))
    finally:
        # Runs on every exit path, including the successful return above.
        plt.close('all')
239
+
240
+ # --- New Plot Functions ---
241
+
242
def generate_followers_count_over_time_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Generate a line plot of follower counts over time for one record type.

    Args:
        df: DataFrame with follower statistics.
        date_column: Column holding the dates; coerced with ``pd.to_datetime``.
        count_column: Column holding the counts; coerced with ``pd.to_numeric``.
        type_filter_column: Column used to select the rows to plot.
        type_value: Only rows whose ``type_filter_column`` equals this value
            are plotted.

    Returns:
        The figure created by ``plt.subplots`` on success, otherwise the
        result of ``create_placeholder_plot`` describing the problem.
    """
    title = f"Followers Count Over Time ({type_value})"
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Filter first, then take an explicit copy: assigning columns on a
        # boolean-indexed slice triggers pandas' SettingWithCopyWarning, and
        # copying only the matching rows is cheaper than copying the whole frame.
        df_filtered = df[df[type_filter_column] == type_value].copy()

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        # Rows whose date or count could not be parsed are NaT/NaN now; drop them.
        df_filtered = df_filtered.dropna(subset=[date_column, count_column]).sort_values(by=date_column)

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_filtered[date_column], df_filtered[count_column], marker='o', linestyle='-', color='dodgerblue')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Follower Count')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit path, including the successful return above.
        plt.close('all')
283
+
284
def generate_followers_growth_rate_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Calculate and plot the period-over-period growth rate of a follower count.

    The rate for each row is ``(current - previous) / previous * 100`` computed
    on ``count_column`` after the rows are filtered, cleaned and sorted by date.

    Args:
        df: DataFrame with follower statistics.
        date_column: Column holding the dates; coerced with ``pd.to_datetime``.
        count_column: Column holding the counts; coerced with ``pd.to_numeric``.
        type_filter_column: Column used to select the rows to analyse.
        type_value: Only rows whose ``type_filter_column`` equals this value
            are used.

    Returns:
        The figure created by ``plt.subplots`` on success, otherwise the
        result of ``create_placeholder_plot`` describing the problem.
    """
    title = f"Follower Growth Rate ({type_value})"
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Filter first, then copy explicitly so the column assignments below
        # do not hit a slice (avoids SettingWithCopyWarning).
        df_filtered = df[df[type_filter_column] == type_value].copy()

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        df_filtered = (
            df_filtered.dropna(subset=[date_column, count_column])
            .sort_values(by=date_column)
            .set_index(date_column)
        )

        # A rate needs a previous value; this also covers the empty frame.
        if len(df_filtered) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        # NaNs were dropped above, so no forward fill is wanted;
        # fill_method=None also avoids the deprecated implicit padding default.
        growth_rate = df_filtered[count_column].pct_change(fill_method=None) * 100
        # A previous value of 0 yields +/-inf (or NaN for 0 -> 0); the first
        # row has no predecessor. None of these can be plotted, so drop them.
        growth_rate = growth_rate.replace([np.inf, -np.inf], np.nan).dropna()

        if growth_rate.empty:
            return create_placeholder_plot(title=title, message="No valid growth rate data after calculation.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(growth_rate.index, growth_rate, marker='o', linestyle='-', color='lightcoral')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Growth Rate (%)')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter())
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit path, including the successful return above.
        plt.close('all')
337
+
338
def generate_followers_by_demographics_plot(df, category_col='category_name', count_column='follower_count_o', type_filter_column='follower_count_type', type_value=None, plot_title="Followers by Demographics"):
    """Generate a bar chart of follower counts summed per demographic category.

    Args:
        df: DataFrame with follower statistics.
        category_col: Column whose values become the bars.
        count_column: Column summed per category; non-numeric values count as 0.
        type_filter_column: Column used to select the rows to aggregate.
        type_value: Required. Only rows whose ``type_filter_column`` equals
            this value are aggregated; ``None`` yields a placeholder.
        plot_title: Chart title. `` (Top 10)`` is appended when more than ten
            categories exist and only the ten largest are shown.

    Returns:
        The figure created by ``plt.subplots`` on success, otherwise the
        result of ``create_placeholder_plot`` describing the problem.
    """
    logging.info(f"Generating {plot_title}. Category: '{category_col}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=plot_title, message="No follower data available.")

    required_cols = [category_col, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}.")

    if type_value is None:  # Should be specified
        return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")

    # Local title so the parameter itself is never reassigned; it may gain a
    # "(Top N)" suffix below and is also used for error reporting.
    display_title = plot_title
    try:
        # Filter first, then copy explicitly so the column assignment below
        # does not hit a slice (avoids SettingWithCopyWarning).
        df_filtered = df[df[type_filter_column] == type_value].copy()

        if df_filtered.empty:
            return create_placeholder_plot(title=display_title, message=f"No data for demographic type '{type_value}'.")

        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce').fillna(0)

        # Sum the counts per category, largest first.
        demographics_data = df_filtered.groupby(category_col)[count_column].sum().sort_values(ascending=False)

        if demographics_data.empty:
            return create_placeholder_plot(title=display_title, message="No demographic data to display after filtering and aggregation.")

        # Limit to the top N categories so the x-axis stays readable.
        top_n = 10
        if len(demographics_data) > top_n:
            demographics_data = demographics_data.head(top_n)
            display_title += f" (Top {top_n})"

        figsize = (10, 6) if len(demographics_data) > 5 else (8, 5)
        fig, ax = plt.subplots(figsize=figsize)
        demographics_data.plot(kind='bar', ax=ax, color='teal')
        ax.set_title(display_title)
        ax.set_xlabel(category_col.replace('_', ' ').title())
        ax.set_ylabel('Number of Followers')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {display_title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{display_title} Error", message=str(e))
    finally:
        # Runs on every exit path, including the successful return above.
        plt.close('all')
389
+
390
+
391
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """Plot the daily mean of an engagement-rate column as a line chart.

    Args:
        df: DataFrame of posts.
        date_column: Column holding post dates; coerced with ``pd.to_datetime``.
        engagement_rate_col: Column holding the rate; coerced with
            ``pd.to_numeric``. Presumably already a rate rather than an
            absolute count -- confirm against the caller.

    Returns:
        The figure created by ``plt.subplots`` on success, otherwise the
        result of ``create_placeholder_plot`` describing the problem.
    """
    title = "Engagement Rate Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for engagement rate.")

    required_cols = [date_column, engagement_rate_col]
    missing_cols = [name for name in required_cols if name not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[engagement_rate_col] = pd.to_numeric(cleaned[engagement_rate_col], errors='coerce')
        cleaned = cleaned.dropna(subset=required_cols)
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Average per calendar day; days without any post come back as NaN
        # from the resample and are removed.
        daily_rate = cleaned.resample('D')[engagement_rate_col].mean().dropna()

        if daily_rate.empty:
            return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")

        # Values up to 1 are treated as fractions, anything larger as percentages.
        if daily_rate.max() <= 1:
            percent_scale = 1.0
        else:
            percent_scale = 100.0

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(
            daily_rate.index,
            daily_rate.values,
            marker='.',
            linestyle='-',
            color='darkorange',
        )
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Engagement Rate')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=percent_scale))
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit path, including the successful return above.
        plt.close('all')
436
+
437
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):  # Using clickCount as proxy for Reach
    """Generate a line plot of the daily sum of a reach column (clicks by default).

    Args:
        df: DataFrame of posts.
        date_column: Column holding post dates; coerced with ``pd.to_datetime``.
        reach_col: Column summed per day; coerced with ``pd.to_numeric``.

    Returns:
        The figure created by ``plt.subplots`` on success, otherwise the
        result of ``create_placeholder_plot`` describing the problem.
    """
    title = "Reach Over Time (Clicks)"
    logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for reach.")

    required_cols = [date_column, reach_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        df_copy = df.copy()
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)

        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Daily totals. An all-zero series is plotted as-is rather than
        # replaced by a placeholder.
        reach_over_time = df_copy.resample('D')[reach_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit path, including the successful return above.
        plt.close('all')
481
+
482
def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
    """Plot the daily sum of an impressions column as a line chart.

    Args:
        df: DataFrame of posts.
        date_column: Column holding post dates; coerced with ``pd.to_datetime``.
        impressions_col: Column summed per day; coerced with ``pd.to_numeric``.

    Returns:
        The figure created by ``plt.subplots`` on success, otherwise the
        result of ``create_placeholder_plot`` describing the problem.
    """
    title = "Impressions Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for impressions.")

    required_cols = [date_column, impressions_col]
    missing_cols = [name for name in required_cols if name not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[impressions_col] = pd.to_numeric(cleaned[impressions_col], errors='coerce')
        # Coercion turns bad values into NaT/NaN; discard those rows before indexing by date.
        cleaned = cleaned.dropna(subset=required_cols)
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        daily_impressions = cleaned.resample('D')[impressions_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(
            daily_impressions.index,
            daily_impressions.values,
            marker='.',
            linestyle='-',
            color='slateblue',
        )
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Impressions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Runs on every exit path, including the successful return above.
        plt.close('all')
520
 
521
+
522
  if __name__ == '__main__':
523
  # Create dummy data for testing
524
+ # Posts Data (merged with stats)
525
  posts_data = {
526
+ 'id': [f'post{i}' for i in range(1, 7)],
527
+ 'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
528
+ 'likeCount': [10, 5, 12, 8, 15, 3, 20],
529
+ 'commentCount': [2, 1, 3, 1, 4, 0, 5],
530
+ 'shareCount': [1, 0, 1, 1, 2, 0, 1],
531
+ 'clickCount': [20, 15, 30, 22, 40, 10, 50],
532
+ 'impressionCount': [200, 150, 300, 220, 400, 100, 500],
533
+ 'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08] # Engagement Rate
 
 
534
  }
535
+ sample_merged_posts_df = pd.DataFrame(posts_data)
536
 
537
+ # Follower Stats Data
538
  follower_data = {
539
+ 'date': pd.to_datetime(['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15', '2023-03-01', # For time series
540
+ '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', # For demographics (snapshot)
541
+ '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',
542
+ '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01'
543
+ ]),
544
+ 'follower_count_type': ['follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
545
+ 'follower_geo', 'follower_geo', 'follower_geo', # Location
546
+ 'follower_function', 'follower_function', 'follower_function', # Role
547
+ 'follower_industry', 'follower_industry', 'follower_industry', # Industry
548
+ 'follower_seniority', 'follower_seniority', 'follower_seniority', # Seniority
549
+ 'total_followers_snapshot', 'total_followers_snapshot', 'total_followers_snapshot' # For existing total growth
550
+ ],
551
+ 'category_name': ['Jan', 'Jan-Mid', 'Feb', 'Feb-Mid', 'Mar', # Corresponds to follower_gains_monthly
552
+ 'USA', 'Canada', 'UK', # Geo
553
+ 'Engineering', 'Sales', 'Marketing', # Function/Role
554
+ 'Tech', 'Finance', 'Healthcare', # Industry
555
+ 'Senior', 'Junior', 'Manager', # Seniority
556
+ 'Overall1', 'Overall2', 'Overall3' # For total_followers_snapshot
557
+ ],
558
+ 'follower_count_o': [100, 105, 115, 120, 130, # Counts for monthly gains
559
+ 500, 300, 200, # Geo counts
560
+ 400, 350, 250, # Role counts
561
+ 600, 200, 200, # Industry counts
562
+ 300, 400, 300, # Seniority counts
563
+ 1000, 1010, 1025 # For total_followers_snapshot
564
+ ],
565
+ 'total_followers': [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,100,115,130] # For existing total growth plot
566
  }
567
  sample_follower_stats_df = pd.DataFrame(follower_data)
568
+ # Ensure 'total_followers' for generate_total_follower_growth_plot is correctly populated for its specific rows
569
+ sample_follower_stats_df.loc[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot', 'total_followers'] = sample_follower_stats_df['follower_count_o']
570
 
 
571
 
572
+ logging.info("--- Testing New Plot Generations ---")
 
573
 
574
+ fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
575
+ if fig_followers_count: logging.info("Followers Count Over Time (monthly) plot generated.")
576
 
577
+ fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
578
+ if fig_followers_rate: logging.info("Followers Growth Rate (monthly) plot generated.")
579
 
580
+ fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
581
+ if fig_geo: logging.info("Followers by Location plot generated.")
 
 
 
 
 
 
 
582
 
583
+ fig_role = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_function', plot_title="Followers by Role")
584
+ if fig_role: logging.info("Followers by Role plot generated.")
585
+
586
+ fig_industry = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_industry', plot_title="Followers by Industry")
587
+ if fig_industry: logging.info("Followers by Industry plot generated.")
588
+
589
+ fig_seniority = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_seniority', plot_title="Followers by Seniority")
590
+ if fig_seniority: logging.info("Followers by Seniority plot generated.")
591
+
592
+ fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
593
+ if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
594
+
595
+ fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
596
+ if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
597
+
598
+ fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
599
+ if fig_impressions: logging.info("Impressions Over Time plot generated.")
600
 
601
+ # Test existing total follower growth plot with appropriate data
602
+ total_followers_df = sample_follower_stats_df[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot'].copy()
603
+ total_followers_df['date'] = pd.to_datetime(total_followers_df['date']) # Ensure date is datetime
604
+ fig_total_growth = generate_total_follower_growth_plot(total_followers_df, date_column='date', count_column='total_followers')
605
+ if fig_total_growth: logging.info("Total Follower Growth plot (existing function) generated.")
606
 
607
 
608
+ logging.info("Test script finished. Review plots if displayed locally or saved.")