GuglielmoTor committed on
Commit
7f147c5
·
verified ·
1 Parent(s): 6e6119d

Update analytics_plot_generator.py

Browse files
Files changed (1) hide show
  1. analytics_plot_generator.py +218 -226
analytics_plot_generator.py CHANGED
@@ -13,10 +13,12 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
13
  def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
14
  """Creates a placeholder Matplotlib plot indicating no data or an error."""
15
  try:
16
- fig, ax = plt.subplots(figsize=(8, 4))
17
  ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
18
- ax.axis('off')
19
  plt.tight_layout()
 
 
20
  return fig
21
  except Exception as e:
22
  logging.error(f"Error creating placeholder plot: {e}")
@@ -24,6 +26,7 @@ def create_placeholder_plot(title="No Data or Plot Error", message="Data might b
24
  fig_err, ax_err = plt.subplots()
25
  ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
26
  ax_err.axis('off')
 
27
  return fig_err
28
  # No plt.close(fig) here as Gradio handles the figure object.
29
 
@@ -37,56 +40,64 @@ def generate_posts_activity_plot(df, date_column='published_at'):
37
  logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
38
  return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
39
 
 
40
  try:
41
- df_copy = df.copy()
42
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
43
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
44
-
45
  df_copy = df_copy.dropna(subset=[date_column])
46
  if df_copy.empty:
47
  logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
48
  return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")
49
 
50
- posts_over_time = df_copy.set_index(date_column).resample('D').size()
51
-
52
  if posts_over_time.empty:
53
  logging.info("Posts activity: No posts after resampling by day.")
54
  return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
55
 
56
  fig, ax = plt.subplots(figsize=(10, 5))
57
  posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
58
- ax.set_title('Posts Activity Over Time')
59
  ax.set_xlabel('Date')
60
  ax.set_ylabel('Number of Posts')
61
  ax.grid(True, linestyle='--', alpha=0.7)
62
  plt.xticks(rotation=45)
63
- plt.tight_layout()
 
64
  logging.info("Successfully generated posts activity plot.")
65
  return fig
66
  except Exception as e:
67
  logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
 
68
  return create_placeholder_plot(title="Posts Activity Error", message=str(e))
69
  finally:
70
- plt.close('all')
 
 
 
71
 
72
- def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'): # Updated col names
 
73
  """Generates a bar plot for total engagement types (likes, comments, shares)."""
74
  logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
75
-
76
  required_cols = [likes_col, comments_col, shares_col]
77
  if df is None or df.empty:
78
  logging.warning("Engagement type: DataFrame is empty.")
79
  return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.")
80
-
81
  missing_cols = [col for col in required_cols if col not in df.columns]
82
  if missing_cols:
83
  msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
84
  logging.warning(msg)
85
  return create_placeholder_plot(title="Post Engagement Types", message=msg)
86
 
 
87
  try:
88
- df_copy = df.copy()
89
- for col in required_cols:
90
  df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
91
 
92
  total_likes = df_copy[likes_col].sum()
@@ -102,26 +113,28 @@ def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='comme
102
  'Comments': total_comments,
103
  'Shares': total_shares
104
  }
105
-
106
  fig, ax = plt.subplots(figsize=(8, 5))
107
  bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon'])
108
- ax.set_title('Total Post Engagement Types')
109
  ax.set_xlabel('Engagement Type')
110
  ax.set_ylabel('Total Count')
111
  ax.grid(axis='y', linestyle='--', alpha=0.7)
112
-
113
  for bar in bars:
114
  yval = bar.get_height()
115
  ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * max(engagement_data.values(), default=10)), str(int(yval)), ha='center', va='bottom')
116
-
117
- plt.tight_layout()
 
118
  logging.info("Successfully generated engagement type plot.")
119
  return fig
120
  except Exception as e:
121
  logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
 
122
  return create_placeholder_plot(title="Engagement Type Error", message=str(e))
123
  finally:
124
- plt.close('all')
125
 
126
  def generate_mentions_activity_plot(df, date_column='date'):
127
  """Generates a plot for mentions activity over time."""
@@ -132,43 +145,46 @@ def generate_mentions_activity_plot(df, date_column='date'):
132
  if date_column not in df.columns:
133
  logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
134
  return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
135
-
 
136
  try:
137
  df_copy = df.copy()
138
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
139
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
140
-
141
  df_copy = df_copy.dropna(subset=[date_column])
142
  if df_copy.empty:
143
  logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
144
  return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")
145
 
146
  mentions_over_time = df_copy.set_index(date_column).resample('D').size()
147
-
148
  if mentions_over_time.empty:
149
  logging.info("Mentions activity: No mentions after resampling by day.")
150
  return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")
151
 
152
  fig, ax = plt.subplots(figsize=(10, 5))
153
  mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
154
- ax.set_title('Mentions Activity Over Time')
155
  ax.set_xlabel('Date')
156
  ax.set_ylabel('Number of Mentions')
157
  ax.grid(True, linestyle='--', alpha=0.7)
158
  plt.xticks(rotation=45)
159
- plt.tight_layout()
 
160
  logging.info("Successfully generated mentions activity plot.")
161
  return fig
162
  except Exception as e:
163
  logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
 
164
  return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
165
  finally:
166
- plt.close('all')
167
 
168
- def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
169
  """Generates a pie chart for mention sentiment distribution."""
170
  logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
171
-
172
  if df is None or df.empty:
173
  logging.warning("Mention sentiment: DataFrame is empty.")
174
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
@@ -177,6 +193,7 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
177
  logging.warning(msg)
178
  return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)
179
 
 
180
  try:
181
  df_copy = df.copy()
182
  sentiment_counts = df_copy[sentiment_column].value_counts()
@@ -185,56 +202,51 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
185
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
186
 
187
  fig, ax = plt.subplots(figsize=(8, 5))
188
- # Using a qualitative colormap like 'Pastel1' or 'Set3' can be good for categorical data
189
- colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
190
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
191
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
192
- ax.set_title('Mention Sentiment Distribution')
193
- ax.axis('equal')
194
- plt.tight_layout()
 
195
  logging.info("Successfully generated mention sentiment plot.")
196
  return fig
197
  except Exception as e:
198
  logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
 
199
  return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
200
  finally:
201
- plt.close('all')
202
 
203
  # --- Updated Follower Plot Functions ---
204
 
205
- def generate_followers_count_over_time_plot(df, date_info_column='category_name',
206
- organic_count_col='follower_count_organic',
207
  paid_count_col='follower_count_paid',
208
- type_filter_column='follower_count_type',
209
  type_value='follower_gains_monthly'):
210
- """
211
- Generates a plot for specific follower counts (organic and paid) over time.
212
- Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
213
- """
214
  title = f"Followers Count Over Time ({type_value})"
215
  logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
216
 
217
  if df is None or df.empty:
218
  return create_placeholder_plot(title=title, message="No follower data available.")
219
-
220
  required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
221
  missing_cols = [col for col in required_cols if col not in df.columns]
222
  if missing_cols:
223
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
224
 
 
225
  try:
226
  df_copy = df.copy()
227
- df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy() # Use .copy() to avoid SettingWithCopyWarning
228
 
229
  if df_filtered.empty:
230
  return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
231
 
232
- # Convert date_info_column to datetime
233
  df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
234
-
235
  df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
236
  df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
237
-
238
  df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj')
239
 
240
  if df_filtered.empty:
@@ -243,30 +255,27 @@ def generate_followers_count_over_time_plot(df, date_info_column='category_name'
243
  fig, ax = plt.subplots(figsize=(10, 5))
244
  ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers')
245
  ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers')
246
-
247
- ax.set_title(title)
248
  ax.set_xlabel('Date')
249
  ax.set_ylabel('Follower Count')
250
  ax.legend()
251
  ax.grid(True, linestyle='--', alpha=0.7)
252
  plt.xticks(rotation=45)
253
- plt.tight_layout()
 
254
  return fig
255
  except Exception as e:
256
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
257
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
258
  finally:
259
- plt.close('all')
260
 
261
- def generate_followers_growth_rate_plot(df, date_info_column='category_name',
262
- organic_count_col='follower_count_organic',
263
  paid_count_col='follower_count_paid',
264
- type_filter_column='follower_count_type',
265
  type_value='follower_gains_monthly'):
266
- """
267
- Calculates and plots follower growth rate (organic and paid) over time.
268
- Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
269
- """
270
  title = f"Follower Growth Rate ({type_value})"
271
  logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
272
 
@@ -277,7 +286,8 @@ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
277
  missing_cols = [col for col in required_cols if col not in df.columns]
278
  if missing_cols:
279
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
280
-
 
281
  try:
282
  df_copy = df.copy()
283
  df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
@@ -288,26 +298,20 @@ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
288
  df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
289
  df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce')
290
  df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce')
291
-
292
  df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
293
 
294
- if df_filtered.empty or len(df_filtered) < 2: # Need at least 2 points for pct_change
295
  return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
296
 
297
  df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100
298
  df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100
299
-
300
- # Replace inf with NaN then drop NaNs for growth rates
301
  df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
302
- # df_filtered.dropna(subset=['organic_growth_rate', 'paid_growth_rate'], how='all', inplace=True) # Keep row if at least one rate is valid
303
 
304
  fig, ax = plt.subplots(figsize=(10, 5))
305
-
306
  plotted_organic = False
307
  if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty:
308
  ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate')
309
  plotted_organic = True
310
-
311
  plotted_paid = False
312
  if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty:
313
  ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate')
@@ -316,43 +320,42 @@ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
316
  if not plotted_organic and not plotted_paid:
317
  return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")
318
 
319
- ax.set_title(title)
320
  ax.set_xlabel('Date')
321
  ax.set_ylabel('Growth Rate (%)')
322
  ax.yaxis.set_major_formatter(mticker.PercentFormatter())
323
  ax.legend()
324
  ax.grid(True, linestyle='--', alpha=0.7)
325
  plt.xticks(rotation=45)
326
- plt.tight_layout()
 
327
  return fig
328
  except Exception as e:
329
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
330
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
331
  finally:
332
- plt.close('all')
333
 
334
- def generate_followers_by_demographics_plot(df, category_col='category_name',
335
- organic_count_col='follower_count_organic',
336
  paid_count_col='follower_count_paid',
337
- type_filter_column='follower_count_type',
338
  type_value=None, plot_title="Followers by Demographics"):
339
- """
340
- Generates a grouped bar chart for follower demographics (organic and paid).
341
- 'category_col' here is the demographic attribute (e.g., Location, Industry).
342
- """
343
  logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
344
 
345
  if df is None or df.empty:
346
  return create_placeholder_plot(title=plot_title, message="No follower data available.")
347
-
348
  required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
349
  missing_cols = [col for col in required_cols if col not in df.columns]
350
  if missing_cols:
351
  return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
352
-
353
- if type_value is None:
354
  return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
355
 
 
356
  try:
357
  df_copy = df.copy()
358
  df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
@@ -362,17 +365,14 @@ def generate_followers_by_demographics_plot(df, category_col='category_name',
362
 
363
  df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
364
  df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
365
-
366
  demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum()
367
- # Sort by total followers (organic + paid) for better visualization
368
  demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col]
369
  demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort'])
370
 
371
-
372
  if demographics_data.empty:
373
  return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
374
-
375
- top_n = 10
376
  if len(demographics_data) > top_n:
377
  demographics_data = demographics_data.head(top_n)
378
  plot_title_updated = f"{plot_title} (Top {top_n})"
@@ -380,51 +380,49 @@ def generate_followers_by_demographics_plot(df, category_col='category_name',
380
  plot_title_updated = plot_title
381
 
382
  fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) )
383
-
384
  bar_width = 0.35
385
  index = np.arange(len(demographics_data.index))
386
-
387
  bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue')
388
  bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral')
389
 
390
- ax.set_title(plot_title_updated)
391
  ax.set_xlabel(category_col.replace('_', ' ').title())
392
  ax.set_ylabel('Number of Followers')
393
  ax.set_xticks(index)
394
  ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
395
  ax.legend()
396
  ax.grid(axis='y', linestyle='--', alpha=0.7)
397
-
398
- # Add labels on top of bars
399
  for bar_group in [bars1, bars2]:
400
- for bar in bar_group:
401
- yval = bar.get_height()
402
- if yval > 0: # Only add label if value is not zero
403
- ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]),
404
  str(int(yval)), ha='center', va='bottom', fontsize=8)
405
 
406
- plt.tight_layout()
 
407
  return fig
408
  except Exception as e:
409
  logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
 
410
  return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
411
  finally:
412
- plt.close('all')
413
-
414
 
415
  def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
416
- """Generates a plot for engagement rate over time."""
417
  title = "Engagement Rate Over Time"
418
  logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {len(df) if df is not None else 'None'}")
419
 
420
  if df is None or df.empty:
421
  return create_placeholder_plot(title=title, message="No post data for engagement rate.")
422
-
423
  required_cols = [date_column, engagement_rate_col]
424
  missing_cols = [col for col in required_cols if col not in df.columns]
425
  if missing_cols:
426
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
427
 
 
428
  try:
429
  df_copy = df.copy()
430
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
@@ -435,38 +433,38 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
435
  return create_placeholder_plot(title=title, message="No valid data after cleaning.")
436
 
437
  engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
438
- engagement_over_time = engagement_over_time.dropna()
439
 
440
  if engagement_over_time.empty:
441
  return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
442
 
443
  fig, ax = plt.subplots(figsize=(10, 5))
444
  ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange')
445
- ax.set_title(title)
446
  ax.set_xlabel('Date')
447
  ax.set_ylabel('Engagement Rate')
448
- # Adjust xmax for PercentFormatter based on whether rate is 0-1 or 0-100
449
- max_rate_val = engagement_over_time.max()
450
- formatter_xmax = 1.0 if max_rate_val <= 1.5 and max_rate_val >=0 else 100.0 # Heuristic for 0-1 vs 0-100 scale
451
- if max_rate_val > 1.5 and formatter_xmax == 1.0: # If data seems to be percentage but formatted as decimal
452
  formatter_xmax = 100.0
453
- elif max_rate_val > 100 and formatter_xmax == 1.0: # If data is clearly > 100 but we assumed 0-1
454
- formatter_xmax = max_rate_val # Or some other sensible upper bound for formatting
455
 
456
- ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
457
  ax.grid(True, linestyle='--', alpha=0.7)
458
  plt.xticks(rotation=45)
459
- plt.tight_layout()
 
460
  return fig
461
  except Exception as e:
462
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
463
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
464
  finally:
465
- plt.close('all')
466
 
467
- def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
468
- """Generates a plot for reach (clicks) over time."""
469
- title = "Reach Over Time (Clicks)"
470
  logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
471
 
472
  if df is None or df.empty:
@@ -477,36 +475,36 @@ def generate_reach_over_time_plot(df, date_column='published_at', reach_col='cli
477
  if missing_cols:
478
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
479
 
 
480
  try:
481
  df_copy = df.copy()
482
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
483
  df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
484
  df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
485
 
486
- if df_copy.empty: # After dropping NaNs for essential columns
487
  return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")
488
 
489
  reach_over_time = df_copy.resample('D')[reach_col].sum()
490
- # No need to check if reach_over_time is empty if df_copy wasn't, sum of NaNs is 0.
491
- # Plot will show 0 if all sums are 0.
492
 
493
  fig, ax = plt.subplots(figsize=(10, 5))
494
  ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
495
- ax.set_title(title)
496
  ax.set_xlabel('Date')
497
- ax.set_ylabel('Total Clicks')
498
  ax.grid(True, linestyle='--', alpha=0.7)
499
  plt.xticks(rotation=45)
500
- plt.tight_layout()
 
501
  return fig
502
  except Exception as e:
503
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
504
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
505
  finally:
506
- plt.close('all')
507
 
508
  def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
509
- """Generates a plot for impressions over time."""
510
  title = "Impressions Over Time"
511
  logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {len(df) if df is not None else 'None'}")
512
 
@@ -518,35 +516,36 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
518
  if missing_cols:
519
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
520
 
 
521
  try:
522
  df_copy = df.copy()
523
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
524
  df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
525
  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
526
 
527
- if df_copy.empty: # After dropping NaNs for essential columns
528
- return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
529
 
530
  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
531
 
532
  fig, ax = plt.subplots(figsize=(10, 5))
533
  ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue')
534
- ax.set_title(title)
535
  ax.set_xlabel('Date')
536
  ax.set_ylabel('Total Impressions')
537
  ax.grid(True, linestyle='--', alpha=0.7)
538
  plt.xticks(rotation=45)
539
- plt.tight_layout()
 
540
  return fig
541
  except Exception as e:
542
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
543
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
544
  finally:
545
- plt.close('all')
546
 
547
- # --- New Plot Functions from User Request ---
548
  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
549
- """Generates a plot for likes over time."""
550
  title = "Reactions (Likes) Over Time"
551
  logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
552
  if df is None or df.empty:
@@ -554,6 +553,8 @@ def generate_likes_over_time_plot(df, date_column='published_at', likes_col='lik
554
  required_cols = [date_column, likes_col]
555
  if any(col not in df.columns for col in required_cols):
556
  return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
 
 
557
  try:
558
  df_copy = df.copy()
559
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
@@ -561,35 +562,34 @@ def generate_likes_over_time_plot(df, date_column='published_at', likes_col='lik
561
  df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
562
  if df_copy.empty:
563
  return create_placeholder_plot(title=title, message="No valid data after cleaning.")
564
-
565
  data_over_time = df_copy.resample('D')[likes_col].sum()
566
  fig, ax = plt.subplots(figsize=(10, 5))
567
  ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson')
568
- ax.set_title(title)
569
  ax.set_xlabel('Date')
570
  ax.set_ylabel('Total Likes')
571
  ax.grid(True, linestyle='--', alpha=0.7)
572
  plt.xticks(rotation=45)
573
- plt.tight_layout()
 
574
  return fig
575
  except Exception as e:
576
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
577
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
578
  finally:
579
- plt.close('all')
580
 
581
  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
582
- """Generates a plot for clicks over time (can be same as reach if clicks are primary reach metric)."""
583
- # This is essentially the same as generate_reach_over_time_plot if reach_col is 'clickCount'.
584
- # For clarity, keeping it separate if user wants to distinguish or use a different column later.
585
  title = "Clicks Over Time"
586
  logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
587
- # Reusing logic from generate_reach_over_time_plot
 
588
  return generate_reach_over_time_plot(df, date_column, clicks_col)
589
 
590
 
591
  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
592
- """Generates a plot for shares over time."""
593
  title = "Shares Over Time"
594
  logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
595
  if df is None or df.empty:
@@ -597,6 +597,8 @@ def generate_shares_over_time_plot(df, date_column='published_at', shares_col='s
597
  required_cols = [date_column, shares_col]
598
  if any(col not in df.columns for col in required_cols):
599
  return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
 
 
600
  try:
601
  df_copy = df.copy()
602
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
@@ -608,21 +610,22 @@ def generate_shares_over_time_plot(df, date_column='published_at', shares_col='s
608
  data_over_time = df_copy.resample('D')[shares_col].sum()
609
  fig, ax = plt.subplots(figsize=(10, 5))
610
  ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal')
611
- ax.set_title(title)
612
  ax.set_xlabel('Date')
613
  ax.set_ylabel('Total Shares')
614
  ax.grid(True, linestyle='--', alpha=0.7)
615
  plt.xticks(rotation=45)
616
- plt.tight_layout()
 
617
  return fig
618
  except Exception as e:
619
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
620
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
621
  finally:
622
- plt.close('all')
623
 
624
  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
625
- """Generates a plot for comments over time."""
626
  title = "Comments Over Time"
627
  logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
628
  if df is None or df.empty:
@@ -630,6 +633,8 @@ def generate_comments_over_time_plot(df, date_column='published_at', comments_co
630
  required_cols = [date_column, comments_col]
631
  if any(col not in df.columns for col in required_cols):
632
  return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
 
 
633
  try:
634
  df_copy = df.copy()
635
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
@@ -641,48 +646,42 @@ def generate_comments_over_time_plot(df, date_column='published_at', comments_co
641
  data_over_time = df_copy.resample('D')[comments_col].sum()
642
  fig, ax = plt.subplots(figsize=(10, 5))
643
  ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold')
644
- ax.set_title(title)
645
  ax.set_xlabel('Date')
646
  ax.set_ylabel('Total Comments')
647
  ax.grid(True, linestyle='--', alpha=0.7)
648
  plt.xticks(rotation=45)
649
- plt.tight_layout()
 
650
  return fig
651
  except Exception as e:
652
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
653
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
654
  finally:
655
- plt.close('all')
656
 
657
  def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
658
- """
659
- Generates a pie chart for comment sentiment distribution.
660
- Assumes df might be post-level with an aggregated or example sentiment,
661
- or ideally, a comment-level df with sentiment per comment.
662
- If date_column is provided, it's for logging/context but not directly used for filtering here.
663
- """
664
  title = "Breakdown of Comments by Sentiment"
665
  logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
666
 
667
  if df is None or df.empty:
668
  return create_placeholder_plot(title=title, message="No data for comment sentiment.")
669
  if sentiment_column not in df.columns:
670
- # Check for a common alternative if the primary is missing (e.g. from post-level data)
671
  if 'sentiment' in df.columns and sentiment_column != 'sentiment':
672
  logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
673
- sentiment_column = 'sentiment' # Use fallback
674
  else:
675
  return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")
676
-
677
- # If the sentiment column has no valid data (all NaNs, or not convertible)
678
  if df[sentiment_column].isnull().all():
679
  return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")
680
 
 
681
  try:
682
  df_copy = df.copy()
683
- # Ensure the sentiment column is treated as categorical (string)
684
  df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
685
- sentiment_counts = df_copy[sentiment_column].value_counts().dropna() # Dropna for safety
686
 
687
  if sentiment_counts.empty or sentiment_counts.sum() == 0:
688
  return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
@@ -690,21 +689,20 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
690
  fig, ax = plt.subplots(figsize=(8, 5))
691
  colors_map = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
692
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
693
-
694
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
695
- ax.set_title(title)
696
- ax.axis('equal')
697
- plt.tight_layout()
 
698
  return fig
699
  except Exception as e:
700
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
701
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
702
  finally:
703
- plt.close('all')
704
 
705
- # --- NEW PLOT FUNCTIONS FOR CONTENT STRATEGY ---
706
  def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
707
- """Generates a plot for post frequency over time (e.g., daily, weekly, monthly)."""
708
  title = f"Post Frequency Over Time ({resample_period})"
709
  logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
710
 
@@ -713,38 +711,40 @@ def generate_post_frequency_plot(df, date_column='published_at', resample_period
713
  if date_column not in df.columns:
714
  return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")
715
 
 
716
  try:
717
  df_copy = df.copy()
718
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
719
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
720
-
721
  df_copy = df_copy.dropna(subset=[date_column])
722
  if df_copy.empty:
723
  return create_placeholder_plot(title=title, message="No valid date entries found.")
724
 
725
  post_frequency = df_copy.set_index(date_column).resample(resample_period).size()
726
-
727
  if post_frequency.empty:
728
  return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")
729
 
730
  fig, ax = plt.subplots(figsize=(10, 5))
731
  post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None)
732
- ax.set_title(title)
733
  ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
734
  ax.set_ylabel('Number of Posts')
735
  ax.grid(True, linestyle='--', alpha=0.7)
736
  plt.xticks(rotation=45)
737
- plt.tight_layout()
 
738
  logging.info(f"Successfully generated {title} plot.")
739
  return fig
740
  except Exception as e:
741
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
742
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
743
  finally:
744
- plt.close('all')
745
 
746
  def generate_content_format_breakdown_plot(df, format_col='media_type'):
747
- """Generates a bar chart for breakdown of content by format."""
748
  title = "Breakdown of Content by Format"
749
  logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")
750
 
@@ -753,6 +753,7 @@ def generate_content_format_breakdown_plot(df, format_col='media_type'):
753
  if format_col not in df.columns:
754
  return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")
755
 
 
756
  try:
757
  df_copy = df.copy()
758
  format_counts = df_copy[format_col].value_counts().dropna()
@@ -762,46 +763,42 @@ def generate_content_format_breakdown_plot(df, format_col='media_type'):
762
 
763
  fig, ax = plt.subplots(figsize=(8, 6))
764
  format_counts.plot(kind='bar', ax=ax, color='skyblue')
765
- ax.set_title(title)
766
  ax.set_xlabel('Media Type')
767
  ax.set_ylabel('Number of Posts')
768
  ax.grid(axis='y', linestyle='--', alpha=0.7)
769
  plt.xticks(rotation=45, ha="right")
770
- plt.tight_layout()
771
-
772
- # Add counts on top of bars
773
  for i, v in enumerate(format_counts):
774
  ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom')
775
-
 
 
776
  logging.info(f"Successfully generated {title} plot.")
777
  return fig
778
  except Exception as e:
779
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
780
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
781
  finally:
782
- plt.close('all')
783
 
784
  def _parse_eb_label(label_data):
785
- """Helper to parse eb_labels which might be lists or string representations of lists."""
786
  if isinstance(label_data, list):
787
  return label_data
788
  if isinstance(label_data, str):
789
  try:
790
- # Try to evaluate as a list
791
  parsed = ast.literal_eval(label_data)
792
  if isinstance(parsed, list):
793
  return parsed
794
- # If it's a single string not in list format, treat as a single label
795
- return [str(parsed)]
796
  except (ValueError, SyntaxError):
797
- # If not a list string, treat the whole string as one label
798
  return [label_data] if label_data.strip() else []
799
  if pd.isna(label_data):
800
  return []
801
- return [] # Default for other types
802
 
803
  def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
804
- """Generates a horizontal bar chart for breakdown of content by topics."""
805
  title = f"Breakdown of Content by Topics (Top {top_n})"
806
  logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")
807
 
@@ -810,10 +807,9 @@ def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
810
  if topics_col not in df.columns:
811
  return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
812
 
 
813
  try:
814
  df_copy = df.copy()
815
-
816
- # Apply parsing and explode
817
  parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
818
  exploded_labels = parsed_labels.explode().dropna()
819
 
@@ -824,93 +820,91 @@ def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
824
 
825
  if topic_counts.empty:
826
  return create_placeholder_plot(title=title, message="No topics to display after counting.")
827
-
828
- # Take top N and sort for plotting (descending for horizontal bar)
829
  top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
830
 
831
  fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
832
  top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
833
- ax.set_title(title)
834
  ax.set_xlabel('Number of Posts')
835
  ax.set_ylabel('Topic')
836
-
837
- # Add counts next to bars
838
  for i, (topic, count) in enumerate(top_topics.items()):
839
  ax.text(count + (0.01 * top_topics.max()), i, str(count), va='center')
840
-
841
- plt.tight_layout()
 
842
  logging.info(f"Successfully generated {title} plot.")
843
  return fig
844
  except Exception as e:
845
  logging.error(f"Error generating {title}: {e}", exc_info=True)
 
846
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
847
  finally:
848
- plt.close('all')
849
 
850
 
851
  if __name__ == '__main__':
852
  # Create dummy data for testing
853
  posts_data = {
854
- 'id': [f'post{i}' for i in range(1, 8)], # Increased to 7 for more data
855
  'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
856
  'likeCount': [10, 5, 12, 8, 15, 3, 20],
857
  'commentCount': [2, 1, 3, 1, 4, 0, 5],
858
- 'shareCount': [1, 0, 1, 1, 2, 0, 1],
859
  'clickCount': [20, 15, 30, 22, 40, 10, 50],
860
  'impressionCount': [200, 150, 300, 220, 400, 100, 500],
861
  'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
862
- 'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'], # New column
863
- 'eb_labels': [ # New column with various formats
864
- "['AI', 'Tech']",
865
- ['Innovation'],
866
- 'General',
867
- None,
868
  ['Tech', 'Future'],
869
  "['AI', 'Development']",
870
  ['Tech']
871
  ],
872
- 'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive'] # For comment sentiment plot
873
  }
874
  sample_merged_posts_df = pd.DataFrame(posts_data)
875
 
876
- # Updated Follower Stats Data
877
  follower_data = {
878
  'follower_count_type': [
879
- 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
880
  'follower_geo', 'follower_geo', 'follower_geo',
881
  'follower_function', 'follower_function',
882
  'follower_industry', 'follower_industry',
883
  'follower_seniority', 'follower_seniority'
884
  ],
885
  'category_name': [
886
- '2024-01-01', '2024-02-01', '2024-03-01', # Dates for monthly gains
887
- 'USA', 'Canada', 'UK', # Geo
888
- 'Engineering', 'Sales', # Function/Role
889
- 'Tech', 'Finance', # Industry
890
- 'Senior', 'Junior' # Seniority
891
  ],
892
  'follower_count_organic': [
893
- 100, 110, 125, # Organic monthly gains
894
- 500, 300, 150, # Organic Geo counts
895
- 400, 200, # Organic Role counts
896
- 250, 180, # Organic Industry counts
897
- 300, 220 # Organic Seniority counts
898
  ],
899
  'follower_count_paid': [
900
- 20, 30, 25, # Paid monthly gains
901
- 50, 40, 60, # Paid Geo counts
902
- 30, 20, # Paid Role counts
903
- 45, 35, # Paid Industry counts
904
- 60, 40 # Paid Seniority counts
905
  ]
906
  }
907
  sample_follower_stats_df = pd.DataFrame(follower_data)
908
 
909
  logging.info("--- Testing Existing Plot Generations ---")
910
- # ... (keep existing tests for older plots) ...
911
  fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
912
  if fig_posts_activity: logging.info("Posts activity plot generated.")
913
-
914
  fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
915
  if fig_engagement_type: logging.info("Engagement type plot generated.")
916
 
@@ -930,7 +924,6 @@ if __name__ == '__main__':
930
  if fig_followers_rate: logging.info("Followers Growth Rate plot generated.")
931
  fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
932
  if fig_geo: logging.info("Followers by Location plot generated.")
933
- # ... add other follower demographic tests ...
934
 
935
  fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
936
  if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
@@ -938,10 +931,10 @@ if __name__ == '__main__':
938
  if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
939
  fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
940
  if fig_impressions: logging.info("Impressions Over Time plot generated.")
941
-
942
  fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
943
  if fig_likes_time: logging.info("Likes Over Time plot generated.")
944
- fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy()) # Uses reach logic
945
  if fig_clicks_time: logging.info("Clicks Over Time plot generated.")
946
  fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
947
  if fig_shares_time: logging.info("Shares Over Time plot generated.")
@@ -954,7 +947,7 @@ if __name__ == '__main__':
954
  logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
955
  fig_post_freq = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D')
956
  if fig_post_freq: logging.info("Post Frequency (Daily) plot generated.")
957
-
958
  fig_post_freq_w = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W')
959
  if fig_post_freq_w: logging.info("Post Frequency (Weekly) plot generated.")
960
 
@@ -963,17 +956,16 @@ if __name__ == '__main__':
963
 
964
  fig_content_topics = generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5)
965
  if fig_content_topics: logging.info("Content Topic Breakdown plot generated.")
966
-
967
- # Test with missing columns / empty data for new plots
968
  logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
969
  empty_df = pd.DataFrame()
970
  fig_post_freq_empty = generate_post_frequency_plot(empty_df.copy())
971
  if fig_post_freq_empty: logging.info("Post Frequency (empty df) placeholder generated.")
972
-
973
  fig_content_format_missing_col = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col')
974
  if fig_content_format_missing_col: logging.info("Content Format (missing col) placeholder generated.")
975
 
976
- fig_content_topics_no_labels = generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels') # eb_labels won't exist
977
  if fig_content_topics_no_labels: logging.info("Content Topic (missing col) placeholder generated.")
978
 
979
  df_no_topics_data = sample_merged_posts_df.copy()
 
13
  def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
14
  """Creates a placeholder Matplotlib plot indicating no data or an error."""
15
  try:
16
+ fig, ax = plt.subplots(figsize=(8, 4))
17
  ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
18
+ ax.axis('off')
19
  plt.tight_layout()
20
+ # Add spacing for consistency, though it might be less critical for placeholders
21
+ fig.subplots_adjust(top=0.90)
22
  return fig
23
  except Exception as e:
24
  logging.error(f"Error creating placeholder plot: {e}")
 
26
  fig_err, ax_err = plt.subplots()
27
  ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
28
  ax_err.axis('off')
29
+ fig_err.subplots_adjust(top=0.90)
30
  return fig_err
31
  # No plt.close(fig) here as Gradio handles the figure object.
32
 
 
40
  logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
41
  return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
42
 
43
+ fig = None # Initialize fig to None
44
  try:
45
+ df_copy = df.copy()
46
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
47
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
48
+
49
  df_copy = df_copy.dropna(subset=[date_column])
50
  if df_copy.empty:
51
  logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
52
  return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")
53
 
54
+ posts_over_time = df_copy.set_index(date_column).resample('D').size()
55
+
56
  if posts_over_time.empty:
57
  logging.info("Posts activity: No posts after resampling by day.")
58
  return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
59
 
60
  fig, ax = plt.subplots(figsize=(10, 5))
61
  posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
62
+ ax.set_title('Posts Activity Over Time', y=1.03) # Matplotlib title
63
  ax.set_xlabel('Date')
64
  ax.set_ylabel('Number of Posts')
65
  ax.grid(True, linestyle='--', alpha=0.7)
66
  plt.xticks(rotation=45)
67
+ plt.tight_layout(fig=fig)
68
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
69
  logging.info("Successfully generated posts activity plot.")
70
  return fig
71
  except Exception as e:
72
  logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
73
+ if fig: plt.close(fig) # Close if fig was created before error
74
  return create_placeholder_plot(title="Posts Activity Error", message=str(e))
75
  finally:
76
+ # If fig was not returned (e.g. placeholder was returned), and it exists, close it.
77
+ # However, if fig is returned, Gradio handles it.
78
+ # The plt.close('all') was too broad. We only close specific figures if not returned.
79
+ pass # Let Gradio handle the returned figure. If a placeholder is returned, its figure is handled there.
80
 
81
+
82
+ def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'):
83
  """Generates a bar plot for total engagement types (likes, comments, shares)."""
84
  logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
85
+
86
  required_cols = [likes_col, comments_col, shares_col]
87
  if df is None or df.empty:
88
  logging.warning("Engagement type: DataFrame is empty.")
89
  return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.")
90
+
91
  missing_cols = [col for col in required_cols if col not in df.columns]
92
  if missing_cols:
93
  msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
94
  logging.warning(msg)
95
  return create_placeholder_plot(title="Post Engagement Types", message=msg)
96
 
97
+ fig = None
98
  try:
99
+ df_copy = df.copy()
100
+ for col in required_cols:
101
  df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
102
 
103
  total_likes = df_copy[likes_col].sum()
 
113
  'Comments': total_comments,
114
  'Shares': total_shares
115
  }
116
+
117
  fig, ax = plt.subplots(figsize=(8, 5))
118
  bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon'])
119
+ ax.set_title('Total Post Engagement Types', y=1.03) # Matplotlib title
120
  ax.set_xlabel('Engagement Type')
121
  ax.set_ylabel('Total Count')
122
  ax.grid(axis='y', linestyle='--', alpha=0.7)
123
+
124
  for bar in bars:
125
  yval = bar.get_height()
126
  ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * max(engagement_data.values(), default=10)), str(int(yval)), ha='center', va='bottom')
127
+
128
+ plt.tight_layout(fig=fig)
129
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
130
  logging.info("Successfully generated engagement type plot.")
131
  return fig
132
  except Exception as e:
133
  logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
134
+ if fig: plt.close(fig)
135
  return create_placeholder_plot(title="Engagement Type Error", message=str(e))
136
  finally:
137
+ pass
138
 
139
  def generate_mentions_activity_plot(df, date_column='date'):
140
  """Generates a plot for mentions activity over time."""
 
145
  if date_column not in df.columns:
146
  logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
147
  return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
148
+
149
+ fig = None
150
  try:
151
  df_copy = df.copy()
152
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
153
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
154
+
155
  df_copy = df_copy.dropna(subset=[date_column])
156
  if df_copy.empty:
157
  logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
158
  return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")
159
 
160
  mentions_over_time = df_copy.set_index(date_column).resample('D').size()
161
+
162
  if mentions_over_time.empty:
163
  logging.info("Mentions activity: No mentions after resampling by day.")
164
  return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")
165
 
166
  fig, ax = plt.subplots(figsize=(10, 5))
167
  mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
168
+ ax.set_title('Mentions Activity Over Time', y=1.03) # Matplotlib title
169
  ax.set_xlabel('Date')
170
  ax.set_ylabel('Number of Mentions')
171
  ax.grid(True, linestyle='--', alpha=0.7)
172
  plt.xticks(rotation=45)
173
+ plt.tight_layout(fig=fig)
174
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
175
  logging.info("Successfully generated mentions activity plot.")
176
  return fig
177
  except Exception as e:
178
  logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
179
+ if fig: plt.close(fig)
180
  return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
181
  finally:
182
+ pass
183
 
184
+ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
185
  """Generates a pie chart for mention sentiment distribution."""
186
  logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
187
+
188
  if df is None or df.empty:
189
  logging.warning("Mention sentiment: DataFrame is empty.")
190
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
 
193
  logging.warning(msg)
194
  return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)
195
 
196
+ fig = None
197
  try:
198
  df_copy = df.copy()
199
  sentiment_counts = df_copy[sentiment_column].value_counts()
 
202
  return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
203
 
204
  fig, ax = plt.subplots(figsize=(8, 5))
205
+ colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
 
206
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
207
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
208
+ ax.set_title('Mention Sentiment Distribution', y=1.03) # Matplotlib title
209
+ ax.axis('equal')
210
+ plt.tight_layout(fig=fig)
211
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
212
  logging.info("Successfully generated mention sentiment plot.")
213
  return fig
214
  except Exception as e:
215
  logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
216
+ if fig: plt.close(fig)
217
  return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
218
  finally:
219
+ pass
220
 
221
  # --- Updated Follower Plot Functions ---
222
 
223
+ def generate_followers_count_over_time_plot(df, date_info_column='category_name',
224
+ organic_count_col='follower_count_organic',
225
  paid_count_col='follower_count_paid',
226
+ type_filter_column='follower_count_type',
227
  type_value='follower_gains_monthly'):
 
 
 
 
228
  title = f"Followers Count Over Time ({type_value})"
229
  logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
230
 
231
  if df is None or df.empty:
232
  return create_placeholder_plot(title=title, message="No follower data available.")
233
+
234
  required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
235
  missing_cols = [col for col in required_cols if col not in df.columns]
236
  if missing_cols:
237
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
238
 
239
+ fig = None
240
  try:
241
  df_copy = df.copy()
242
+ df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
243
 
244
  if df_filtered.empty:
245
  return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
246
 
 
247
  df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
 
248
  df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
249
  df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
 
250
  df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj')
251
 
252
  if df_filtered.empty:
 
255
  fig, ax = plt.subplots(figsize=(10, 5))
256
  ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers')
257
  ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers')
258
+ ax.set_title(title, y=1.03) # Matplotlib title
 
259
  ax.set_xlabel('Date')
260
  ax.set_ylabel('Follower Count')
261
  ax.legend()
262
  ax.grid(True, linestyle='--', alpha=0.7)
263
  plt.xticks(rotation=45)
264
+ plt.tight_layout(fig=fig)
265
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
266
  return fig
267
  except Exception as e:
268
  logging.error(f"Error generating {title}: {e}", exc_info=True)
269
+ if fig: plt.close(fig)
270
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
271
  finally:
272
+ pass
273
 
274
+ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
275
+ organic_count_col='follower_count_organic',
276
  paid_count_col='follower_count_paid',
277
+ type_filter_column='follower_count_type',
278
  type_value='follower_gains_monthly'):
 
 
 
 
279
  title = f"Follower Growth Rate ({type_value})"
280
  logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
281
 
 
286
  missing_cols = [col for col in required_cols if col not in df.columns]
287
  if missing_cols:
288
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
289
+
290
+ fig = None
291
  try:
292
  df_copy = df.copy()
293
  df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
 
298
  df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
299
  df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce')
300
  df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce')
 
301
  df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
302
 
303
+ if df_filtered.empty or len(df_filtered) < 2:
304
  return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
305
 
306
  df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100
307
  df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100
 
 
308
  df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
 
309
 
310
  fig, ax = plt.subplots(figsize=(10, 5))
 
311
  plotted_organic = False
312
  if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty:
313
  ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate')
314
  plotted_organic = True
 
315
  plotted_paid = False
316
  if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty:
317
  ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate')
 
320
  if not plotted_organic and not plotted_paid:
321
  return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")
322
 
323
+ ax.set_title(title, y=1.03) # Matplotlib title
324
  ax.set_xlabel('Date')
325
  ax.set_ylabel('Growth Rate (%)')
326
  ax.yaxis.set_major_formatter(mticker.PercentFormatter())
327
  ax.legend()
328
  ax.grid(True, linestyle='--', alpha=0.7)
329
  plt.xticks(rotation=45)
330
+ plt.tight_layout(fig=fig)
331
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
332
  return fig
333
  except Exception as e:
334
  logging.error(f"Error generating {title}: {e}", exc_info=True)
335
+ if fig: plt.close(fig)
336
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
337
  finally:
338
+ pass
339
 
340
+ def generate_followers_by_demographics_plot(df, category_col='category_name',
341
+ organic_count_col='follower_count_organic',
342
  paid_count_col='follower_count_paid',
343
+ type_filter_column='follower_count_type',
344
  type_value=None, plot_title="Followers by Demographics"):
 
 
 
 
345
  logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
346
 
347
  if df is None or df.empty:
348
  return create_placeholder_plot(title=plot_title, message="No follower data available.")
349
+
350
  required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
351
  missing_cols = [col for col in required_cols if col not in df.columns]
352
  if missing_cols:
353
  return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
354
+
355
+ if type_value is None:
356
  return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
357
 
358
+ fig = None
359
  try:
360
  df_copy = df.copy()
361
  df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
 
365
 
366
  df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
367
  df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
 
368
  demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum()
 
369
  demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col]
370
  demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort'])
371
 
 
372
  if demographics_data.empty:
373
  return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
374
+
375
+ top_n = 10
376
  if len(demographics_data) > top_n:
377
  demographics_data = demographics_data.head(top_n)
378
  plot_title_updated = f"{plot_title} (Top {top_n})"
 
380
  plot_title_updated = plot_title
381
 
382
  fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) )
 
383
  bar_width = 0.35
384
  index = np.arange(len(demographics_data.index))
 
385
  bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue')
386
  bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral')
387
 
388
+ ax.set_title(plot_title_updated, y=1.03) # Matplotlib title
389
  ax.set_xlabel(category_col.replace('_', ' ').title())
390
  ax.set_ylabel('Number of Followers')
391
  ax.set_xticks(index)
392
  ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
393
  ax.legend()
394
  ax.grid(axis='y', linestyle='--', alpha=0.7)
395
+
 
396
  for bar_group in [bars1, bars2]:
397
+ for bar_item in bar_group: # Renamed 'bar' to 'bar_item' to avoid conflict
398
+ yval = bar_item.get_height()
399
+ if yval > 0:
400
+ ax.text(bar_item.get_x() + bar_item.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]),
401
  str(int(yval)), ha='center', va='bottom', fontsize=8)
402
 
403
+ plt.tight_layout(fig=fig)
404
+ fig.subplots_adjust(top=0.85) # Adjust top for more space, especially with rotated labels
405
  return fig
406
  except Exception as e:
407
  logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
408
+ if fig: plt.close(fig)
409
  return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
410
  finally:
411
+ pass
 
412
 
413
  def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
 
414
  title = "Engagement Rate Over Time"
415
  logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {len(df) if df is not None else 'None'}")
416
 
417
  if df is None or df.empty:
418
  return create_placeholder_plot(title=title, message="No post data for engagement rate.")
419
+
420
  required_cols = [date_column, engagement_rate_col]
421
  missing_cols = [col for col in required_cols if col not in df.columns]
422
  if missing_cols:
423
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
424
 
425
+ fig = None
426
  try:
427
  df_copy = df.copy()
428
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
 
433
  return create_placeholder_plot(title=title, message="No valid data after cleaning.")
434
 
435
  engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
436
+ engagement_over_time = engagement_over_time.dropna()
437
 
438
  if engagement_over_time.empty:
439
  return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
440
 
441
  fig, ax = plt.subplots(figsize=(10, 5))
442
  ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange')
443
+ ax.set_title(title, y=1.03) # Matplotlib title
444
  ax.set_xlabel('Date')
445
  ax.set_ylabel('Engagement Rate')
446
+ max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0
447
+ formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0
448
+ if max_rate_val > 1.5 and formatter_xmax == 1.0:
 
449
  formatter_xmax = 100.0
450
+ elif max_rate_val > 100 and formatter_xmax == 1.0:
451
+ formatter_xmax = max_rate_val
452
 
453
+ ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
454
  ax.grid(True, linestyle='--', alpha=0.7)
455
  plt.xticks(rotation=45)
456
+ plt.tight_layout(fig=fig)
457
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
458
  return fig
459
  except Exception as e:
460
  logging.error(f"Error generating {title}: {e}", exc_info=True)
461
+ if fig: plt.close(fig)
462
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
463
  finally:
464
+ pass
465
 
466
+ def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
467
+ title = "Reach Over Time (Clicks)" # Title matches the config in app.py
 
468
  logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
469
 
470
  if df is None or df.empty:
 
475
  if missing_cols:
476
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
477
 
478
+ fig = None
479
  try:
480
  df_copy = df.copy()
481
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
482
  df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
483
  df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
484
 
485
+ if df_copy.empty:
486
  return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")
487
 
488
  reach_over_time = df_copy.resample('D')[reach_col].sum()
 
 
489
 
490
  fig, ax = plt.subplots(figsize=(10, 5))
491
  ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
492
+ ax.set_title(title, y=1.03) # Matplotlib title
493
  ax.set_xlabel('Date')
494
+ ax.set_ylabel('Total Clicks') # Label consistent with reach_col='clickCount'
495
  ax.grid(True, linestyle='--', alpha=0.7)
496
  plt.xticks(rotation=45)
497
+ plt.tight_layout(fig=fig)
498
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
499
  return fig
500
  except Exception as e:
501
  logging.error(f"Error generating {title}: {e}", exc_info=True)
502
+ if fig: plt.close(fig)
503
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
504
  finally:
505
+ pass
506
 
507
  def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
 
508
  title = "Impressions Over Time"
509
  logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {len(df) if df is not None else 'None'}")
510
 
 
516
  if missing_cols:
517
  return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
518
 
519
+ fig = None
520
  try:
521
  df_copy = df.copy()
522
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
523
  df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
524
  df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
525
 
526
+ if df_copy.empty:
527
+ return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
528
 
529
  impressions_over_time = df_copy.resample('D')[impressions_col].sum()
530
 
531
  fig, ax = plt.subplots(figsize=(10, 5))
532
  ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue')
533
+ ax.set_title(title, y=1.03) # Matplotlib title
534
  ax.set_xlabel('Date')
535
  ax.set_ylabel('Total Impressions')
536
  ax.grid(True, linestyle='--', alpha=0.7)
537
  plt.xticks(rotation=45)
538
+ plt.tight_layout(fig=fig)
539
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
540
  return fig
541
  except Exception as e:
542
  logging.error(f"Error generating {title}: {e}", exc_info=True)
543
+ if fig: plt.close(fig)
544
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
545
  finally:
546
+ pass
547
 
 
548
  def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
 
549
  title = "Reactions (Likes) Over Time"
550
  logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
551
  if df is None or df.empty:
 
553
  required_cols = [date_column, likes_col]
554
  if any(col not in df.columns for col in required_cols):
555
  return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
556
+
557
+ fig = None
558
  try:
559
  df_copy = df.copy()
560
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
 
562
  df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
563
  if df_copy.empty:
564
  return create_placeholder_plot(title=title, message="No valid data after cleaning.")
565
+
566
  data_over_time = df_copy.resample('D')[likes_col].sum()
567
  fig, ax = plt.subplots(figsize=(10, 5))
568
  ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson')
569
+ ax.set_title(title, y=1.03) # Matplotlib title
570
  ax.set_xlabel('Date')
571
  ax.set_ylabel('Total Likes')
572
  ax.grid(True, linestyle='--', alpha=0.7)
573
  plt.xticks(rotation=45)
574
+ plt.tight_layout(fig=fig)
575
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
576
  return fig
577
  except Exception as e:
578
  logging.error(f"Error generating {title}: {e}", exc_info=True)
579
+ if fig: plt.close(fig)
580
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
581
  finally:
582
+ pass
583
 
584
  def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
 
 
 
585
  title = "Clicks Over Time"
586
  logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
587
+ # This function essentially calls generate_reach_over_time_plot with specific params
588
+ # The fig.subplots_adjust will be handled within that function.
589
  return generate_reach_over_time_plot(df, date_column, clicks_col)
590
 
591
 
592
  def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
 
593
  title = "Shares Over Time"
594
  logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
595
  if df is None or df.empty:
 
597
  required_cols = [date_column, shares_col]
598
  if any(col not in df.columns for col in required_cols):
599
  return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
600
+
601
+ fig = None
602
  try:
603
  df_copy = df.copy()
604
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
 
610
  data_over_time = df_copy.resample('D')[shares_col].sum()
611
  fig, ax = plt.subplots(figsize=(10, 5))
612
  ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal')
613
+ ax.set_title(title, y=1.03) # Matplotlib title
614
  ax.set_xlabel('Date')
615
  ax.set_ylabel('Total Shares')
616
  ax.grid(True, linestyle='--', alpha=0.7)
617
  plt.xticks(rotation=45)
618
+ plt.tight_layout(fig=fig)
619
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
620
  return fig
621
  except Exception as e:
622
  logging.error(f"Error generating {title}: {e}", exc_info=True)
623
+ if fig: plt.close(fig)
624
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
625
  finally:
626
+ pass
627
 
628
  def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
 
629
  title = "Comments Over Time"
630
  logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
631
  if df is None or df.empty:
 
633
  required_cols = [date_column, comments_col]
634
  if any(col not in df.columns for col in required_cols):
635
  return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
636
+
637
+ fig = None
638
  try:
639
  df_copy = df.copy()
640
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
 
646
  data_over_time = df_copy.resample('D')[comments_col].sum()
647
  fig, ax = plt.subplots(figsize=(10, 5))
648
  ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold')
649
+ ax.set_title(title, y=1.03) # Matplotlib title
650
  ax.set_xlabel('Date')
651
  ax.set_ylabel('Total Comments')
652
  ax.grid(True, linestyle='--', alpha=0.7)
653
  plt.xticks(rotation=45)
654
+ plt.tight_layout(fig=fig)
655
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
656
  return fig
657
  except Exception as e:
658
  logging.error(f"Error generating {title}: {e}", exc_info=True)
659
+ if fig: plt.close(fig)
660
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
661
  finally:
662
+ pass
663
 
664
  def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
 
 
 
 
 
 
665
  title = "Breakdown of Comments by Sentiment"
666
  logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
667
 
668
  if df is None or df.empty:
669
  return create_placeholder_plot(title=title, message="No data for comment sentiment.")
670
  if sentiment_column not in df.columns:
 
671
  if 'sentiment' in df.columns and sentiment_column != 'sentiment':
672
  logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
673
+ sentiment_column = 'sentiment'
674
  else:
675
  return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")
676
+
 
677
  if df[sentiment_column].isnull().all():
678
  return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")
679
 
680
+ fig = None
681
  try:
682
  df_copy = df.copy()
 
683
  df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
684
+ sentiment_counts = df_copy[sentiment_column].value_counts().dropna()
685
 
686
  if sentiment_counts.empty or sentiment_counts.sum() == 0:
687
  return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
 
689
  fig, ax = plt.subplots(figsize=(8, 5))
690
  colors_map = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
691
  pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
 
692
  ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
693
+ ax.set_title(title, y=1.03) # Matplotlib title
694
+ ax.axis('equal')
695
+ plt.tight_layout(fig=fig)
696
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
697
  return fig
698
  except Exception as e:
699
  logging.error(f"Error generating {title}: {e}", exc_info=True)
700
+ if fig: plt.close(fig)
701
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
702
  finally:
703
+ pass
704
 
 
705
  def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
 
706
  title = f"Post Frequency Over Time ({resample_period})"
707
  logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
708
 
 
711
  if date_column not in df.columns:
712
  return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")
713
 
714
+ fig = None
715
  try:
716
  df_copy = df.copy()
717
  if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
718
  df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
719
+
720
  df_copy = df_copy.dropna(subset=[date_column])
721
  if df_copy.empty:
722
  return create_placeholder_plot(title=title, message="No valid date entries found.")
723
 
724
  post_frequency = df_copy.set_index(date_column).resample(resample_period).size()
725
+
726
  if post_frequency.empty:
727
  return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")
728
 
729
  fig, ax = plt.subplots(figsize=(10, 5))
730
  post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None)
731
+ ax.set_title(title, y=1.03) # Matplotlib title
732
  ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
733
  ax.set_ylabel('Number of Posts')
734
  ax.grid(True, linestyle='--', alpha=0.7)
735
  plt.xticks(rotation=45)
736
+ plt.tight_layout(fig=fig)
737
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
738
  logging.info(f"Successfully generated {title} plot.")
739
  return fig
740
  except Exception as e:
741
  logging.error(f"Error generating {title}: {e}", exc_info=True)
742
+ if fig: plt.close(fig)
743
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
744
  finally:
745
+ pass
746
 
747
  def generate_content_format_breakdown_plot(df, format_col='media_type'):
 
748
  title = "Breakdown of Content by Format"
749
  logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")
750
 
 
753
  if format_col not in df.columns:
754
  return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")
755
 
756
+ fig = None
757
  try:
758
  df_copy = df.copy()
759
  format_counts = df_copy[format_col].value_counts().dropna()
 
763
 
764
  fig, ax = plt.subplots(figsize=(8, 6))
765
  format_counts.plot(kind='bar', ax=ax, color='skyblue')
766
+ ax.set_title(title, y=1.03) # Matplotlib title
767
  ax.set_xlabel('Media Type')
768
  ax.set_ylabel('Number of Posts')
769
  ax.grid(axis='y', linestyle='--', alpha=0.7)
770
  plt.xticks(rotation=45, ha="right")
771
+
 
 
772
  for i, v in enumerate(format_counts):
773
  ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom')
774
+
775
+ plt.tight_layout(fig=fig)
776
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
777
  logging.info(f"Successfully generated {title} plot.")
778
  return fig
779
  except Exception as e:
780
  logging.error(f"Error generating {title}: {e}", exc_info=True)
781
+ if fig: plt.close(fig)
782
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
783
  finally:
784
+ pass
785
 
786
  def _parse_eb_label(label_data):
 
787
  if isinstance(label_data, list):
788
  return label_data
789
  if isinstance(label_data, str):
790
  try:
 
791
  parsed = ast.literal_eval(label_data)
792
  if isinstance(parsed, list):
793
  return parsed
794
+ return [str(parsed)]
 
795
  except (ValueError, SyntaxError):
 
796
  return [label_data] if label_data.strip() else []
797
  if pd.isna(label_data):
798
  return []
799
+ return []
800
 
801
  def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
 
802
  title = f"Breakdown of Content by Topics (Top {top_n})"
803
  logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")
804
 
 
807
  if topics_col not in df.columns:
808
  return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
809
 
810
+ fig = None
811
  try:
812
  df_copy = df.copy()
 
 
813
  parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
814
  exploded_labels = parsed_labels.explode().dropna()
815
 
 
820
 
821
  if topic_counts.empty:
822
  return create_placeholder_plot(title=title, message="No topics to display after counting.")
823
+
 
824
  top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
825
 
826
  fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
827
  top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
828
+ ax.set_title(title, y=1.03) # Matplotlib title
829
  ax.set_xlabel('Number of Posts')
830
  ax.set_ylabel('Topic')
831
+
 
832
  for i, (topic, count) in enumerate(top_topics.items()):
833
  ax.text(count + (0.01 * top_topics.max()), i, str(count), va='center')
834
+
835
+ plt.tight_layout(fig=fig)
836
+ fig.subplots_adjust(top=0.88) # Add space for Gradio label
837
  logging.info(f"Successfully generated {title} plot.")
838
  return fig
839
  except Exception as e:
840
  logging.error(f"Error generating {title}: {e}", exc_info=True)
841
+ if fig: plt.close(fig)
842
  return create_placeholder_plot(title=f"{title} Error", message=str(e))
843
  finally:
844
+ pass
845
 
846
 
847
  if __name__ == '__main__':
848
  # Create dummy data for testing
849
  posts_data = {
850
+ 'id': [f'post{i}' for i in range(1, 8)],
851
  'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
852
  'likeCount': [10, 5, 12, 8, 15, 3, 20],
853
  'commentCount': [2, 1, 3, 1, 4, 0, 5],
854
+ 'shareCount': [1, 0, 1, 1, 2, 0, 1],
855
  'clickCount': [20, 15, 30, 22, 40, 10, 50],
856
  'impressionCount': [200, 150, 300, 220, 400, 100, 500],
857
  'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
858
+ 'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],
859
+ 'eb_labels': [
860
+ "['AI', 'Tech']",
861
+ ['Innovation'],
862
+ 'General',
863
+ None,
864
  ['Tech', 'Future'],
865
  "['AI', 'Development']",
866
  ['Tech']
867
  ],
868
+ 'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive']
869
  }
870
  sample_merged_posts_df = pd.DataFrame(posts_data)
871
 
 
872
  follower_data = {
873
  'follower_count_type': [
874
+ 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
875
  'follower_geo', 'follower_geo', 'follower_geo',
876
  'follower_function', 'follower_function',
877
  'follower_industry', 'follower_industry',
878
  'follower_seniority', 'follower_seniority'
879
  ],
880
  'category_name': [
881
+ '2024-01-01', '2024-02-01', '2024-03-01',
882
+ 'USA', 'Canada', 'UK',
883
+ 'Engineering', 'Sales',
884
+ 'Tech', 'Finance',
885
+ 'Senior', 'Junior'
886
  ],
887
  'follower_count_organic': [
888
+ 100, 110, 125,
889
+ 500, 300, 150,
890
+ 400, 200,
891
+ 250, 180,
892
+ 300, 220
893
  ],
894
  'follower_count_paid': [
895
+ 20, 30, 25,
896
+ 50, 40, 60,
897
+ 30, 20,
898
+ 45, 35,
899
+ 60, 40
900
  ]
901
  }
902
  sample_follower_stats_df = pd.DataFrame(follower_data)
903
 
904
  logging.info("--- Testing Existing Plot Generations ---")
 
905
  fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
906
  if fig_posts_activity: logging.info("Posts activity plot generated.")
907
+
908
  fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
909
  if fig_engagement_type: logging.info("Engagement type plot generated.")
910
 
 
924
  if fig_followers_rate: logging.info("Followers Growth Rate plot generated.")
925
  fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
926
  if fig_geo: logging.info("Followers by Location plot generated.")
 
927
 
928
  fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
929
  if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
 
931
  if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
932
  fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
933
  if fig_impressions: logging.info("Impressions Over Time plot generated.")
934
+
935
  fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
936
  if fig_likes_time: logging.info("Likes Over Time plot generated.")
937
+ fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
938
  if fig_clicks_time: logging.info("Clicks Over Time plot generated.")
939
  fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
940
  if fig_shares_time: logging.info("Shares Over Time plot generated.")
 
947
  logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
948
  fig_post_freq = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D')
949
  if fig_post_freq: logging.info("Post Frequency (Daily) plot generated.")
950
+
951
  fig_post_freq_w = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W')
952
  if fig_post_freq_w: logging.info("Post Frequency (Weekly) plot generated.")
953
 
 
956
 
957
  fig_content_topics = generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5)
958
  if fig_content_topics: logging.info("Content Topic Breakdown plot generated.")
959
+
 
960
  logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
961
  empty_df = pd.DataFrame()
962
  fig_post_freq_empty = generate_post_frequency_plot(empty_df.copy())
963
  if fig_post_freq_empty: logging.info("Post Frequency (empty df) placeholder generated.")
964
+
965
  fig_content_format_missing_col = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col')
966
  if fig_content_format_missing_col: logging.info("Content Format (missing col) placeholder generated.")
967
 
968
+ fig_content_topics_no_labels = generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels')
969
  if fig_content_topics_no_labels: logging.info("Content Topic (missing col) placeholder generated.")
970
 
971
  df_no_topics_data = sample_merged_posts_df.copy()