Spaces:
Running
Running
Update analytics_plot_generator.py
Browse files- analytics_plot_generator.py +218 -226
analytics_plot_generator.py
CHANGED
@@ -13,10 +13,12 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
13 |
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
|
14 |
"""Creates a placeholder Matplotlib plot indicating no data or an error."""
|
15 |
try:
|
16 |
-
fig, ax = plt.subplots(figsize=(8, 4))
|
17 |
ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
|
18 |
-
ax.axis('off')
|
19 |
plt.tight_layout()
|
|
|
|
|
20 |
return fig
|
21 |
except Exception as e:
|
22 |
logging.error(f"Error creating placeholder plot: {e}")
|
@@ -24,6 +26,7 @@ def create_placeholder_plot(title="No Data or Plot Error", message="Data might b
|
|
24 |
fig_err, ax_err = plt.subplots()
|
25 |
ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
|
26 |
ax_err.axis('off')
|
|
|
27 |
return fig_err
|
28 |
# No plt.close(fig) here as Gradio handles the figure object.
|
29 |
|
@@ -37,56 +40,64 @@ def generate_posts_activity_plot(df, date_column='published_at'):
|
|
37 |
logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
38 |
return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
|
39 |
|
|
|
40 |
try:
|
41 |
-
df_copy = df.copy()
|
42 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
43 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
44 |
-
|
45 |
df_copy = df_copy.dropna(subset=[date_column])
|
46 |
if df_copy.empty:
|
47 |
logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
|
48 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")
|
49 |
|
50 |
-
posts_over_time = df_copy.set_index(date_column).resample('D').size()
|
51 |
-
|
52 |
if posts_over_time.empty:
|
53 |
logging.info("Posts activity: No posts after resampling by day.")
|
54 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
|
55 |
|
56 |
fig, ax = plt.subplots(figsize=(10, 5))
|
57 |
posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
|
58 |
-
ax.set_title('Posts Activity Over Time')
|
59 |
ax.set_xlabel('Date')
|
60 |
ax.set_ylabel('Number of Posts')
|
61 |
ax.grid(True, linestyle='--', alpha=0.7)
|
62 |
plt.xticks(rotation=45)
|
63 |
-
plt.tight_layout()
|
|
|
64 |
logging.info("Successfully generated posts activity plot.")
|
65 |
return fig
|
66 |
except Exception as e:
|
67 |
logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
|
|
|
68 |
return create_placeholder_plot(title="Posts Activity Error", message=str(e))
|
69 |
finally:
|
70 |
-
|
|
|
|
|
|
|
71 |
|
72 |
-
|
|
|
73 |
"""Generates a bar plot for total engagement types (likes, comments, shares)."""
|
74 |
logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
|
75 |
-
|
76 |
required_cols = [likes_col, comments_col, shares_col]
|
77 |
if df is None or df.empty:
|
78 |
logging.warning("Engagement type: DataFrame is empty.")
|
79 |
return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.")
|
80 |
-
|
81 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
82 |
if missing_cols:
|
83 |
msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
|
84 |
logging.warning(msg)
|
85 |
return create_placeholder_plot(title="Post Engagement Types", message=msg)
|
86 |
|
|
|
87 |
try:
|
88 |
-
df_copy = df.copy()
|
89 |
-
for col in required_cols:
|
90 |
df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
|
91 |
|
92 |
total_likes = df_copy[likes_col].sum()
|
@@ -102,26 +113,28 @@ def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='comme
|
|
102 |
'Comments': total_comments,
|
103 |
'Shares': total_shares
|
104 |
}
|
105 |
-
|
106 |
fig, ax = plt.subplots(figsize=(8, 5))
|
107 |
bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon'])
|
108 |
-
ax.set_title('Total Post Engagement Types')
|
109 |
ax.set_xlabel('Engagement Type')
|
110 |
ax.set_ylabel('Total Count')
|
111 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
112 |
-
|
113 |
for bar in bars:
|
114 |
yval = bar.get_height()
|
115 |
ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * max(engagement_data.values(), default=10)), str(int(yval)), ha='center', va='bottom')
|
116 |
-
|
117 |
-
plt.tight_layout()
|
|
|
118 |
logging.info("Successfully generated engagement type plot.")
|
119 |
return fig
|
120 |
except Exception as e:
|
121 |
logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
|
|
|
122 |
return create_placeholder_plot(title="Engagement Type Error", message=str(e))
|
123 |
finally:
|
124 |
-
|
125 |
|
126 |
def generate_mentions_activity_plot(df, date_column='date'):
|
127 |
"""Generates a plot for mentions activity over time."""
|
@@ -132,43 +145,46 @@ def generate_mentions_activity_plot(df, date_column='date'):
|
|
132 |
if date_column not in df.columns:
|
133 |
logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
134 |
return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
|
135 |
-
|
|
|
136 |
try:
|
137 |
df_copy = df.copy()
|
138 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
139 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
140 |
-
|
141 |
df_copy = df_copy.dropna(subset=[date_column])
|
142 |
if df_copy.empty:
|
143 |
logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
|
144 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")
|
145 |
|
146 |
mentions_over_time = df_copy.set_index(date_column).resample('D').size()
|
147 |
-
|
148 |
if mentions_over_time.empty:
|
149 |
logging.info("Mentions activity: No mentions after resampling by day.")
|
150 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")
|
151 |
|
152 |
fig, ax = plt.subplots(figsize=(10, 5))
|
153 |
mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
|
154 |
-
ax.set_title('Mentions Activity Over Time')
|
155 |
ax.set_xlabel('Date')
|
156 |
ax.set_ylabel('Number of Mentions')
|
157 |
ax.grid(True, linestyle='--', alpha=0.7)
|
158 |
plt.xticks(rotation=45)
|
159 |
-
plt.tight_layout()
|
|
|
160 |
logging.info("Successfully generated mentions activity plot.")
|
161 |
return fig
|
162 |
except Exception as e:
|
163 |
logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
|
|
|
164 |
return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
|
165 |
finally:
|
166 |
-
|
167 |
|
168 |
-
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
169 |
"""Generates a pie chart for mention sentiment distribution."""
|
170 |
logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
171 |
-
|
172 |
if df is None or df.empty:
|
173 |
logging.warning("Mention sentiment: DataFrame is empty.")
|
174 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
|
@@ -177,6 +193,7 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
177 |
logging.warning(msg)
|
178 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)
|
179 |
|
|
|
180 |
try:
|
181 |
df_copy = df.copy()
|
182 |
sentiment_counts = df_copy[sentiment_column].value_counts()
|
@@ -185,56 +202,51 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
185 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
186 |
|
187 |
fig, ax = plt.subplots(figsize=(8, 5))
|
188 |
-
|
189 |
-
colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
|
190 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
191 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
192 |
-
ax.set_title('Mention Sentiment Distribution')
|
193 |
-
ax.axis('equal')
|
194 |
-
plt.tight_layout()
|
|
|
195 |
logging.info("Successfully generated mention sentiment plot.")
|
196 |
return fig
|
197 |
except Exception as e:
|
198 |
logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
|
|
|
199 |
return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
|
200 |
finally:
|
201 |
-
|
202 |
|
203 |
# --- Updated Follower Plot Functions ---
|
204 |
|
205 |
-
def generate_followers_count_over_time_plot(df, date_info_column='category_name',
|
206 |
-
organic_count_col='follower_count_organic',
|
207 |
paid_count_col='follower_count_paid',
|
208 |
-
type_filter_column='follower_count_type',
|
209 |
type_value='follower_gains_monthly'):
|
210 |
-
"""
|
211 |
-
Generates a plot for specific follower counts (organic and paid) over time.
|
212 |
-
Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
|
213 |
-
"""
|
214 |
title = f"Followers Count Over Time ({type_value})"
|
215 |
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
216 |
|
217 |
if df is None or df.empty:
|
218 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
219 |
-
|
220 |
required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
|
221 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
222 |
if missing_cols:
|
223 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
224 |
|
|
|
225 |
try:
|
226 |
df_copy = df.copy()
|
227 |
-
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
228 |
|
229 |
if df_filtered.empty:
|
230 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
231 |
|
232 |
-
# Convert date_info_column to datetime
|
233 |
df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
|
234 |
-
|
235 |
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
|
236 |
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
|
237 |
-
|
238 |
df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj')
|
239 |
|
240 |
if df_filtered.empty:
|
@@ -243,30 +255,27 @@ def generate_followers_count_over_time_plot(df, date_info_column='category_name'
|
|
243 |
fig, ax = plt.subplots(figsize=(10, 5))
|
244 |
ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers')
|
245 |
ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers')
|
246 |
-
|
247 |
-
ax.set_title(title)
|
248 |
ax.set_xlabel('Date')
|
249 |
ax.set_ylabel('Follower Count')
|
250 |
ax.legend()
|
251 |
ax.grid(True, linestyle='--', alpha=0.7)
|
252 |
plt.xticks(rotation=45)
|
253 |
-
plt.tight_layout()
|
|
|
254 |
return fig
|
255 |
except Exception as e:
|
256 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
257 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
258 |
finally:
|
259 |
-
|
260 |
|
261 |
-
def generate_followers_growth_rate_plot(df, date_info_column='category_name',
|
262 |
-
organic_count_col='follower_count_organic',
|
263 |
paid_count_col='follower_count_paid',
|
264 |
-
type_filter_column='follower_count_type',
|
265 |
type_value='follower_gains_monthly'):
|
266 |
-
"""
|
267 |
-
Calculates and plots follower growth rate (organic and paid) over time.
|
268 |
-
Date information is expected in 'date_info_column' as strings (e.g., "2024-08-01").
|
269 |
-
"""
|
270 |
title = f"Follower Growth Rate ({type_value})"
|
271 |
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
272 |
|
@@ -277,7 +286,8 @@ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
|
|
277 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
278 |
if missing_cols:
|
279 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
280 |
-
|
|
|
281 |
try:
|
282 |
df_copy = df.copy()
|
283 |
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
@@ -288,26 +298,20 @@ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
|
|
288 |
df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
|
289 |
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce')
|
290 |
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce')
|
291 |
-
|
292 |
df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
|
293 |
|
294 |
-
if df_filtered.empty or len(df_filtered) < 2:
|
295 |
return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
|
296 |
|
297 |
df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100
|
298 |
df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100
|
299 |
-
|
300 |
-
# Replace inf with NaN then drop NaNs for growth rates
|
301 |
df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
|
302 |
-
# df_filtered.dropna(subset=['organic_growth_rate', 'paid_growth_rate'], how='all', inplace=True) # Keep row if at least one rate is valid
|
303 |
|
304 |
fig, ax = plt.subplots(figsize=(10, 5))
|
305 |
-
|
306 |
plotted_organic = False
|
307 |
if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty:
|
308 |
ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate')
|
309 |
plotted_organic = True
|
310 |
-
|
311 |
plotted_paid = False
|
312 |
if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty:
|
313 |
ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate')
|
@@ -316,43 +320,42 @@ def generate_followers_growth_rate_plot(df, date_info_column='category_name',
|
|
316 |
if not plotted_organic and not plotted_paid:
|
317 |
return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")
|
318 |
|
319 |
-
ax.set_title(title)
|
320 |
ax.set_xlabel('Date')
|
321 |
ax.set_ylabel('Growth Rate (%)')
|
322 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter())
|
323 |
ax.legend()
|
324 |
ax.grid(True, linestyle='--', alpha=0.7)
|
325 |
plt.xticks(rotation=45)
|
326 |
-
plt.tight_layout()
|
|
|
327 |
return fig
|
328 |
except Exception as e:
|
329 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
330 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
331 |
finally:
|
332 |
-
|
333 |
|
334 |
-
def generate_followers_by_demographics_plot(df, category_col='category_name',
|
335 |
-
organic_count_col='follower_count_organic',
|
336 |
paid_count_col='follower_count_paid',
|
337 |
-
type_filter_column='follower_count_type',
|
338 |
type_value=None, plot_title="Followers by Demographics"):
|
339 |
-
"""
|
340 |
-
Generates a grouped bar chart for follower demographics (organic and paid).
|
341 |
-
'category_col' here is the demographic attribute (e.g., Location, Industry).
|
342 |
-
"""
|
343 |
logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
344 |
|
345 |
if df is None or df.empty:
|
346 |
return create_placeholder_plot(title=plot_title, message="No follower data available.")
|
347 |
-
|
348 |
required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
|
349 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
350 |
if missing_cols:
|
351 |
return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
352 |
-
|
353 |
-
if type_value is None:
|
354 |
return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
|
355 |
|
|
|
356 |
try:
|
357 |
df_copy = df.copy()
|
358 |
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
@@ -362,17 +365,14 @@ def generate_followers_by_demographics_plot(df, category_col='category_name',
|
|
362 |
|
363 |
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
|
364 |
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
|
365 |
-
|
366 |
demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum()
|
367 |
-
# Sort by total followers (organic + paid) for better visualization
|
368 |
demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col]
|
369 |
demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort'])
|
370 |
|
371 |
-
|
372 |
if demographics_data.empty:
|
373 |
return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
|
374 |
-
|
375 |
-
top_n = 10
|
376 |
if len(demographics_data) > top_n:
|
377 |
demographics_data = demographics_data.head(top_n)
|
378 |
plot_title_updated = f"{plot_title} (Top {top_n})"
|
@@ -380,51 +380,49 @@ def generate_followers_by_demographics_plot(df, category_col='category_name',
|
|
380 |
plot_title_updated = plot_title
|
381 |
|
382 |
fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) )
|
383 |
-
|
384 |
bar_width = 0.35
|
385 |
index = np.arange(len(demographics_data.index))
|
386 |
-
|
387 |
bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue')
|
388 |
bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral')
|
389 |
|
390 |
-
ax.set_title(plot_title_updated)
|
391 |
ax.set_xlabel(category_col.replace('_', ' ').title())
|
392 |
ax.set_ylabel('Number of Followers')
|
393 |
ax.set_xticks(index)
|
394 |
ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
|
395 |
ax.legend()
|
396 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
397 |
-
|
398 |
-
# Add labels on top of bars
|
399 |
for bar_group in [bars1, bars2]:
|
400 |
-
for
|
401 |
-
yval =
|
402 |
-
if yval > 0:
|
403 |
-
ax.text(
|
404 |
str(int(yval)), ha='center', va='bottom', fontsize=8)
|
405 |
|
406 |
-
plt.tight_layout()
|
|
|
407 |
return fig
|
408 |
except Exception as e:
|
409 |
logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
|
|
|
410 |
return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
|
411 |
finally:
|
412 |
-
|
413 |
-
|
414 |
|
415 |
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
|
416 |
-
"""Generates a plot for engagement rate over time."""
|
417 |
title = "Engagement Rate Over Time"
|
418 |
logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
419 |
|
420 |
if df is None or df.empty:
|
421 |
return create_placeholder_plot(title=title, message="No post data for engagement rate.")
|
422 |
-
|
423 |
required_cols = [date_column, engagement_rate_col]
|
424 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
425 |
if missing_cols:
|
426 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
427 |
|
|
|
428 |
try:
|
429 |
df_copy = df.copy()
|
430 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
@@ -435,38 +433,38 @@ def generate_engagement_rate_over_time_plot(df, date_column='published_at', enga
|
|
435 |
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
436 |
|
437 |
engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
|
438 |
-
engagement_over_time = engagement_over_time.dropna()
|
439 |
|
440 |
if engagement_over_time.empty:
|
441 |
return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
|
442 |
|
443 |
fig, ax = plt.subplots(figsize=(10, 5))
|
444 |
ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange')
|
445 |
-
ax.set_title(title)
|
446 |
ax.set_xlabel('Date')
|
447 |
ax.set_ylabel('Engagement Rate')
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
if max_rate_val > 1.5 and formatter_xmax == 1.0: # If data seems to be percentage but formatted as decimal
|
452 |
formatter_xmax = 100.0
|
453 |
-
elif max_rate_val > 100 and formatter_xmax == 1.0:
|
454 |
-
|
455 |
|
456 |
-
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
457 |
ax.grid(True, linestyle='--', alpha=0.7)
|
458 |
plt.xticks(rotation=45)
|
459 |
-
plt.tight_layout()
|
|
|
460 |
return fig
|
461 |
except Exception as e:
|
462 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
463 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
464 |
finally:
|
465 |
-
|
466 |
|
467 |
-
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
|
468 |
-
|
469 |
-
title = "Reach Over Time (Clicks)"
|
470 |
logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
471 |
|
472 |
if df is None or df.empty:
|
@@ -477,36 +475,36 @@ def generate_reach_over_time_plot(df, date_column='published_at', reach_col='cli
|
|
477 |
if missing_cols:
|
478 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
479 |
|
|
|
480 |
try:
|
481 |
df_copy = df.copy()
|
482 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
483 |
df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
|
484 |
df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
|
485 |
|
486 |
-
if df_copy.empty:
|
487 |
return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")
|
488 |
|
489 |
reach_over_time = df_copy.resample('D')[reach_col].sum()
|
490 |
-
# No need to check if reach_over_time is empty if df_copy wasn't, sum of NaNs is 0.
|
491 |
-
# Plot will show 0 if all sums are 0.
|
492 |
|
493 |
fig, ax = plt.subplots(figsize=(10, 5))
|
494 |
ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
|
495 |
-
ax.set_title(title)
|
496 |
ax.set_xlabel('Date')
|
497 |
-
ax.set_ylabel('Total Clicks')
|
498 |
ax.grid(True, linestyle='--', alpha=0.7)
|
499 |
plt.xticks(rotation=45)
|
500 |
-
plt.tight_layout()
|
|
|
501 |
return fig
|
502 |
except Exception as e:
|
503 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
504 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
505 |
finally:
|
506 |
-
|
507 |
|
508 |
def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
|
509 |
-
"""Generates a plot for impressions over time."""
|
510 |
title = "Impressions Over Time"
|
511 |
logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
512 |
|
@@ -518,35 +516,36 @@ def generate_impressions_over_time_plot(df, date_column='published_at', impressi
|
|
518 |
if missing_cols:
|
519 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
520 |
|
|
|
521 |
try:
|
522 |
df_copy = df.copy()
|
523 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
524 |
df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
|
525 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
526 |
|
527 |
-
if df_copy.empty:
|
528 |
-
|
529 |
|
530 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
531 |
|
532 |
fig, ax = plt.subplots(figsize=(10, 5))
|
533 |
ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue')
|
534 |
-
ax.set_title(title)
|
535 |
ax.set_xlabel('Date')
|
536 |
ax.set_ylabel('Total Impressions')
|
537 |
ax.grid(True, linestyle='--', alpha=0.7)
|
538 |
plt.xticks(rotation=45)
|
539 |
-
plt.tight_layout()
|
|
|
540 |
return fig
|
541 |
except Exception as e:
|
542 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
543 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
544 |
finally:
|
545 |
-
|
546 |
|
547 |
-
# --- New Plot Functions from User Request ---
|
548 |
def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
|
549 |
-
"""Generates a plot for likes over time."""
|
550 |
title = "Reactions (Likes) Over Time"
|
551 |
logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
552 |
if df is None or df.empty:
|
@@ -554,6 +553,8 @@ def generate_likes_over_time_plot(df, date_column='published_at', likes_col='lik
|
|
554 |
required_cols = [date_column, likes_col]
|
555 |
if any(col not in df.columns for col in required_cols):
|
556 |
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
|
|
|
|
557 |
try:
|
558 |
df_copy = df.copy()
|
559 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
@@ -561,35 +562,34 @@ def generate_likes_over_time_plot(df, date_column='published_at', likes_col='lik
|
|
561 |
df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
|
562 |
if df_copy.empty:
|
563 |
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
564 |
-
|
565 |
data_over_time = df_copy.resample('D')[likes_col].sum()
|
566 |
fig, ax = plt.subplots(figsize=(10, 5))
|
567 |
ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson')
|
568 |
-
ax.set_title(title)
|
569 |
ax.set_xlabel('Date')
|
570 |
ax.set_ylabel('Total Likes')
|
571 |
ax.grid(True, linestyle='--', alpha=0.7)
|
572 |
plt.xticks(rotation=45)
|
573 |
-
plt.tight_layout()
|
|
|
574 |
return fig
|
575 |
except Exception as e:
|
576 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
577 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
578 |
finally:
|
579 |
-
|
580 |
|
581 |
def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
|
582 |
-
"""Generates a plot for clicks over time (can be same as reach if clicks are primary reach metric)."""
|
583 |
-
# This is essentially the same as generate_reach_over_time_plot if reach_col is 'clickCount'.
|
584 |
-
# For clarity, keeping it separate if user wants to distinguish or use a different column later.
|
585 |
title = "Clicks Over Time"
|
586 |
logging.info(f"Generating {title}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
587 |
-
#
|
|
|
588 |
return generate_reach_over_time_plot(df, date_column, clicks_col)
|
589 |
|
590 |
|
591 |
def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
|
592 |
-
"""Generates a plot for shares over time."""
|
593 |
title = "Shares Over Time"
|
594 |
logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
595 |
if df is None or df.empty:
|
@@ -597,6 +597,8 @@ def generate_shares_over_time_plot(df, date_column='published_at', shares_col='s
|
|
597 |
required_cols = [date_column, shares_col]
|
598 |
if any(col not in df.columns for col in required_cols):
|
599 |
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
|
|
|
|
600 |
try:
|
601 |
df_copy = df.copy()
|
602 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
@@ -608,21 +610,22 @@ def generate_shares_over_time_plot(df, date_column='published_at', shares_col='s
|
|
608 |
data_over_time = df_copy.resample('D')[shares_col].sum()
|
609 |
fig, ax = plt.subplots(figsize=(10, 5))
|
610 |
ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal')
|
611 |
-
ax.set_title(title)
|
612 |
ax.set_xlabel('Date')
|
613 |
ax.set_ylabel('Total Shares')
|
614 |
ax.grid(True, linestyle='--', alpha=0.7)
|
615 |
plt.xticks(rotation=45)
|
616 |
-
plt.tight_layout()
|
|
|
617 |
return fig
|
618 |
except Exception as e:
|
619 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
620 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
621 |
finally:
|
622 |
-
|
623 |
|
624 |
def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
|
625 |
-
"""Generates a plot for comments over time."""
|
626 |
title = "Comments Over Time"
|
627 |
logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
628 |
if df is None or df.empty:
|
@@ -630,6 +633,8 @@ def generate_comments_over_time_plot(df, date_column='published_at', comments_co
|
|
630 |
required_cols = [date_column, comments_col]
|
631 |
if any(col not in df.columns for col in required_cols):
|
632 |
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
|
|
|
|
633 |
try:
|
634 |
df_copy = df.copy()
|
635 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
@@ -641,48 +646,42 @@ def generate_comments_over_time_plot(df, date_column='published_at', comments_co
|
|
641 |
data_over_time = df_copy.resample('D')[comments_col].sum()
|
642 |
fig, ax = plt.subplots(figsize=(10, 5))
|
643 |
ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold')
|
644 |
-
ax.set_title(title)
|
645 |
ax.set_xlabel('Date')
|
646 |
ax.set_ylabel('Total Comments')
|
647 |
ax.grid(True, linestyle='--', alpha=0.7)
|
648 |
plt.xticks(rotation=45)
|
649 |
-
plt.tight_layout()
|
|
|
650 |
return fig
|
651 |
except Exception as e:
|
652 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
653 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
654 |
finally:
|
655 |
-
|
656 |
|
657 |
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
|
658 |
-
"""
|
659 |
-
Generates a pie chart for comment sentiment distribution.
|
660 |
-
Assumes df might be post-level with an aggregated or example sentiment,
|
661 |
-
or ideally, a comment-level df with sentiment per comment.
|
662 |
-
If date_column is provided, it's for logging/context but not directly used for filtering here.
|
663 |
-
"""
|
664 |
title = "Breakdown of Comments by Sentiment"
|
665 |
logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")
|
666 |
|
667 |
if df is None or df.empty:
|
668 |
return create_placeholder_plot(title=title, message="No data for comment sentiment.")
|
669 |
if sentiment_column not in df.columns:
|
670 |
-
# Check for a common alternative if the primary is missing (e.g. from post-level data)
|
671 |
if 'sentiment' in df.columns and sentiment_column != 'sentiment':
|
672 |
logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
|
673 |
-
sentiment_column = 'sentiment'
|
674 |
else:
|
675 |
return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")
|
676 |
-
|
677 |
-
# If the sentiment column has no valid data (all NaNs, or not convertible)
|
678 |
if df[sentiment_column].isnull().all():
|
679 |
return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")
|
680 |
|
|
|
681 |
try:
|
682 |
df_copy = df.copy()
|
683 |
-
# Ensure the sentiment column is treated as categorical (string)
|
684 |
df_copy[sentiment_column] = df_copy[sentiment_column].astype(str)
|
685 |
-
sentiment_counts = df_copy[sentiment_column].value_counts().dropna()
|
686 |
|
687 |
if sentiment_counts.empty or sentiment_counts.sum() == 0:
|
688 |
return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")
|
@@ -690,21 +689,20 @@ def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sen
|
|
690 |
fig, ax = plt.subplots(figsize=(8, 5))
|
691 |
colors_map = plt.cm.get_cmap('coolwarm', len(sentiment_counts))
|
692 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
693 |
-
|
694 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
695 |
-
ax.set_title(title)
|
696 |
-
ax.axis('equal')
|
697 |
-
plt.tight_layout()
|
|
|
698 |
return fig
|
699 |
except Exception as e:
|
700 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
701 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
702 |
finally:
|
703 |
-
|
704 |
|
705 |
-
# --- NEW PLOT FUNCTIONS FOR CONTENT STRATEGY ---
|
706 |
def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
|
707 |
-
"""Generates a plot for post frequency over time (e.g., daily, weekly, monthly)."""
|
708 |
title = f"Post Frequency Over Time ({resample_period})"
|
709 |
logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
710 |
|
@@ -713,38 +711,40 @@ def generate_post_frequency_plot(df, date_column='published_at', resample_period
|
|
713 |
if date_column not in df.columns:
|
714 |
return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")
|
715 |
|
|
|
716 |
try:
|
717 |
df_copy = df.copy()
|
718 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
719 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
720 |
-
|
721 |
df_copy = df_copy.dropna(subset=[date_column])
|
722 |
if df_copy.empty:
|
723 |
return create_placeholder_plot(title=title, message="No valid date entries found.")
|
724 |
|
725 |
post_frequency = df_copy.set_index(date_column).resample(resample_period).size()
|
726 |
-
|
727 |
if post_frequency.empty:
|
728 |
return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")
|
729 |
|
730 |
fig, ax = plt.subplots(figsize=(10, 5))
|
731 |
post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None)
|
732 |
-
ax.set_title(title)
|
733 |
ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
|
734 |
ax.set_ylabel('Number of Posts')
|
735 |
ax.grid(True, linestyle='--', alpha=0.7)
|
736 |
plt.xticks(rotation=45)
|
737 |
-
plt.tight_layout()
|
|
|
738 |
logging.info(f"Successfully generated {title} plot.")
|
739 |
return fig
|
740 |
except Exception as e:
|
741 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
742 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
743 |
finally:
|
744 |
-
|
745 |
|
746 |
def generate_content_format_breakdown_plot(df, format_col='media_type'):
|
747 |
-
"""Generates a bar chart for breakdown of content by format."""
|
748 |
title = "Breakdown of Content by Format"
|
749 |
logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")
|
750 |
|
@@ -753,6 +753,7 @@ def generate_content_format_breakdown_plot(df, format_col='media_type'):
|
|
753 |
if format_col not in df.columns:
|
754 |
return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")
|
755 |
|
|
|
756 |
try:
|
757 |
df_copy = df.copy()
|
758 |
format_counts = df_copy[format_col].value_counts().dropna()
|
@@ -762,46 +763,42 @@ def generate_content_format_breakdown_plot(df, format_col='media_type'):
|
|
762 |
|
763 |
fig, ax = plt.subplots(figsize=(8, 6))
|
764 |
format_counts.plot(kind='bar', ax=ax, color='skyblue')
|
765 |
-
ax.set_title(title)
|
766 |
ax.set_xlabel('Media Type')
|
767 |
ax.set_ylabel('Number of Posts')
|
768 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
769 |
plt.xticks(rotation=45, ha="right")
|
770 |
-
|
771 |
-
|
772 |
-
# Add counts on top of bars
|
773 |
for i, v in enumerate(format_counts):
|
774 |
ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom')
|
775 |
-
|
|
|
|
|
776 |
logging.info(f"Successfully generated {title} plot.")
|
777 |
return fig
|
778 |
except Exception as e:
|
779 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
780 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
781 |
finally:
|
782 |
-
|
783 |
|
784 |
def _parse_eb_label(label_data):
    """Normalize one ``eb_labels`` cell into a list of labels.

    Args:
        label_data: The raw cell value. Handled shapes:
            * ``list`` -- returned unchanged (same object).
            * ``tuple`` -- converted to a list.
            * ``str`` -- parsed with ``ast.literal_eval``; a list/tuple
              literal becomes a list of its items, any other literal becomes
              a one-element list holding its ``str()`` form, and a string
              that is not a Python literal is kept whole as a single label
              (blank strings yield ``[]``).
            * array-like with ``ndim >= 1`` and a ``tolist()`` method --
              converted via ``tolist()``.
            * anything else (``None``, NaN, numbers, ...) -- ``[]``.

    Returns:
        list: The labels found in the cell; empty when there are none.
    """
    if isinstance(label_data, list):
        return label_data
    if isinstance(label_data, tuple):
        return list(label_data)

    if isinstance(label_data, str):
        try:
            parsed = ast.literal_eval(label_data)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a Python literal (e.g. a plain word): the whole string is
            # one label, unless it is empty or whitespace only.
            return [label_data] if label_data.strip() else []
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        # A scalar literal such as "42" is kept as a single textual label.
        return [str(parsed)]

    # Array-like cells are detected by duck typing instead of pd.isna():
    # pd.isna() returns an element-wise array for these inputs, and using
    # that array in a boolean context raises ValueError for 2+ elements.
    if getattr(label_data, "ndim", 0) >= 1 and hasattr(label_data, "tolist"):
        return list(label_data.tolist())

    # None, NaN, pd.NA and any other unsupported scalar carry no labels.
    return []
|
802 |
|
803 |
def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
|
804 |
-
"""Generates a horizontal bar chart for breakdown of content by topics."""
|
805 |
title = f"Breakdown of Content by Topics (Top {top_n})"
|
806 |
logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")
|
807 |
|
@@ -810,10 +807,9 @@ def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
|
|
810 |
if topics_col not in df.columns:
|
811 |
return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
|
812 |
|
|
|
813 |
try:
|
814 |
df_copy = df.copy()
|
815 |
-
|
816 |
-
# Apply parsing and explode
|
817 |
parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
|
818 |
exploded_labels = parsed_labels.explode().dropna()
|
819 |
|
@@ -824,93 +820,91 @@ def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
|
|
824 |
|
825 |
if topic_counts.empty:
|
826 |
return create_placeholder_plot(title=title, message="No topics to display after counting.")
|
827 |
-
|
828 |
-
# Take top N and sort for plotting (descending for horizontal bar)
|
829 |
top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
|
830 |
|
831 |
fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
|
832 |
top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
|
833 |
-
ax.set_title(title)
|
834 |
ax.set_xlabel('Number of Posts')
|
835 |
ax.set_ylabel('Topic')
|
836 |
-
|
837 |
-
# Add counts next to bars
|
838 |
for i, (topic, count) in enumerate(top_topics.items()):
|
839 |
ax.text(count + (0.01 * top_topics.max()), i, str(count), va='center')
|
840 |
-
|
841 |
-
plt.tight_layout()
|
|
|
842 |
logging.info(f"Successfully generated {title} plot.")
|
843 |
return fig
|
844 |
except Exception as e:
|
845 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
|
|
846 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
847 |
finally:
|
848 |
-
|
849 |
|
850 |
|
851 |
if __name__ == '__main__':
|
852 |
# Create dummy data for testing
|
853 |
posts_data = {
|
854 |
-
'id': [f'post{i}' for i in range(1, 8)],
|
855 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
856 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
857 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
858 |
-
'shareCount': [1, 0, 1, 1, 2, 0, 1],
|
859 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
860 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
861 |
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
|
862 |
-
'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],
|
863 |
-
'eb_labels': [
|
864 |
-
"['AI', 'Tech']",
|
865 |
-
['Innovation'],
|
866 |
-
'General',
|
867 |
-
None,
|
868 |
['Tech', 'Future'],
|
869 |
"['AI', 'Development']",
|
870 |
['Tech']
|
871 |
],
|
872 |
-
'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive']
|
873 |
}
|
874 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
875 |
|
876 |
-
# Updated Follower Stats Data
|
877 |
follower_data = {
|
878 |
'follower_count_type': [
|
879 |
-
'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
|
880 |
'follower_geo', 'follower_geo', 'follower_geo',
|
881 |
'follower_function', 'follower_function',
|
882 |
'follower_industry', 'follower_industry',
|
883 |
'follower_seniority', 'follower_seniority'
|
884 |
],
|
885 |
'category_name': [
|
886 |
-
'2024-01-01', '2024-02-01', '2024-03-01',
|
887 |
-
'USA', 'Canada', 'UK',
|
888 |
-
'Engineering', 'Sales',
|
889 |
-
'Tech', 'Finance',
|
890 |
-
'Senior', 'Junior'
|
891 |
],
|
892 |
'follower_count_organic': [
|
893 |
-
100, 110, 125,
|
894 |
-
500, 300, 150,
|
895 |
-
400, 200,
|
896 |
-
250, 180,
|
897 |
-
300, 220
|
898 |
],
|
899 |
'follower_count_paid': [
|
900 |
-
20, 30, 25,
|
901 |
-
50, 40, 60,
|
902 |
-
30, 20,
|
903 |
-
45, 35,
|
904 |
-
60, 40
|
905 |
]
|
906 |
}
|
907 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
908 |
|
909 |
logging.info("--- Testing Existing Plot Generations ---")
|
910 |
-
# ... (keep existing tests for older plots) ...
|
911 |
fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
|
912 |
if fig_posts_activity: logging.info("Posts activity plot generated.")
|
913 |
-
|
914 |
fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
|
915 |
if fig_engagement_type: logging.info("Engagement type plot generated.")
|
916 |
|
@@ -930,7 +924,6 @@ if __name__ == '__main__':
|
|
930 |
if fig_followers_rate: logging.info("Followers Growth Rate plot generated.")
|
931 |
fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
|
932 |
if fig_geo: logging.info("Followers by Location plot generated.")
|
933 |
-
# ... add other follower demographic tests ...
|
934 |
|
935 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
936 |
if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
|
@@ -938,10 +931,10 @@ if __name__ == '__main__':
|
|
938 |
if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
|
939 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
940 |
if fig_impressions: logging.info("Impressions Over Time plot generated.")
|
941 |
-
|
942 |
fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
|
943 |
if fig_likes_time: logging.info("Likes Over Time plot generated.")
|
944 |
-
fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
|
945 |
if fig_clicks_time: logging.info("Clicks Over Time plot generated.")
|
946 |
fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
|
947 |
if fig_shares_time: logging.info("Shares Over Time plot generated.")
|
@@ -954,7 +947,7 @@ if __name__ == '__main__':
|
|
954 |
logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
|
955 |
fig_post_freq = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D')
|
956 |
if fig_post_freq: logging.info("Post Frequency (Daily) plot generated.")
|
957 |
-
|
958 |
fig_post_freq_w = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W')
|
959 |
if fig_post_freq_w: logging.info("Post Frequency (Weekly) plot generated.")
|
960 |
|
@@ -963,17 +956,16 @@ if __name__ == '__main__':
|
|
963 |
|
964 |
fig_content_topics = generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5)
|
965 |
if fig_content_topics: logging.info("Content Topic Breakdown plot generated.")
|
966 |
-
|
967 |
-
# Test with missing columns / empty data for new plots
|
968 |
logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
|
969 |
empty_df = pd.DataFrame()
|
970 |
fig_post_freq_empty = generate_post_frequency_plot(empty_df.copy())
|
971 |
if fig_post_freq_empty: logging.info("Post Frequency (empty df) placeholder generated.")
|
972 |
-
|
973 |
fig_content_format_missing_col = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col')
|
974 |
if fig_content_format_missing_col: logging.info("Content Format (missing col) placeholder generated.")
|
975 |
|
976 |
-
fig_content_topics_no_labels = generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels')
|
977 |
if fig_content_topics_no_labels: logging.info("Content Topic (missing col) placeholder generated.")
|
978 |
|
979 |
df_no_topics_data = sample_merged_posts_df.copy()
|
|
|
13 |
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
|
14 |
"""Creates a placeholder Matplotlib plot indicating no data or an error."""
|
15 |
try:
|
16 |
+
fig, ax = plt.subplots(figsize=(8, 4))
|
17 |
ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
|
18 |
+
ax.axis('off')
|
19 |
plt.tight_layout()
|
20 |
+
# Add spacing for consistency, though it might be less critical for placeholders
|
21 |
+
fig.subplots_adjust(top=0.90)
|
22 |
return fig
|
23 |
except Exception as e:
|
24 |
logging.error(f"Error creating placeholder plot: {e}")
|
|
|
26 |
fig_err, ax_err = plt.subplots()
|
27 |
ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
|
28 |
ax_err.axis('off')
|
29 |
+
fig_err.subplots_adjust(top=0.90)
|
30 |
return fig_err
|
31 |
# No plt.close(fig) here as Gradio handles the figure object.
|
32 |
|
|
|
40 |
logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
41 |
return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
|
42 |
|
43 |
+
fig = None # Initialize fig to None
|
44 |
try:
|
45 |
+
df_copy = df.copy()
|
46 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
47 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
48 |
+
|
49 |
df_copy = df_copy.dropna(subset=[date_column])
|
50 |
if df_copy.empty:
|
51 |
logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
|
52 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No valid date entries found.")
|
53 |
|
54 |
+
posts_over_time = df_copy.set_index(date_column).resample('D').size()
|
55 |
+
|
56 |
if posts_over_time.empty:
|
57 |
logging.info("Posts activity: No posts after resampling by day.")
|
58 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
|
59 |
|
60 |
fig, ax = plt.subplots(figsize=(10, 5))
|
61 |
posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
|
62 |
+
ax.set_title('Posts Activity Over Time', y=1.03) # Matplotlib title
|
63 |
ax.set_xlabel('Date')
|
64 |
ax.set_ylabel('Number of Posts')
|
65 |
ax.grid(True, linestyle='--', alpha=0.7)
|
66 |
plt.xticks(rotation=45)
|
67 |
+
plt.tight_layout(fig=fig)
|
68 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
69 |
logging.info("Successfully generated posts activity plot.")
|
70 |
return fig
|
71 |
except Exception as e:
|
72 |
logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
|
73 |
+
if fig: plt.close(fig) # Close if fig was created before error
|
74 |
return create_placeholder_plot(title="Posts Activity Error", message=str(e))
|
75 |
finally:
|
76 |
+
# If fig was not returned (e.g. placeholder was returned), and it exists, close it.
|
77 |
+
# However, if fig is returned, Gradio handles it.
|
78 |
+
# The plt.close('all') was too broad. We only close specific figures if not returned.
|
79 |
+
pass # Let Gradio handle the returned figure. If a placeholder is returned, its figure is handled there.
|
80 |
|
81 |
+
|
82 |
+
def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'):
|
83 |
"""Generates a bar plot for total engagement types (likes, comments, shares)."""
|
84 |
logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
|
85 |
+
|
86 |
required_cols = [likes_col, comments_col, shares_col]
|
87 |
if df is None or df.empty:
|
88 |
logging.warning("Engagement type: DataFrame is empty.")
|
89 |
return create_placeholder_plot(title="Post Engagement Types", message="No data available for the selected period.")
|
90 |
+
|
91 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
92 |
if missing_cols:
|
93 |
msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
|
94 |
logging.warning(msg)
|
95 |
return create_placeholder_plot(title="Post Engagement Types", message=msg)
|
96 |
|
97 |
+
fig = None
|
98 |
try:
|
99 |
+
df_copy = df.copy()
|
100 |
+
for col in required_cols:
|
101 |
df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
|
102 |
|
103 |
total_likes = df_copy[likes_col].sum()
|
|
|
113 |
'Comments': total_comments,
|
114 |
'Shares': total_shares
|
115 |
}
|
116 |
+
|
117 |
fig, ax = plt.subplots(figsize=(8, 5))
|
118 |
bars = ax.bar(engagement_data.keys(), engagement_data.values(), color=['skyblue', 'lightgreen', 'salmon'])
|
119 |
+
ax.set_title('Total Post Engagement Types', y=1.03) # Matplotlib title
|
120 |
ax.set_xlabel('Engagement Type')
|
121 |
ax.set_ylabel('Total Count')
|
122 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
123 |
+
|
124 |
for bar in bars:
|
125 |
yval = bar.get_height()
|
126 |
ax.text(bar.get_x() + bar.get_width()/2.0, yval + (0.01 * max(engagement_data.values(), default=10)), str(int(yval)), ha='center', va='bottom')
|
127 |
+
|
128 |
+
plt.tight_layout(fig=fig)
|
129 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
130 |
logging.info("Successfully generated engagement type plot.")
|
131 |
return fig
|
132 |
except Exception as e:
|
133 |
logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
|
134 |
+
if fig: plt.close(fig)
|
135 |
return create_placeholder_plot(title="Engagement Type Error", message=str(e))
|
136 |
finally:
|
137 |
+
pass
|
138 |
|
139 |
def generate_mentions_activity_plot(df, date_column='date'):
|
140 |
"""Generates a plot for mentions activity over time."""
|
|
|
145 |
if date_column not in df.columns:
|
146 |
logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
147 |
return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
|
148 |
+
|
149 |
+
fig = None
|
150 |
try:
|
151 |
df_copy = df.copy()
|
152 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
153 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
154 |
+
|
155 |
df_copy = df_copy.dropna(subset=[date_column])
|
156 |
if df_copy.empty:
|
157 |
logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
|
158 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No valid date entries found.")
|
159 |
|
160 |
mentions_over_time = df_copy.set_index(date_column).resample('D').size()
|
161 |
+
|
162 |
if mentions_over_time.empty:
|
163 |
logging.info("Mentions activity: No mentions after resampling by day.")
|
164 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No mentions in the selected period.")
|
165 |
|
166 |
fig, ax = plt.subplots(figsize=(10, 5))
|
167 |
mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
|
168 |
+
ax.set_title('Mentions Activity Over Time', y=1.03) # Matplotlib title
|
169 |
ax.set_xlabel('Date')
|
170 |
ax.set_ylabel('Number of Mentions')
|
171 |
ax.grid(True, linestyle='--', alpha=0.7)
|
172 |
plt.xticks(rotation=45)
|
173 |
+
plt.tight_layout(fig=fig)
|
174 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
175 |
logging.info("Successfully generated mentions activity plot.")
|
176 |
return fig
|
177 |
except Exception as e:
|
178 |
logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
|
179 |
+
if fig: plt.close(fig)
|
180 |
return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
|
181 |
finally:
|
182 |
+
pass
|
183 |
|
184 |
+
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
185 |
"""Generates a pie chart for mention sentiment distribution."""
|
186 |
logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
187 |
+
|
188 |
if df is None or df.empty:
|
189 |
logging.warning("Mention sentiment: DataFrame is empty.")
|
190 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No data available for the selected period.")
|
|
|
193 |
logging.warning(msg)
|
194 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message=msg)
|
195 |
|
196 |
+
fig = None
|
197 |
try:
|
198 |
df_copy = df.copy()
|
199 |
sentiment_counts = df_copy[sentiment_column].value_counts()
|
|
|
202 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
203 |
|
204 |
fig, ax = plt.subplots(figsize=(8, 5))
|
205 |
+
colors_map = plt.cm.get_cmap('Pastel1', len(sentiment_counts))
|
|
|
206 |
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
207 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
208 |
+
ax.set_title('Mention Sentiment Distribution', y=1.03) # Matplotlib title
|
209 |
+
ax.axis('equal')
|
210 |
+
plt.tight_layout(fig=fig)
|
211 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
212 |
logging.info("Successfully generated mention sentiment plot.")
|
213 |
return fig
|
214 |
except Exception as e:
|
215 |
logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
|
216 |
+
if fig: plt.close(fig)
|
217 |
return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
|
218 |
finally:
|
219 |
+
pass
|
220 |
|
221 |
# --- Updated Follower Plot Functions ---
|
222 |
|
223 |
+
def generate_followers_count_over_time_plot(df, date_info_column='category_name',
|
224 |
+
organic_count_col='follower_count_organic',
|
225 |
paid_count_col='follower_count_paid',
|
226 |
+
type_filter_column='follower_count_type',
|
227 |
type_value='follower_gains_monthly'):
|
|
|
|
|
|
|
|
|
228 |
title = f"Followers Count Over Time ({type_value})"
|
229 |
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
230 |
|
231 |
if df is None or df.empty:
|
232 |
return create_placeholder_plot(title=title, message="No follower data available.")
|
233 |
+
|
234 |
required_cols = [date_info_column, organic_count_col, paid_count_col, type_filter_column]
|
235 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
236 |
if missing_cols:
|
237 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
238 |
|
239 |
+
fig = None
|
240 |
try:
|
241 |
df_copy = df.copy()
|
242 |
+
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
243 |
|
244 |
if df_filtered.empty:
|
245 |
return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")
|
246 |
|
|
|
247 |
df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
|
|
|
248 |
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
|
249 |
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
|
|
|
250 |
df_filtered = df_filtered.dropna(subset=['datetime_obj', organic_count_col, paid_count_col]).sort_values(by='datetime_obj')
|
251 |
|
252 |
if df_filtered.empty:
|
|
|
255 |
fig, ax = plt.subplots(figsize=(10, 5))
|
256 |
ax.plot(df_filtered['datetime_obj'], df_filtered[organic_count_col], marker='o', linestyle='-', color='dodgerblue', label='Organic Followers')
|
257 |
ax.plot(df_filtered['datetime_obj'], df_filtered[paid_count_col], marker='x', linestyle='--', color='seagreen', label='Paid Followers')
|
258 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
|
|
259 |
ax.set_xlabel('Date')
|
260 |
ax.set_ylabel('Follower Count')
|
261 |
ax.legend()
|
262 |
ax.grid(True, linestyle='--', alpha=0.7)
|
263 |
plt.xticks(rotation=45)
|
264 |
+
plt.tight_layout(fig=fig)
|
265 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
266 |
return fig
|
267 |
except Exception as e:
|
268 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
269 |
+
if fig: plt.close(fig)
|
270 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
271 |
finally:
|
272 |
+
pass
|
273 |
|
274 |
+
def generate_followers_growth_rate_plot(df, date_info_column='category_name',
|
275 |
+
organic_count_col='follower_count_organic',
|
276 |
paid_count_col='follower_count_paid',
|
277 |
+
type_filter_column='follower_count_type',
|
278 |
type_value='follower_gains_monthly'):
|
|
|
|
|
|
|
|
|
279 |
title = f"Follower Growth Rate ({type_value})"
|
280 |
logging.info(f"Generating {title}. Date Info: '{date_info_column}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
281 |
|
|
|
286 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
287 |
if missing_cols:
|
288 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
289 |
+
|
290 |
+
fig = None
|
291 |
try:
|
292 |
df_copy = df.copy()
|
293 |
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
|
|
298 |
df_filtered['datetime_obj'] = pd.to_datetime(df_filtered[date_info_column], errors='coerce')
|
299 |
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce')
|
300 |
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce')
|
|
|
301 |
df_filtered = df_filtered.dropna(subset=['datetime_obj']).sort_values(by='datetime_obj').set_index('datetime_obj')
|
302 |
|
303 |
+
if df_filtered.empty or len(df_filtered) < 2:
|
304 |
return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")
|
305 |
|
306 |
df_filtered['organic_growth_rate'] = df_filtered[organic_count_col].pct_change() * 100
|
307 |
df_filtered['paid_growth_rate'] = df_filtered[paid_count_col].pct_change() * 100
|
|
|
|
|
308 |
df_filtered.replace([np.inf, -np.inf], np.nan, inplace=True)
|
|
|
309 |
|
310 |
fig, ax = plt.subplots(figsize=(10, 5))
|
|
|
311 |
plotted_organic = False
|
312 |
if 'organic_growth_rate' in df_filtered.columns and not df_filtered['organic_growth_rate'].dropna().empty:
|
313 |
ax.plot(df_filtered.index, df_filtered['organic_growth_rate'], marker='o', linestyle='-', color='lightcoral', label='Organic Growth Rate')
|
314 |
plotted_organic = True
|
|
|
315 |
plotted_paid = False
|
316 |
if 'paid_growth_rate' in df_filtered.columns and not df_filtered['paid_growth_rate'].dropna().empty:
|
317 |
ax.plot(df_filtered.index, df_filtered['paid_growth_rate'], marker='x', linestyle='--', color='mediumpurple', label='Paid Growth Rate')
|
|
|
320 |
if not plotted_organic and not plotted_paid:
|
321 |
return create_placeholder_plot(title=title, message="No valid growth rate data to display after calculation.")
|
322 |
|
323 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
324 |
ax.set_xlabel('Date')
|
325 |
ax.set_ylabel('Growth Rate (%)')
|
326 |
ax.yaxis.set_major_formatter(mticker.PercentFormatter())
|
327 |
ax.legend()
|
328 |
ax.grid(True, linestyle='--', alpha=0.7)
|
329 |
plt.xticks(rotation=45)
|
330 |
+
plt.tight_layout(fig=fig)
|
331 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
332 |
return fig
|
333 |
except Exception as e:
|
334 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
335 |
+
if fig: plt.close(fig)
|
336 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
337 |
finally:
|
338 |
+
pass
|
339 |
|
340 |
+
def generate_followers_by_demographics_plot(df, category_col='category_name',
|
341 |
+
organic_count_col='follower_count_organic',
|
342 |
paid_count_col='follower_count_paid',
|
343 |
+
type_filter_column='follower_count_type',
|
344 |
type_value=None, plot_title="Followers by Demographics"):
|
|
|
|
|
|
|
|
|
345 |
logging.info(f"Generating {plot_title}. Category: '{category_col}', Organic: '{organic_count_col}', Paid: '{paid_count_col}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")
|
346 |
|
347 |
if df is None or df.empty:
|
348 |
return create_placeholder_plot(title=plot_title, message="No follower data available.")
|
349 |
+
|
350 |
required_cols = [category_col, organic_count_col, paid_count_col, type_filter_column]
|
351 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
352 |
if missing_cols:
|
353 |
return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
354 |
+
|
355 |
+
if type_value is None:
|
356 |
return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")
|
357 |
|
358 |
+
fig = None
|
359 |
try:
|
360 |
df_copy = df.copy()
|
361 |
df_filtered = df_copy[df_copy[type_filter_column] == type_value].copy()
|
|
|
365 |
|
366 |
df_filtered[organic_count_col] = pd.to_numeric(df_filtered[organic_count_col], errors='coerce').fillna(0)
|
367 |
df_filtered[paid_count_col] = pd.to_numeric(df_filtered[paid_count_col], errors='coerce').fillna(0)
|
|
|
368 |
demographics_data = df_filtered.groupby(category_col)[[organic_count_col, paid_count_col]].sum()
|
|
|
369 |
demographics_data['total_for_sort'] = demographics_data[organic_count_col] + demographics_data[paid_count_col]
|
370 |
demographics_data = demographics_data.sort_values(by='total_for_sort', ascending=False).drop(columns=['total_for_sort'])
|
371 |
|
|
|
372 |
if demographics_data.empty:
|
373 |
return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")
|
374 |
+
|
375 |
+
top_n = 10
|
376 |
if len(demographics_data) > top_n:
|
377 |
demographics_data = demographics_data.head(top_n)
|
378 |
plot_title_updated = f"{plot_title} (Top {top_n})"
|
|
|
380 |
plot_title_updated = plot_title
|
381 |
|
382 |
fig, ax = plt.subplots(figsize=(12, 7) if len(demographics_data) > 5 else (10,6) )
|
|
|
383 |
bar_width = 0.35
|
384 |
index = np.arange(len(demographics_data.index))
|
|
|
385 |
bars1 = ax.bar(index - bar_width/2, demographics_data[organic_count_col], bar_width, label='Organic', color='skyblue')
|
386 |
bars2 = ax.bar(index + bar_width/2, demographics_data[paid_count_col], bar_width, label='Paid', color='lightcoral')
|
387 |
|
388 |
+
ax.set_title(plot_title_updated, y=1.03) # Matplotlib title
|
389 |
ax.set_xlabel(category_col.replace('_', ' ').title())
|
390 |
ax.set_ylabel('Number of Followers')
|
391 |
ax.set_xticks(index)
|
392 |
ax.set_xticklabels(demographics_data.index, rotation=45, ha="right")
|
393 |
ax.legend()
|
394 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
395 |
+
|
|
|
396 |
for bar_group in [bars1, bars2]:
|
397 |
+
for bar_item in bar_group: # Renamed 'bar' to 'bar_item' to avoid conflict
|
398 |
+
yval = bar_item.get_height()
|
399 |
+
if yval > 0:
|
400 |
+
ax.text(bar_item.get_x() + bar_item.get_width()/2.0, yval + (0.01 * ax.get_ylim()[1]),
|
401 |
str(int(yval)), ha='center', va='bottom', fontsize=8)
|
402 |
|
403 |
+
plt.tight_layout(fig=fig)
|
404 |
+
fig.subplots_adjust(top=0.85) # Adjust top for more space, especially with rotated labels
|
405 |
return fig
|
406 |
except Exception as e:
|
407 |
logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
|
408 |
+
if fig: plt.close(fig)
|
409 |
return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
|
410 |
finally:
|
411 |
+
pass
|
|
|
412 |
|
413 |
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
|
|
|
414 |
title = "Engagement Rate Over Time"
|
415 |
logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
416 |
|
417 |
if df is None or df.empty:
|
418 |
return create_placeholder_plot(title=title, message="No post data for engagement rate.")
|
419 |
+
|
420 |
required_cols = [date_column, engagement_rate_col]
|
421 |
missing_cols = [col for col in required_cols if col not in df.columns]
|
422 |
if missing_cols:
|
423 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
424 |
|
425 |
+
fig = None
|
426 |
try:
|
427 |
df_copy = df.copy()
|
428 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
|
|
433 |
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
434 |
|
435 |
engagement_over_time = df_copy.resample('D')[engagement_rate_col].mean()
|
436 |
+
engagement_over_time = engagement_over_time.dropna()
|
437 |
|
438 |
if engagement_over_time.empty:
|
439 |
return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")
|
440 |
|
441 |
fig, ax = plt.subplots(figsize=(10, 5))
|
442 |
ax.plot(engagement_over_time.index, engagement_over_time.values, marker='.', linestyle='-', color='darkorange')
|
443 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
444 |
ax.set_xlabel('Date')
|
445 |
ax.set_ylabel('Engagement Rate')
|
446 |
+
max_rate_val = engagement_over_time.max() if not engagement_over_time.empty else 0
|
447 |
+
formatter_xmax = 1.0 if 0 <= max_rate_val <= 1.5 else 100.0
|
448 |
+
if max_rate_val > 1.5 and formatter_xmax == 1.0:
|
|
|
449 |
formatter_xmax = 100.0
|
450 |
+
elif max_rate_val > 100 and formatter_xmax == 1.0:
|
451 |
+
formatter_xmax = max_rate_val
|
452 |
|
453 |
+
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=formatter_xmax))
|
454 |
ax.grid(True, linestyle='--', alpha=0.7)
|
455 |
plt.xticks(rotation=45)
|
456 |
+
plt.tight_layout(fig=fig)
|
457 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
458 |
return fig
|
459 |
except Exception as e:
|
460 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
461 |
+
if fig: plt.close(fig)
|
462 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
463 |
finally:
|
464 |
+
pass
|
465 |
|
466 |
+
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
|
467 |
+
title = "Reach Over Time (Clicks)" # Title matches the config in app.py
|
|
|
468 |
logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
469 |
|
470 |
if df is None or df.empty:
|
|
|
475 |
if missing_cols:
|
476 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
477 |
|
478 |
+
fig = None
|
479 |
try:
|
480 |
df_copy = df.copy()
|
481 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
482 |
df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
|
483 |
df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)
|
484 |
|
485 |
+
if df_copy.empty:
|
486 |
return create_placeholder_plot(title=title, message="No valid data after cleaning for reach plot.")
|
487 |
|
488 |
reach_over_time = df_copy.resample('D')[reach_col].sum()
|
|
|
|
|
489 |
|
490 |
fig, ax = plt.subplots(figsize=(10, 5))
|
491 |
ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
|
492 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
493 |
ax.set_xlabel('Date')
|
494 |
+
ax.set_ylabel('Total Clicks') # Label consistent with reach_col='clickCount'
|
495 |
ax.grid(True, linestyle='--', alpha=0.7)
|
496 |
plt.xticks(rotation=45)
|
497 |
+
plt.tight_layout(fig=fig)
|
498 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
499 |
return fig
|
500 |
except Exception as e:
|
501 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
502 |
+
if fig: plt.close(fig)
|
503 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
504 |
finally:
|
505 |
+
pass
|
506 |
|
507 |
def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
|
|
|
508 |
title = "Impressions Over Time"
|
509 |
logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
510 |
|
|
|
516 |
if missing_cols:
|
517 |
return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")
|
518 |
|
519 |
+
fig = None
|
520 |
try:
|
521 |
df_copy = df.copy()
|
522 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
523 |
df_copy[impressions_col] = pd.to_numeric(df_copy[impressions_col], errors='coerce')
|
524 |
df_copy = df_copy.dropna(subset=[date_column, impressions_col]).set_index(date_column)
|
525 |
|
526 |
+
if df_copy.empty:
|
527 |
+
return create_placeholder_plot(title=title, message="No valid data after cleaning for impressions plot.")
|
528 |
|
529 |
impressions_over_time = df_copy.resample('D')[impressions_col].sum()
|
530 |
|
531 |
fig, ax = plt.subplots(figsize=(10, 5))
|
532 |
ax.plot(impressions_over_time.index, impressions_over_time.values, marker='.', linestyle='-', color='slateblue')
|
533 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
534 |
ax.set_xlabel('Date')
|
535 |
ax.set_ylabel('Total Impressions')
|
536 |
ax.grid(True, linestyle='--', alpha=0.7)
|
537 |
plt.xticks(rotation=45)
|
538 |
+
plt.tight_layout(fig=fig)
|
539 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
540 |
return fig
|
541 |
except Exception as e:
|
542 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
543 |
+
if fig: plt.close(fig)
|
544 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
545 |
finally:
|
546 |
+
pass
|
547 |
|
|
|
548 |
def generate_likes_over_time_plot(df, date_column='published_at', likes_col='likeCount'):
|
|
|
549 |
title = "Reactions (Likes) Over Time"
|
550 |
logging.info(f"Generating {title}. Date: '{date_column}', Likes Col: '{likes_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
551 |
if df is None or df.empty:
|
|
|
553 |
required_cols = [date_column, likes_col]
|
554 |
if any(col not in df.columns for col in required_cols):
|
555 |
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
556 |
+
|
557 |
+
fig = None
|
558 |
try:
|
559 |
df_copy = df.copy()
|
560 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
|
|
562 |
df_copy = df_copy.dropna(subset=[date_column, likes_col]).set_index(date_column)
|
563 |
if df_copy.empty:
|
564 |
return create_placeholder_plot(title=title, message="No valid data after cleaning.")
|
565 |
+
|
566 |
data_over_time = df_copy.resample('D')[likes_col].sum()
|
567 |
fig, ax = plt.subplots(figsize=(10, 5))
|
568 |
ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='crimson')
|
569 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
570 |
ax.set_xlabel('Date')
|
571 |
ax.set_ylabel('Total Likes')
|
572 |
ax.grid(True, linestyle='--', alpha=0.7)
|
573 |
plt.xticks(rotation=45)
|
574 |
+
plt.tight_layout(fig=fig)
|
575 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
576 |
return fig
|
577 |
except Exception as e:
|
578 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
579 |
+
if fig: plt.close(fig)
|
580 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
581 |
finally:
|
582 |
+
pass
|
583 |
|
584 |
def generate_clicks_over_time_plot(df, date_column='published_at', clicks_col='clickCount'):
    """Log the request and delegate to ``generate_reach_over_time_plot``.

    Args:
        df: Post data; may be ``None`` (only its row count is inspected here).
        date_column: Name of the column holding the publication date.
        clicks_col: Name of the column holding the click counts.

    Returns:
        Whatever ``generate_reach_over_time_plot`` returns for the same
        arguments. The "Clicks Over Time" label below is used for the log
        message only; figure styling and layout are handled by the delegate.
    """
    plot_name = "Clicks Over Time"
    row_count = 'None' if df is None else len(df)
    logging.info(f"Generating {plot_name}. Date: '{date_column}', Clicks Col: '{clicks_col}'. DF rows: {row_count}")
    # The click column is passed through as the delegate's reach column.
    return generate_reach_over_time_plot(df, date_column, clicks_col)
|
590 |
|
591 |
|
592 |
def generate_shares_over_time_plot(df, date_column='published_at', shares_col='shareCount'):
|
|
|
593 |
title = "Shares Over Time"
|
594 |
logging.info(f"Generating {title}. Date: '{date_column}', Shares Col: '{shares_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
595 |
if df is None or df.empty:
|
|
|
597 |
required_cols = [date_column, shares_col]
|
598 |
if any(col not in df.columns for col in required_cols):
|
599 |
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
600 |
+
|
601 |
+
fig = None
|
602 |
try:
|
603 |
df_copy = df.copy()
|
604 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
|
|
610 |
data_over_time = df_copy.resample('D')[shares_col].sum()
|
611 |
fig, ax = plt.subplots(figsize=(10, 5))
|
612 |
ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='teal')
|
613 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
614 |
ax.set_xlabel('Date')
|
615 |
ax.set_ylabel('Total Shares')
|
616 |
ax.grid(True, linestyle='--', alpha=0.7)
|
617 |
plt.xticks(rotation=45)
|
618 |
+
plt.tight_layout(fig=fig)
|
619 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
620 |
return fig
|
621 |
except Exception as e:
|
622 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
623 |
+
if fig: plt.close(fig)
|
624 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
625 |
finally:
|
626 |
+
pass
|
627 |
|
628 |
def generate_comments_over_time_plot(df, date_column='published_at', comments_col='commentCount'):
|
|
|
629 |
title = "Comments Over Time"
|
630 |
logging.info(f"Generating {title}. Date: '{date_column}', Comments Col: '{comments_col}'. DF rows: {len(df) if df is not None else 'None'}")
|
631 |
if df is None or df.empty:
|
|
|
633 |
required_cols = [date_column, comments_col]
|
634 |
if any(col not in df.columns for col in required_cols):
|
635 |
return create_placeholder_plot(title=title, message=f"Missing one of required columns: {required_cols}. Available: {df.columns.tolist()}")
|
636 |
+
|
637 |
+
fig = None
|
638 |
try:
|
639 |
df_copy = df.copy()
|
640 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
|
|
646 |
data_over_time = df_copy.resample('D')[comments_col].sum()
|
647 |
fig, ax = plt.subplots(figsize=(10, 5))
|
648 |
ax.plot(data_over_time.index, data_over_time.values, marker='.', linestyle='-', color='gold')
|
649 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
650 |
ax.set_xlabel('Date')
|
651 |
ax.set_ylabel('Total Comments')
|
652 |
ax.grid(True, linestyle='--', alpha=0.7)
|
653 |
plt.xticks(rotation=45)
|
654 |
+
plt.tight_layout(fig=fig)
|
655 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
656 |
return fig
|
657 |
except Exception as e:
|
658 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
659 |
+
if fig: plt.close(fig)
|
660 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
661 |
finally:
|
662 |
+
pass
|
663 |
|
664 |
def generate_comments_sentiment_breakdown_plot(df, sentiment_column='comment_sentiment', date_column=None):
    """Create a pie chart of comment counts per sentiment label.

    Args:
        df: DataFrame with one row per item and a sentiment label column.
        sentiment_column: Name of the label column. If it is absent and a
            column named ``'sentiment'`` exists, that column is used instead.
        date_column: Accepted for signature compatibility; not used here.

    Returns:
        The Matplotlib figure containing the pie chart, or the figure returned
        by ``create_placeholder_plot`` when there is no usable data or when
        plotting raises.
    """
    title = "Breakdown of Comments by Sentiment"
    logging.info(f"Generating {title}. Sentiment Col: '{sentiment_column}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No data for comment sentiment.")
    if sentiment_column not in df.columns:
        if 'sentiment' in df.columns and sentiment_column != 'sentiment':
            logging.warning(f"Sentiment column '{sentiment_column}' not found, attempting to use 'sentiment' column as fallback for comment sentiment plot.")
            sentiment_column = 'sentiment'
        else:
            return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' (and fallback 'sentiment') not found. Available: {df.columns.tolist()}")

    if df[sentiment_column].isnull().all():
        return create_placeholder_plot(title=title, message=f"Sentiment column '{sentiment_column}' contains no valid data.")

    fig = None
    try:
        # Drop missing values BEFORE casting to str: astype(str) would turn
        # NaN/None into the literal labels 'nan'/'None', which would then be
        # counted and drawn as a pie slice.
        sentiment_labels = df[sentiment_column].dropna().astype(str)
        sentiment_counts = sentiment_labels.value_counts()

        if sentiment_counts.empty or sentiment_counts.sum() == 0:
            return create_placeholder_plot(title=title, message="No comment sentiment data to display after processing.")

        fig, ax = plt.subplots(figsize=(8, 5))
        # pyplot.get_cmap is used instead of matplotlib.cm.get_cmap, which is
        # deprecated and no longer available in recent Matplotlib releases.
        colors_map = plt.get_cmap('coolwarm', len(sentiment_counts))
        pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title(title, y=1.03)  # Matplotlib title
        ax.axis('equal')
        # pyplot.tight_layout() takes no `fig` keyword (passing one raises
        # TypeError), so lay out this specific figure through its own method.
        fig.tight_layout()
        fig.subplots_adjust(top=0.88)  # Add space for Gradio label
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        # Release the half-built figure so it does not linger in pyplot's registry.
        if fig is not None:
            plt.close(fig)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
704 |
|
|
|
705 |
def generate_post_frequency_plot(df, date_column='published_at', resample_period='D'):
|
|
|
706 |
title = f"Post Frequency Over Time ({resample_period})"
|
707 |
logging.info(f"Generating {title}. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
708 |
|
|
|
711 |
if date_column not in df.columns:
|
712 |
return create_placeholder_plot(title=title, message=f"Date column '{date_column}' not found.")
|
713 |
|
714 |
+
fig = None
|
715 |
try:
|
716 |
df_copy = df.copy()
|
717 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
718 |
df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
|
719 |
+
|
720 |
df_copy = df_copy.dropna(subset=[date_column])
|
721 |
if df_copy.empty:
|
722 |
return create_placeholder_plot(title=title, message="No valid date entries found.")
|
723 |
|
724 |
post_frequency = df_copy.set_index(date_column).resample(resample_period).size()
|
725 |
+
|
726 |
if post_frequency.empty:
|
727 |
return create_placeholder_plot(title=title, message=f"No posts found for the period after resampling by '{resample_period}'.")
|
728 |
|
729 |
fig, ax = plt.subplots(figsize=(10, 5))
|
730 |
post_frequency.plot(kind='bar' if resample_period in ['M', 'W'] else 'line', ax=ax, marker='o' if resample_period=='D' else None)
|
731 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
732 |
ax.set_xlabel('Date' if resample_period == 'D' else 'Period')
|
733 |
ax.set_ylabel('Number of Posts')
|
734 |
ax.grid(True, linestyle='--', alpha=0.7)
|
735 |
plt.xticks(rotation=45)
|
736 |
+
plt.tight_layout(fig=fig)
|
737 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
738 |
logging.info(f"Successfully generated {title} plot.")
|
739 |
return fig
|
740 |
except Exception as e:
|
741 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
742 |
+
if fig: plt.close(fig)
|
743 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
744 |
finally:
|
745 |
+
pass
|
746 |
|
747 |
def generate_content_format_breakdown_plot(df, format_col='media_type'):
|
|
|
748 |
title = "Breakdown of Content by Format"
|
749 |
logging.info(f"Generating {title}. Format column: '{format_col}'. Input df rows: {len(df) if df is not None else 'None'}")
|
750 |
|
|
|
753 |
if format_col not in df.columns:
|
754 |
return create_placeholder_plot(title=title, message=f"Format column '{format_col}' not found. Available: {df.columns.tolist()}")
|
755 |
|
756 |
+
fig = None
|
757 |
try:
|
758 |
df_copy = df.copy()
|
759 |
format_counts = df_copy[format_col].value_counts().dropna()
|
|
|
763 |
|
764 |
fig, ax = plt.subplots(figsize=(8, 6))
|
765 |
format_counts.plot(kind='bar', ax=ax, color='skyblue')
|
766 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
767 |
ax.set_xlabel('Media Type')
|
768 |
ax.set_ylabel('Number of Posts')
|
769 |
ax.grid(axis='y', linestyle='--', alpha=0.7)
|
770 |
plt.xticks(rotation=45, ha="right")
|
771 |
+
|
|
|
|
|
772 |
for i, v in enumerate(format_counts):
|
773 |
ax.text(i, v + (0.01 * format_counts.max()), str(v), ha='center', va='bottom')
|
774 |
+
|
775 |
+
plt.tight_layout(fig=fig)
|
776 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
777 |
logging.info(f"Successfully generated {title} plot.")
|
778 |
return fig
|
779 |
except Exception as e:
|
780 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
781 |
+
if fig: plt.close(fig)
|
782 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
783 |
finally:
|
784 |
+
pass
|
785 |
|
786 |
def _parse_eb_label(label_data):
|
|
|
787 |
if isinstance(label_data, list):
|
788 |
return label_data
|
789 |
if isinstance(label_data, str):
|
790 |
try:
|
|
|
791 |
parsed = ast.literal_eval(label_data)
|
792 |
if isinstance(parsed, list):
|
793 |
return parsed
|
794 |
+
return [str(parsed)]
|
|
|
795 |
except (ValueError, SyntaxError):
|
|
|
796 |
return [label_data] if label_data.strip() else []
|
797 |
if pd.isna(label_data):
|
798 |
return []
|
799 |
+
return []
|
800 |
|
801 |
def generate_content_topic_breakdown_plot(df, topics_col='eb_labels', top_n=15):
|
|
|
802 |
title = f"Breakdown of Content by Topics (Top {top_n})"
|
803 |
logging.info(f"Generating {title}. Topics column: '{topics_col}'. Input df rows: {len(df) if df is not None else 'None'}")
|
804 |
|
|
|
807 |
if topics_col not in df.columns:
|
808 |
return create_placeholder_plot(title=title, message=f"Topics column '{topics_col}' not found. Available: {df.columns.tolist()}")
|
809 |
|
810 |
+
fig = None
|
811 |
try:
|
812 |
df_copy = df.copy()
|
|
|
|
|
813 |
parsed_labels = df_copy[topics_col].apply(_parse_eb_label)
|
814 |
exploded_labels = parsed_labels.explode().dropna()
|
815 |
|
|
|
820 |
|
821 |
if topic_counts.empty:
|
822 |
return create_placeholder_plot(title=title, message="No topics to display after counting.")
|
823 |
+
|
|
|
824 |
top_topics = topic_counts.nlargest(top_n).sort_values(ascending=True)
|
825 |
|
826 |
fig, ax = plt.subplots(figsize=(10, 8 if len(top_topics) > 5 else 6))
|
827 |
top_topics.plot(kind='barh', ax=ax, color='mediumseagreen')
|
828 |
+
ax.set_title(title, y=1.03) # Matplotlib title
|
829 |
ax.set_xlabel('Number of Posts')
|
830 |
ax.set_ylabel('Topic')
|
831 |
+
|
|
|
832 |
for i, (topic, count) in enumerate(top_topics.items()):
|
833 |
ax.text(count + (0.01 * top_topics.max()), i, str(count), va='center')
|
834 |
+
|
835 |
+
plt.tight_layout(fig=fig)
|
836 |
+
fig.subplots_adjust(top=0.88) # Add space for Gradio label
|
837 |
logging.info(f"Successfully generated {title} plot.")
|
838 |
return fig
|
839 |
except Exception as e:
|
840 |
logging.error(f"Error generating {title}: {e}", exc_info=True)
|
841 |
+
if fig: plt.close(fig)
|
842 |
return create_placeholder_plot(title=f"{title} Error", message=str(e))
|
843 |
finally:
|
844 |
+
pass
|
845 |
|
846 |
|
847 |
if __name__ == '__main__':
|
848 |
# Create dummy data for testing
|
849 |
posts_data = {
|
850 |
+
'id': [f'post{i}' for i in range(1, 8)],
|
851 |
'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
|
852 |
'likeCount': [10, 5, 12, 8, 15, 3, 20],
|
853 |
'commentCount': [2, 1, 3, 1, 4, 0, 5],
|
854 |
+
'shareCount': [1, 0, 1, 1, 2, 0, 1],
|
855 |
'clickCount': [20, 15, 30, 22, 40, 10, 50],
|
856 |
'impressionCount': [200, 150, 300, 220, 400, 100, 500],
|
857 |
'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08],
|
858 |
+
'media_type': ['TEXT', 'IMAGE', 'TEXT', 'VIDEO', 'IMAGE', 'TEXT', 'IMAGE'],
|
859 |
+
'eb_labels': [
|
860 |
+
"['AI', 'Tech']",
|
861 |
+
['Innovation'],
|
862 |
+
'General',
|
863 |
+
None,
|
864 |
['Tech', 'Future'],
|
865 |
"['AI', 'Development']",
|
866 |
['Tech']
|
867 |
],
|
868 |
+
'comment_sentiment': ['Positive', 'Neutral', 'Positive', 'Negative', 'Positive', 'Neutral', 'Positive']
|
869 |
}
|
870 |
sample_merged_posts_df = pd.DataFrame(posts_data)
|
871 |
|
|
|
872 |
follower_data = {
|
873 |
'follower_count_type': [
|
874 |
+
'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
|
875 |
'follower_geo', 'follower_geo', 'follower_geo',
|
876 |
'follower_function', 'follower_function',
|
877 |
'follower_industry', 'follower_industry',
|
878 |
'follower_seniority', 'follower_seniority'
|
879 |
],
|
880 |
'category_name': [
|
881 |
+
'2024-01-01', '2024-02-01', '2024-03-01',
|
882 |
+
'USA', 'Canada', 'UK',
|
883 |
+
'Engineering', 'Sales',
|
884 |
+
'Tech', 'Finance',
|
885 |
+
'Senior', 'Junior'
|
886 |
],
|
887 |
'follower_count_organic': [
|
888 |
+
100, 110, 125,
|
889 |
+
500, 300, 150,
|
890 |
+
400, 200,
|
891 |
+
250, 180,
|
892 |
+
300, 220
|
893 |
],
|
894 |
'follower_count_paid': [
|
895 |
+
20, 30, 25,
|
896 |
+
50, 40, 60,
|
897 |
+
30, 20,
|
898 |
+
45, 35,
|
899 |
+
60, 40
|
900 |
]
|
901 |
}
|
902 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
903 |
|
904 |
logging.info("--- Testing Existing Plot Generations ---")
|
|
|
905 |
fig_posts_activity = generate_posts_activity_plot(sample_merged_posts_df.copy())
|
906 |
if fig_posts_activity: logging.info("Posts activity plot generated.")
|
907 |
+
|
908 |
fig_engagement_type = generate_engagement_type_plot(sample_merged_posts_df.copy())
|
909 |
if fig_engagement_type: logging.info("Engagement type plot generated.")
|
910 |
|
|
|
924 |
if fig_followers_rate: logging.info("Followers Growth Rate plot generated.")
|
925 |
fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
|
926 |
if fig_geo: logging.info("Followers by Location plot generated.")
|
|
|
927 |
|
928 |
fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
|
929 |
if fig_eng_rate: logging.info("Engagement Rate Over Time plot generated.")
|
|
|
931 |
if fig_reach: logging.info("Reach Over Time (Clicks) plot generated.")
|
932 |
fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
|
933 |
if fig_impressions: logging.info("Impressions Over Time plot generated.")
|
934 |
+
|
935 |
fig_likes_time = generate_likes_over_time_plot(sample_merged_posts_df.copy())
|
936 |
if fig_likes_time: logging.info("Likes Over Time plot generated.")
|
937 |
+
fig_clicks_time = generate_clicks_over_time_plot(sample_merged_posts_df.copy())
|
938 |
if fig_clicks_time: logging.info("Clicks Over Time plot generated.")
|
939 |
fig_shares_time = generate_shares_over_time_plot(sample_merged_posts_df.copy())
|
940 |
if fig_shares_time: logging.info("Shares Over Time plot generated.")
|
|
|
947 |
logging.info("--- Testing NEW Plot Generations for Content Strategy ---")
|
948 |
fig_post_freq = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='D')
|
949 |
if fig_post_freq: logging.info("Post Frequency (Daily) plot generated.")
|
950 |
+
|
951 |
fig_post_freq_w = generate_post_frequency_plot(sample_merged_posts_df.copy(), date_column='published_at', resample_period='W')
|
952 |
if fig_post_freq_w: logging.info("Post Frequency (Weekly) plot generated.")
|
953 |
|
|
|
956 |
|
957 |
fig_content_topics = generate_content_topic_breakdown_plot(sample_merged_posts_df.copy(), topics_col='eb_labels', top_n=5)
|
958 |
if fig_content_topics: logging.info("Content Topic Breakdown plot generated.")
|
959 |
+
|
|
|
960 |
logging.info("--- Testing NEW Plot Generations with Edge Cases ---")
|
961 |
empty_df = pd.DataFrame()
|
962 |
fig_post_freq_empty = generate_post_frequency_plot(empty_df.copy())
|
963 |
if fig_post_freq_empty: logging.info("Post Frequency (empty df) placeholder generated.")
|
964 |
+
|
965 |
fig_content_format_missing_col = generate_content_format_breakdown_plot(sample_merged_posts_df.copy(), format_col='non_existent_col')
|
966 |
if fig_content_format_missing_col: logging.info("Content Format (missing col) placeholder generated.")
|
967 |
|
968 |
+
fig_content_topics_no_labels = generate_content_topic_breakdown_plot(sample_merged_posts_df[['id', 'published_at']].copy(), topics_col='eb_labels')
|
969 |
if fig_content_topics_no_labels: logging.info("Content Topic (missing col) placeholder generated.")
|
970 |
|
971 |
df_no_topics_data = sample_merged_posts_df.copy()
|