Spaces:
Running
Running
Update analytics_plot_generators.py
Browse files- analytics_plot_generators.py +393 -103
analytics_plot_generators.py
CHANGED
@@ -4,6 +4,7 @@ import logging
|
|
4 |
from io import BytesIO
|
5 |
import base64
|
6 |
import numpy as np
|
|
|
7 |
|
8 |
# Configure logging for this module
|
9 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
@@ -18,28 +19,21 @@ def create_placeholder_plot(title="No Data or Plot Error", message="Data might b
|
|
18 |
return fig
|
19 |
except Exception as e:
|
20 |
logging.error(f"Error creating placeholder plot: {e}")
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
def generate_posts_activity_plot(df, date_column='published_at'): # Default changed as per common use
|
33 |
-
"""
|
34 |
-
Generates a plot for posts activity over time.
|
35 |
-
Assumes df has a date_column (e.g., 'published_at') and groups by date to count posts.
|
36 |
-
"""
|
37 |
logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
38 |
if df is None or df.empty:
|
39 |
logging.warning(f"Posts activity: DataFrame is empty.")
|
40 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.")
|
41 |
if date_column not in df.columns:
|
42 |
-
logging.warning(f"Posts activity: Date column '{date_column}' is missing
|
43 |
return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
|
44 |
|
45 |
try:
|
@@ -55,8 +49,8 @@ def generate_posts_activity_plot(df, date_column='published_at'): # Default chan
|
|
55 |
posts_over_time = df_copy.set_index(date_column).resample('D').size()
|
56 |
|
57 |
if posts_over_time.empty:
|
58 |
-
|
59 |
-
|
60 |
|
61 |
fig, ax = plt.subplots(figsize=(10, 5))
|
62 |
posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
|
@@ -74,11 +68,8 @@ def generate_posts_activity_plot(df, date_column='published_at'): # Default chan
|
|
74 |
finally:
|
75 |
plt.close('all')
|
76 |
|
77 |
-
def generate_engagement_type_plot(df, likes_col='
|
78 |
-
"""
|
79 |
-
Generates a bar plot for total engagement types (likes, comments, shares).
|
80 |
-
Input df is expected to be pre-filtered by date if necessary.
|
81 |
-
"""
|
82 |
logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
|
83 |
|
84 |
required_cols = [likes_col, comments_col, shares_col]
|
@@ -93,8 +84,8 @@ def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='com
|
|
93 |
return create_placeholder_plot(title="Post Engagement Types", message=msg)
|
94 |
|
95 |
try:
|
96 |
-
df_copy = df.copy()
|
97 |
-
for col in required_cols:
|
98 |
df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
|
99 |
|
100 |
total_likes = df_copy[likes_col].sum()
|
@@ -131,19 +122,16 @@ def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='com
|
|
131 |
finally:
|
132 |
plt.close('all')
|
133 |
|
134 |
-
def generate_mentions_activity_plot(df, date_column='date'):
|
135 |
-
"""
|
136 |
-
Generates a plot for mentions activity over time.
|
137 |
-
Assumes df has a date_column (e.g., 'date') and groups by date to count mentions.
|
138 |
-
"""
|
139 |
logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
140 |
if df is None or df.empty:
|
141 |
logging.warning(f"Mentions activity: DataFrame is empty.")
|
142 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.")
|
143 |
if date_column not in df.columns:
|
144 |
-
logging.warning(f"Mentions activity: Date column '{date_column}' is missing
|
145 |
return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
|
146 |
-
|
147 |
try:
|
148 |
df_copy = df.copy()
|
149 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
@@ -177,10 +165,7 @@ def generate_mentions_activity_plot(df, date_column='date'): # Default changed a
|
|
177 |
plt.close('all')
|
178 |
|
179 |
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
180 |
-
"""
|
181 |
-
Generates a pie chart for mention sentiment distribution.
|
182 |
-
Input df is expected to be pre-filtered by date if necessary.
|
183 |
-
"""
|
184 |
logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
185 |
|
186 |
if df is None or df.empty:
|
@@ -199,8 +184,12 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
199 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
200 |
|
201 |
fig, ax = plt.subplots(figsize=(8, 5))
|
202 |
-
|
203 |
-
|
|
|
|
|
|
|
|
|
204 |
|
205 |
|
206 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
@@ -215,104 +204,405 @@ def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
|
215 |
finally:
|
216 |
plt.close('all')
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
Generates a plot for follower growth over time.
|
221 |
-
This function
|
222 |
-
|
223 |
-
logging.info(f"Generating follower growth plot. Date col: '{date_column}', Count col: '{count_column}'.
|
224 |
-
|
225 |
if df is None or df.empty:
|
226 |
-
|
227 |
-
return create_placeholder_plot(title="Follower Growth Over Time", message="No follower data available.")
|
228 |
if date_column not in df.columns or count_column not in df.columns:
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
try:
|
237 |
df_copy = df.copy()
|
238 |
-
|
239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
|
241 |
-
|
242 |
-
df_copy =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
|
244 |
if df_copy.empty:
|
245 |
-
|
246 |
-
return create_placeholder_plot(title="Follower Growth Over Time", message="No valid data for follower growth.")
|
247 |
|
248 |
-
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
fig, ax = plt.subplots(figsize=(10, 5))
|
251 |
-
ax.plot(
|
252 |
-
ax.set_title(
|
253 |
ax.set_xlabel('Date')
|
254 |
-
ax.set_ylabel('Total
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
ax.grid(True, linestyle='--', alpha=0.7)
|
256 |
plt.xticks(rotation=45)
|
257 |
plt.tight_layout()
|
258 |
-
logging.info("Successfully generated follower growth plot.")
|
259 |
return fig
|
260 |
except Exception as e:
|
261 |
-
logging.error(f"Error generating
|
262 |
-
return create_placeholder_plot(title="
|
263 |
finally:
|
264 |
plt.close('all')
|
265 |
|
|
|
266 |
if __name__ == '__main__':
|
267 |
# Create dummy data for testing
|
|
|
268 |
posts_data = {
|
269 |
-
'
|
270 |
-
'
|
271 |
-
'
|
272 |
-
'
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
|
278 |
-
'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral']
|
279 |
}
|
280 |
-
|
281 |
|
|
|
282 |
follower_data = {
|
283 |
-
'date': pd.to_datetime(['2023-01-01', '2023-01-
|
284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
}
|
286 |
sample_follower_stats_df = pd.DataFrame(follower_data)
|
|
|
|
|
287 |
|
288 |
-
logging.info("--- Testing Plot Generations ---")
|
289 |
|
290 |
-
|
291 |
-
if fig1: logging.info("Posts activity plot generated.") # plt.show() for local test
|
292 |
|
293 |
-
|
294 |
-
if
|
295 |
|
296 |
-
|
297 |
-
if
|
298 |
|
299 |
-
|
300 |
-
if
|
301 |
-
|
302 |
-
fig5 = generate_follower_growth_plot(sample_follower_stats_df.copy(), date_column='date', count_column='total_followers')
|
303 |
-
if fig5: logging.info("Follower growth plot generated.")
|
304 |
-
|
305 |
-
logging.info("--- Testing Placeholders ---")
|
306 |
-
fig_placeholder = create_placeholder_plot()
|
307 |
-
if fig_placeholder: logging.info("Placeholder plot generated.")
|
308 |
|
309 |
-
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
-
|
314 |
-
|
315 |
-
|
|
|
|
|
316 |
|
317 |
|
318 |
-
logging.info("Test script finished.")
|
|
|
4 |
from io import BytesIO
|
5 |
import base64
|
6 |
import numpy as np
|
7 |
+
import matplotlib.ticker as mticker
|
8 |
|
9 |
# Configure logging for this module
|
10 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
|
|
19 |
return fig
|
20 |
except Exception as e:
|
21 |
logging.error(f"Error creating placeholder plot: {e}")
|
22 |
+
# Fallback placeholder if the above fails
|
23 |
+
fig_err, ax_err = plt.subplots()
|
24 |
+
ax_err.text(0.5, 0.5, "Fatal: Plot generation error", ha='center', va='center')
|
25 |
+
ax_err.axis('off')
|
26 |
+
return fig_err
|
27 |
+
# No plt.close(fig) here as Gradio handles the figure object.
|
28 |
+
|
29 |
+
def generate_posts_activity_plot(df, date_column='published_at'):
|
30 |
+
"""Generates a plot for posts activity over time."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
32 |
if df is None or df.empty:
|
33 |
logging.warning(f"Posts activity: DataFrame is empty.")
|
34 |
return create_placeholder_plot(title="Posts Activity Over Time", message="No data available for the selected period.")
|
35 |
if date_column not in df.columns:
|
36 |
+
logging.warning(f"Posts activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
37 |
return create_placeholder_plot(title="Posts Activity Over Time", message=f"Date column '{date_column}' not found.")
|
38 |
|
39 |
try:
|
|
|
49 |
posts_over_time = df_copy.set_index(date_column).resample('D').size()
|
50 |
|
51 |
if posts_over_time.empty:
|
52 |
+
logging.info("Posts activity: No posts after resampling by day.")
|
53 |
+
return create_placeholder_plot(title="Posts Activity Over Time", message="No posts in the selected period.")
|
54 |
|
55 |
fig, ax = plt.subplots(figsize=(10, 5))
|
56 |
posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
|
|
|
68 |
finally:
|
69 |
plt.close('all')
|
70 |
|
71 |
+
def generate_engagement_type_plot(df, likes_col='likeCount', comments_col='commentCount', shares_col='shareCount'): # Updated col names
|
72 |
+
"""Generates a bar plot for total engagement types (likes, comments, shares)."""
|
|
|
|
|
|
|
73 |
logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")
|
74 |
|
75 |
required_cols = [likes_col, comments_col, shares_col]
|
|
|
84 |
return create_placeholder_plot(title="Post Engagement Types", message=msg)
|
85 |
|
86 |
try:
|
87 |
+
df_copy = df.copy()
|
88 |
+
for col in required_cols:
|
89 |
df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce').fillna(0)
|
90 |
|
91 |
total_likes = df_copy[likes_col].sum()
|
|
|
122 |
finally:
|
123 |
plt.close('all')
|
124 |
|
125 |
+
def generate_mentions_activity_plot(df, date_column='date'):
|
126 |
+
"""Generates a plot for mentions activity over time."""
|
|
|
|
|
|
|
127 |
logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
128 |
if df is None or df.empty:
|
129 |
logging.warning(f"Mentions activity: DataFrame is empty.")
|
130 |
return create_placeholder_plot(title="Mentions Activity Over Time", message="No data available for the selected period.")
|
131 |
if date_column not in df.columns:
|
132 |
+
logging.warning(f"Mentions activity: Date column '{date_column}' is missing. Cols: {df.columns.tolist()}.")
|
133 |
return create_placeholder_plot(title="Mentions Activity Over Time", message=f"Date column '{date_column}' not found.")
|
134 |
+
|
135 |
try:
|
136 |
df_copy = df.copy()
|
137 |
if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
|
|
|
165 |
plt.close('all')
|
166 |
|
167 |
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
|
168 |
+
"""Generates a pie chart for mention sentiment distribution."""
|
|
|
|
|
|
|
169 |
logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")
|
170 |
|
171 |
if df is None or df.empty:
|
|
|
184 |
return create_placeholder_plot(title="Mention Sentiment Distribution", message="No sentiment data available.")
|
185 |
|
186 |
fig, ax = plt.subplots(figsize=(8, 5))
|
187 |
+
# Define a broader range of colors or a colormap for more sentiment types
|
188 |
+
colors_map = plt.cm.get_cmap('viridis', len(sentiment_counts))
|
189 |
+
pie_colors = [colors_map(i) for i in range(len(sentiment_counts))]
|
190 |
+
# Or keep your specific colors if sentiment labels are fixed:
|
191 |
+
# colors = {'Positive': 'lightgreen', 'Negative': 'salmon', 'Neutral': 'lightskyblue', 'Mixed': 'gold'}
|
192 |
+
# pie_colors = [colors.get(label, '#cccccc') for label in sentiment_counts.index]
|
193 |
|
194 |
|
195 |
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
|
|
|
204 |
finally:
|
205 |
plt.close('all')
|
206 |
|
207 |
+
# --- Existing Follower Growth Plot (can be reused or adapted) ---
|
208 |
+
def generate_total_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """Draw the total follower count against time as a line chart.

    Args:
        df: DataFrame holding one follower total per date (may be None).
        date_column: Name of the column with the observation dates.
        count_column: Name of the column with the follower totals.

    Returns:
        A matplotlib figure. When the input is missing, lacks one of the two
        columns, has no row that survives cleaning, or plotting raises, the
        figure produced by ``create_placeholder_plot`` is returned instead.
    """
    row_info = len(df) if df is not None else 'None'
    logging.info(f"Generating total follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. DF rows: {row_info}")

    if df is None or df.empty:
        return create_placeholder_plot(title="Total Follower Growth", message="No follower data.")

    has_both_columns = date_column in df.columns and count_column in df.columns
    if not has_both_columns:
        return create_placeholder_plot(title="Total Follower Growth", message=f"Missing columns: {date_column} or {count_column}.")

    try:
        cleaned = df.copy()
        # Unparseable dates / counts become NaT / NaN and are dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[count_column] = pd.to_numeric(cleaned[count_column], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, count_column])
        cleaned = cleaned.sort_values(by=date_column)

        if cleaned.empty:
            return create_placeholder_plot(title="Total Follower Growth", message="No valid data after cleaning.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(
            cleaned[date_column],
            cleaned[count_column],
            marker='o',
            linestyle='-',
            color='green',
        )
        ax.set_title('Total Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error in generate_total_follower_growth_plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Total Follower Growth Error", message=str(e))
    finally:
        # Deregister figures from pyplot; the returned Figure object stays usable.
        plt.close('all')
|
239 |
+
|
240 |
+
# --- New Plot Functions ---
|
241 |
+
|
242 |
+
def generate_followers_count_over_time_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Plot the follower count of one record type against time.

    Rows whose ``type_filter_column`` equals ``type_value`` are selected, the
    date and count columns are coerced to datetime / numeric, rows that fail
    coercion are dropped, and the remainder is drawn as a line chart ordered
    by date.

    Args:
        df: Follower statistics DataFrame (may be None).
        date_column: Name of the column with the observation dates.
        count_column: Name of the column with the follower counts.
        type_filter_column: Name of the column used to select the record type.
        type_value: Value of ``type_filter_column`` to keep.

    Returns:
        A matplotlib figure. When the input is missing, lacks a required
        column, has no matching or valid rows, or plotting raises, the figure
        produced by ``create_placeholder_plot`` is returned instead.
    """
    title = f"Followers Count Over Time ({type_value})"
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Copy the filtered rows explicitly: assigning columns on a bare
        # boolean-mask slice is chained assignment (SettingWithCopyWarning).
        # This also leaves the caller's frame untouched.
        df_filtered = df.loc[df[type_filter_column] == type_value].copy()

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        # Unparseable dates / counts become NaT / NaN and are dropped below.
        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        df_filtered = df_filtered.dropna(subset=[date_column, count_column]).sort_values(by=date_column)

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning and filtering.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_filtered[date_column], df_filtered[count_column], marker='o', linestyle='-', color='dodgerblue')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Follower Count')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Deregister figures from pyplot; the returned Figure object stays usable.
        plt.close('all')
|
283 |
+
|
284 |
+
def generate_followers_growth_rate_plot(df, date_column='date', count_column='follower_count_o', type_filter_column='follower_count_type', type_value='follower_gains_monthly'):
    """Plot the period-over-period percentage change of a follower count.

    Rows whose ``type_filter_column`` equals ``type_value`` are selected,
    cleaned, ordered by date, and the growth rate is computed as
    ``(current - previous) / previous * 100`` between consecutive rows.

    Args:
        df: Follower statistics DataFrame (may be None).
        date_column: Name of the column with the observation dates.
        count_column: Name of the column with the follower counts.
        type_filter_column: Name of the column used to select the record type.
        type_value: Value of ``type_filter_column`` to keep.

    Returns:
        A matplotlib figure. When the input is missing, lacks a required
        column, has fewer than two valid rows, yields no finite growth rate,
        or plotting raises, the figure produced by ``create_placeholder_plot``
        is returned instead.
    """
    title = f"Follower Growth Rate ({type_value})"
    logging.info(f"Generating {title}. Date: '{date_column}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No follower data available.")

    required_cols = [date_column, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}.")

    try:
        # Copy the filtered rows explicitly: assigning columns on a bare
        # boolean-mask slice is chained assignment (SettingWithCopyWarning).
        df_filtered = df.loc[df[type_filter_column] == type_value].copy()

        if df_filtered.empty:
            return create_placeholder_plot(title=title, message=f"No data for type '{type_value}'.")

        df_filtered[date_column] = pd.to_datetime(df_filtered[date_column], errors='coerce')
        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce')
        df_filtered = (
            df_filtered
            .dropna(subset=[date_column, count_column])
            .sort_values(by=date_column)
            .set_index(date_column)
        )

        # A rate needs a previous value, so at least two rows are required.
        if len(df_filtered) < 2:
            return create_placeholder_plot(title=title, message="Not enough data points to calculate growth rate.")

        # NaNs were dropped above, so disabling the (deprecated) implicit
        # forward-fill does not change the result; it only avoids the warning.
        growth_rate = df_filtered[count_column].pct_change(fill_method=None) * 100
        # A previous value of 0 yields +/-inf (or NaN for 0 -> 0); the first
        # row is always NaN. None of these can be plotted, so drop them.
        growth_rate = growth_rate.replace([np.inf, -np.inf], np.nan).dropna()

        if growth_rate.empty:
            return create_placeholder_plot(title=title, message="No valid growth rate data after calculation.")

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(growth_rate.index, growth_rate, marker='o', linestyle='-', color='lightcoral')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Growth Rate (%)')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter())
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Deregister figures from pyplot; the returned Figure object stays usable.
        plt.close('all')
|
337 |
+
|
338 |
+
def generate_followers_by_demographics_plot(df, category_col='category_name', count_column='follower_count_o', type_filter_column='follower_count_type', type_value=None, plot_title="Followers by Demographics"):
    """Draw a bar chart of follower counts summed per demographic category.

    Rows whose ``type_filter_column`` equals ``type_value`` are selected, the
    count column is coerced to numeric (non-numeric values count as 0), and
    the counts are summed per ``category_col``. Only the 10 largest categories
    are shown; in that case " (Top 10)" is appended to the chart title.

    Args:
        df: Follower statistics DataFrame (may be None).
        category_col: Name of the column holding the category labels.
        count_column: Name of the column with the follower counts.
        type_filter_column: Name of the column used to select the record type.
        type_value: Value of ``type_filter_column`` to keep; must be given.
        plot_title: Title of the chart and of any placeholder figure.

    Returns:
        A matplotlib figure. When the input is missing, lacks a required
        column, ``type_value`` is None, nothing matches, or plotting raises,
        the figure produced by ``create_placeholder_plot`` is returned instead.
    """
    logging.info(f"Generating {plot_title}. Category: '{category_col}', Count: '{count_column}', Type Filter: '{type_filter_column}=={type_value}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=plot_title, message="No follower data available.")

    required_cols = [category_col, count_column, type_filter_column]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=plot_title, message=f"Missing columns: {missing_cols}.")

    if type_value is None:
        return create_placeholder_plot(title=plot_title, message="Demographic type (type_value) not specified.")

    try:
        # Copy the filtered rows explicitly: assigning a column on a bare
        # boolean-mask slice is chained assignment (SettingWithCopyWarning).
        df_filtered = df.loc[df[type_filter_column] == type_value].copy()

        if df_filtered.empty:
            return create_placeholder_plot(title=plot_title, message=f"No data for demographic type '{type_value}'.")

        df_filtered[count_column] = pd.to_numeric(df_filtered[count_column], errors='coerce').fillna(0)

        # Total followers per category, largest first.
        demographics_data = df_filtered.groupby(category_col)[count_column].sum().sort_values(ascending=False)

        if demographics_data.empty:
            return create_placeholder_plot(title=plot_title, message="No demographic data to display after filtering and aggregation.")

        # Keep the chart readable when there are many categories.
        top_n = 10
        if len(demographics_data) > top_n:
            demographics_data = demographics_data.head(top_n)
            plot_title += f" (Top {top_n})"

        figsize = (10, 6) if len(demographics_data) > 5 else (8, 5)
        fig, ax = plt.subplots(figsize=figsize)
        demographics_data.plot(kind='bar', ax=ax, color='teal')
        ax.set_title(plot_title)
        ax.set_xlabel(category_col.replace('_', ' ').title())
        ax.set_ylabel('Number of Followers')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {plot_title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{plot_title} Error", message=str(e))
    finally:
        # Deregister figures from pyplot; the returned Figure object stays usable.
        plt.close('all')
|
389 |
+
|
390 |
+
|
391 |
+
def generate_engagement_rate_over_time_plot(df, date_column='published_at', engagement_rate_col='engagement'):
    """Draw the daily mean of an engagement-rate column as a line chart.

    Args:
        df: Posts DataFrame (may be None).
        date_column: Name of the column with the post dates.
        engagement_rate_col: Name of the column with the per-post rate.

    Returns:
        A matplotlib figure. When the input is missing, lacks a required
        column, has no valid rows, or plotting raises, the figure produced by
        ``create_placeholder_plot`` is returned instead.
    """
    title = "Engagement Rate Over Time"
    row_info = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Rate Col: '{engagement_rate_col}'. DF rows: {row_info}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for engagement rate.")

    absent = [name for name in (date_column, engagement_rate_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(title=title, message=f"Missing columns: {absent}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        # The column is treated as an existing rate, not an absolute count.
        cleaned[engagement_rate_col] = pd.to_numeric(cleaned[engagement_rate_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, engagement_rate_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # Mean rate per calendar day; days without any post are left out.
        daily_rate = cleaned[engagement_rate_col].resample('D').mean().dropna()

        if daily_rate.empty:
            return create_placeholder_plot(title=title, message="No engagement rate data to display after resampling.")

        # Values up to 1 are read as fractions (0.05 == 5%), anything larger
        # as already being expressed in percent.
        if daily_rate.max() <= 1:
            percent_scale = 1.0
        else:
            percent_scale = 100.0

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(
            daily_rate.index,
            daily_rate.values,
            marker='.',
            linestyle='-',
            color='darkorange',
        )
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Engagement Rate')
        ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=percent_scale))
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Deregister figures from pyplot; the returned Figure object stays usable.
        plt.close('all')
|
436 |
+
|
437 |
+
def generate_reach_over_time_plot(df, date_column='published_at', reach_col='clickCount'):
    """Draw the daily sum of a reach column as a line chart.

    The default ``reach_col`` is the click count, which this chart uses as a
    proxy for reach (hence the "(Clicks)" in the title).

    Args:
        df: Posts DataFrame (may be None).
        date_column: Name of the column with the post dates.
        reach_col: Name of the column whose values are summed per day.

    Returns:
        A matplotlib figure. When the input is missing, lacks a required
        column, has no valid rows, or plotting raises, the figure produced by
        ``create_placeholder_plot`` is returned instead.
    """
    title = "Reach Over Time (Clicks)"
    logging.info(f"Generating {title}. Date: '{date_column}', Reach Col: '{reach_col}'. DF rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for reach.")

    required_cols = [date_column, reach_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        return create_placeholder_plot(title=title, message=f"Missing columns: {missing_cols}. Available: {df.columns.tolist()}")

    try:
        df_copy = df.copy()
        # Unparseable dates / values become NaT / NaN and are dropped below.
        df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[reach_col] = pd.to_numeric(df_copy[reach_col], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column, reach_col]).set_index(date_column)

        if df_copy.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # An all-zero series is plotted as a flat line rather than replaced
        # by a placeholder, so no further emptiness check is made here.
        reach_over_time = df_copy.resample('D')[reach_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(reach_over_time.index, reach_over_time.values, marker='.', linestyle='-', color='mediumseagreen')
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Clicks')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error generating {title}: {e}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(e))
    finally:
        # Deregister figures from pyplot; the returned Figure object stays usable.
        plt.close('all')
|
481 |
+
|
482 |
+
def generate_impressions_over_time_plot(df, date_column='published_at', impressions_col='impressionCount'):
    """Plot the daily sum of post impressions as a line chart.

    Args:
        df: Posts DataFrame; may be None or empty.
        date_column: Name of the column holding each post's date.
        impressions_col: Name of the column holding the impression counts.

    Returns:
        The matplotlib figure with the daily totals, or the result of
        ``create_placeholder_plot`` when the input is None/empty, a required
        column is missing, no rows survive cleaning, or an exception is
        raised while building the plot.
    """
    title = "Impressions Over Time"
    row_count = len(df) if df is not None else 'None'
    logging.info(f"Generating {title}. Date: '{date_column}', Impressions Col: '{impressions_col}'. DF rows: {row_count}")

    if df is None or df.empty:
        return create_placeholder_plot(title=title, message="No post data for impressions.")

    absent = [name for name in (date_column, impressions_col) if name not in df.columns]
    if absent:
        return create_placeholder_plot(title=title, message=f"Missing columns: {absent}. Available: {df.columns.tolist()}")

    try:
        cleaned = df.copy()
        # Values that cannot be parsed are coerced to NaT / NaN and dropped below.
        cleaned[date_column] = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[impressions_col] = pd.to_numeric(cleaned[impressions_col], errors='coerce')
        cleaned = cleaned.dropna(subset=[date_column, impressions_col])
        cleaned = cleaned.set_index(date_column)

        if cleaned.empty:
            return create_placeholder_plot(title=title, message="No valid data after cleaning.")

        # One value per calendar day: the sum of impressions of that day's posts.
        daily_totals = cleaned.resample('D')[impressions_col].sum()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(daily_totals.index, daily_totals.values, marker='.', linestyle='-', color='slateblue')
        ax.set(title=title, xlabel='Date', ylabel='Total Impressions')
        ax.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return fig
    except Exception as err:
        logging.error(f"Error generating {title}: {err}", exc_info=True)
        return create_placeholder_plot(title=f"{title} Error", message=str(err))
    finally:
        # Runs on every exit path of the try block, including the returns above.
        plt.close('all')
|
520 |
|
521 |
+
|
522 |
if __name__ == '__main__':
    # Smoke test: build small in-memory fixtures and call each plot generator
    # once, logging a line for every figure that comes back. The generator
    # functions are expected to be defined above in this module.

    # Posts Data (merged with stats).
    # Every column must hold the same number of entries (7 here); otherwise
    # pd.DataFrame raises "All arrays must be of the same length". The 'id'
    # column previously had only 6 entries, which aborted the whole script.
    posts_data = {
        'id': [f'post{i}' for i in range(1, 8)],
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03', '2023-01-04']),
        'likeCount': [10, 5, 12, 8, 15, 3, 20],
        'commentCount': [2, 1, 3, 1, 4, 0, 5],
        'shareCount': [1, 0, 1, 1, 2, 0, 1],
        'clickCount': [20, 15, 30, 22, 40, 10, 50],
        'impressionCount': [200, 150, 300, 220, 400, 100, 500],
        'engagement': [0.05, 0.04, 0.06, 0.055, 0.07, 0.03, 0.08]  # Engagement Rate
    }
    sample_merged_posts_df = pd.DataFrame(posts_data)

    # Follower Stats Data (20 rows: 5 time-series rows + 15 snapshot rows).
    follower_data = {
        'date': pd.to_datetime(['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15', '2023-03-01',  # For time series
                                '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',  # For demographics (snapshot)
                                '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01',
                                '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01', '2023-03-01'
                                ]),
        'follower_count_type': ['follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly', 'follower_gains_monthly',
                                'follower_geo', 'follower_geo', 'follower_geo',  # Location
                                'follower_function', 'follower_function', 'follower_function',  # Role
                                'follower_industry', 'follower_industry', 'follower_industry',  # Industry
                                'follower_seniority', 'follower_seniority', 'follower_seniority',  # Seniority
                                'total_followers_snapshot', 'total_followers_snapshot', 'total_followers_snapshot'  # For existing total growth
                                ],
        'category_name': ['Jan', 'Jan-Mid', 'Feb', 'Feb-Mid', 'Mar',  # Corresponds to follower_gains_monthly
                          'USA', 'Canada', 'UK',  # Geo
                          'Engineering', 'Sales', 'Marketing',  # Function/Role
                          'Tech', 'Finance', 'Healthcare',  # Industry
                          'Senior', 'Junior', 'Manager',  # Seniority
                          'Overall1', 'Overall2', 'Overall3'  # For total_followers_snapshot
                          ],
        'follower_count_o': [100, 105, 115, 120, 130,  # Counts for monthly gains
                             500, 300, 200,  # Geo counts
                             400, 350, 250,  # Role counts
                             600, 200, 200,  # Industry counts
                             300, 400, 300,  # Seniority counts
                             1000, 1010, 1025  # For total_followers_snapshot
                             ],
        'total_followers': [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 100, 115, 130]  # For existing total growth plot
    }
    sample_follower_stats_df = pd.DataFrame(follower_data)
    # Ensure 'total_followers' for generate_total_follower_growth_plot is correctly populated for its specific rows
    sample_follower_stats_df.loc[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot', 'total_followers'] = sample_follower_stats_df['follower_count_o']

    logging.info("--- Testing New Plot Generations ---")

    fig_followers_count = generate_followers_count_over_time_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
    if fig_followers_count:
        logging.info("Followers Count Over Time (monthly) plot generated.")

    fig_followers_rate = generate_followers_growth_rate_plot(sample_follower_stats_df.copy(), date_column='date', count_column='follower_count_o', type_value='follower_gains_monthly')
    if fig_followers_rate:
        logging.info("Followers Growth Rate (monthly) plot generated.")

    fig_geo = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_geo', plot_title="Followers by Location")
    if fig_geo:
        logging.info("Followers by Location plot generated.")

    fig_role = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_function', plot_title="Followers by Role")
    if fig_role:
        logging.info("Followers by Role plot generated.")

    fig_industry = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_industry', plot_title="Followers by Industry")
    if fig_industry:
        logging.info("Followers by Industry plot generated.")

    fig_seniority = generate_followers_by_demographics_plot(sample_follower_stats_df.copy(), type_value='follower_seniority', plot_title="Followers by Seniority")
    if fig_seniority:
        logging.info("Followers by Seniority plot generated.")

    fig_eng_rate = generate_engagement_rate_over_time_plot(sample_merged_posts_df.copy())
    if fig_eng_rate:
        logging.info("Engagement Rate Over Time plot generated.")

    fig_reach = generate_reach_over_time_plot(sample_merged_posts_df.copy())
    if fig_reach:
        logging.info("Reach Over Time (Clicks) plot generated.")

    fig_impressions = generate_impressions_over_time_plot(sample_merged_posts_df.copy())
    if fig_impressions:
        logging.info("Impressions Over Time plot generated.")

    # Test existing total follower growth plot with appropriate data
    total_followers_df = sample_follower_stats_df[sample_follower_stats_df['follower_count_type'] == 'total_followers_snapshot'].copy()
    total_followers_df['date'] = pd.to_datetime(total_followers_df['date'])  # Ensure date is datetime
    fig_total_growth = generate_total_follower_growth_plot(total_followers_df, date_column='date', count_column='total_followers')
    if fig_total_growth:
        logging.info("Total Follower Growth plot (existing function) generated.")

    logging.info("Test script finished. Review plots if displayed locally or saved.")
|