GuglielmoTor committed on
Commit
f20ee95
·
verified ·
1 Parent(s): e3cbb18

Create analytics_plot_generators.py

Browse files
Files changed (1) hide show
  1. analytics_plot_generators.py +318 -0
analytics_plot_generators.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import logging
4
+ from io import BytesIO
5
+ import base64
6
+ import numpy as np
7
+
8
+ # Configure logging for this module
9
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
10
+
11
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Create a placeholder Matplotlib figure signalling missing data or an error.

    Args:
        title: Heading shown on the first line of the placeholder text.
        message: Explanatory text shown on the line below the title.

    Returns:
        A Matplotlib figure with hidden axes showing the title and message
        centred. If building that figure raises, the error is logged and a
        minimal figure reading "Plot generation error" is returned instead.

    Note:
        The returned figure is created through pyplot and is not closed here;
        the caller decides when to close it (e.g. ``plt.close(fig)``).
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
        ax.axis('off')
        # Lay out this specific figure instead of pyplot's implicit
        # "current figure", which may be a different one.
        fig.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}", exc_info=True)
        # Release the half-built figure so it does not linger in pyplot's
        # figure registry once we hand back the fallback.
        if fig is not None:
            plt.close(fig)
        fallback_fig, fallback_ax = plt.subplots()
        fallback_ax.text(0.5, 0.5, "Plot generation error", ha='center', va='center')
        fallback_ax.axis('off')
        return fallback_fig
30
+
31
+
32
def generate_posts_activity_plot(df, date_column='published_at'):
    """Plot the number of posts per calendar day as a line chart.

    Args:
        df: DataFrame with one row per post. May be ``None`` or empty.
        date_column: Name of the column holding each post's timestamp. If the
            column is not already a datetime dtype it is parsed with
            ``pd.to_datetime(..., errors='coerce')``; rows whose date cannot
            be parsed are dropped.

    Returns:
        A Matplotlib figure with the daily post counts. A placeholder figure
        (from ``create_placeholder_plot``) is returned when ``df`` is
        ``None``/empty, ``date_column`` is missing, no valid dates remain, or
        any exception is raised while plotting.

    Note:
        ``plt.close('all')`` runs before the function returns, so every figure
        registered with pyplot (including the returned one) is closed there.
    """
    plot_title = "Posts Activity Over Time"
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    if df is None or df.empty:
        logging.warning("Posts activity: DataFrame is empty.")
        return create_placeholder_plot(title=plot_title, message="No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Posts activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return create_placeholder_plot(title=plot_title, message=f"Date column '{date_column}' not found.")

    try:
        # Only the date column is needed; copying it alone avoids mutating the
        # caller's frame without duplicating every other column.
        df_copy = df[[date_column]].copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=plot_title, message="No valid date entries found.")

        # One bucket per day; days without posts inside the range count as 0.
        posts_over_time = df_copy.set_index(date_column).resample('D').size()

        if posts_over_time.empty:
            logging.info("Posts activity: No posts after resampling by day.")
            return create_placeholder_plot(title=plot_title, message="No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
        ax.set_title('Posts Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Use the explicit axes/figure objects rather than pyplot's implicit
        # "current" axes/figure, which is shared global state.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated posts activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Posts Activity Error", message=str(e))
    finally:
        plt.close('all')
76
+
77
def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='comments_count', shares_col='shares_count'):
    """Plot total likes, comments and shares as a three-bar chart.

    The input frame is expected to be pre-filtered by date if necessary; this
    function sums whatever rows it receives.

    Args:
        df: DataFrame with one row per post. May be ``None`` or empty.
        likes_col: Name of the column holding like counts.
        comments_col: Name of the column holding comment counts.
        shares_col: Name of the column holding share counts.

    Returns:
        A Matplotlib figure with one bar per engagement type, each annotated
        with its integer total. A placeholder figure (from
        ``create_placeholder_plot``) is returned when ``df`` is
        ``None``/empty, a required column is missing, all totals are zero, or
        any exception is raised while plotting.

    Note:
        Values that cannot be converted to numbers are treated as 0.
        ``plt.close('all')`` runs before the function returns, so every figure
        registered with pyplot (including the returned one) is closed there.
    """
    plot_title = "Post Engagement Types"
    logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")

    required_cols = [likes_col, comments_col, shares_col]
    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return create_placeholder_plot(title=plot_title, message="No data available for the selected period.")

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=plot_title, message=msg)

    try:
        # Sum each column from a coerced Series; nothing is written back, so
        # the caller's frame is untouched and no full copy is required.
        engagement_data = {
            label: pd.to_numeric(df[col], errors='coerce').fillna(0).sum()
            for label, col in zip(('Likes', 'Comments', 'Shares'), required_cols)
        }

        if all(total == 0 for total in engagement_data.values()):
            logging.info("Engagement type: All engagement counts are zero.")
            return create_placeholder_plot(title=plot_title, message="No engagement data (likes, comments, shares) in the selected period.")

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(
            list(engagement_data.keys()),
            list(engagement_data.values()),
            color=['skyblue', 'lightgreen', 'salmon'],
        )
        ax.set_title('Total Post Engagement Types')
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Lift each label 1% of the tallest bar above its own bar; the offset
        # is the same for every bar, so compute it once.
        label_offset = 0.01 * max(engagement_data.values())
        for bar in bars:
            yval = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2.0, yval + label_offset, str(int(yval)), ha='center', va='bottom')

        # Lay out this specific figure rather than pyplot's "current" one.
        fig.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Engagement Type Error", message=str(e))
    finally:
        plt.close('all')
133
+
134
def generate_mentions_activity_plot(df, date_column='date'):
    """Plot the number of mentions per calendar day as a line chart.

    Args:
        df: DataFrame with one row per mention. May be ``None`` or empty.
        date_column: Name of the column holding each mention's timestamp. If
            the column is not already a datetime dtype it is parsed with
            ``pd.to_datetime(..., errors='coerce')``; rows whose date cannot
            be parsed are dropped.

    Returns:
        A Matplotlib figure with the daily mention counts. A placeholder
        figure (from ``create_placeholder_plot``) is returned when ``df`` is
        ``None``/empty, ``date_column`` is missing, no valid dates remain, or
        any exception is raised while plotting.

    Note:
        ``plt.close('all')`` runs before the function returns, so every figure
        registered with pyplot (including the returned one) is closed there.
    """
    plot_title = "Mentions Activity Over Time"
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")
    if df is None or df.empty:
        logging.warning("Mentions activity: DataFrame is empty.")
        return create_placeholder_plot(title=plot_title, message="No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Mentions activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return create_placeholder_plot(title=plot_title, message=f"Date column '{date_column}' not found.")

    try:
        # Only the date column is needed; copying it alone avoids mutating the
        # caller's frame without duplicating every other column.
        df_copy = df[[date_column]].copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        df_copy = df_copy.dropna(subset=[date_column])
        if df_copy.empty:
            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
            return create_placeholder_plot(title=plot_title, message="No valid date entries found.")

        # One bucket per day; days without mentions inside the range count as 0.
        mentions_over_time = df_copy.set_index(date_column).resample('D').size()

        if mentions_over_time.empty:
            logging.info("Mentions activity: No mentions after resampling by day.")
            return create_placeholder_plot(title=plot_title, message="No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
        ax.set_title('Mentions Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Use the explicit axes/figure objects rather than pyplot's implicit
        # "current" axes/figure, which is shared global state.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated mentions activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mentions Activity Error", message=str(e))
    finally:
        plt.close('all')
178
+
179
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Plot the distribution of mention sentiment labels as a pie chart.

    The input frame is expected to be pre-filtered by date if necessary.

    Args:
        df: DataFrame with one row per mention. May be ``None`` or empty.
        sentiment_column: Name of the column holding the sentiment label of
            each mention.

    Returns:
        A Matplotlib figure with one wedge per distinct label, sized by its
        count. A placeholder figure (from ``create_placeholder_plot``) is
        returned when ``df`` is ``None``/empty, ``sentiment_column`` is
        missing, the column yields no counts, or any exception is raised
        while plotting.

    Note:
        Wedge colours are looked up case-insensitively for the labels
        positive / negative / neutral / mixed; any other label is drawn grey.
        ``plt.close('all')`` runs before the function returns, so every figure
        registered with pyplot (including the returned one) is closed there.
    """
    plot_title = "Mention Sentiment Distribution"
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return create_placeholder_plot(title=plot_title, message="No data available for the selected period.")
    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=plot_title, message=msg)

    try:
        # value_counts() builds a new Series, so the caller's frame is never
        # modified and no defensive copy is needed.
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return create_placeholder_plot(title=plot_title, message="No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        # Keys are lower-case so labels such as "positive" or "NEGATIVE" get
        # the same colour as "Positive" / "Negative" instead of the grey default.
        colors = {'positive': 'lightgreen', 'negative': 'salmon', 'neutral': 'lightskyblue', 'mixed': 'gold'}
        pie_colors = [colors.get(str(label).strip().lower(), '#cccccc') for label in sentiment_counts.index]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        ax.axis('equal')
        # Lay out this specific figure rather than pyplot's "current" one.
        fig.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Mention Sentiment Error", message=str(e))
    finally:
        plt.close('all')
217
+
218
def generate_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """Plot total follower count over time as a line chart.

    This function receives the *unfiltered* follower DataFrame.

    Args:
        df: DataFrame of follower statistics. May be ``None`` or empty.
        date_column: Name of the column holding the date of each data point.
            If it is not already a datetime dtype it is parsed with
            ``pd.to_datetime(..., errors='coerce')``.
        count_column: Name of the column holding the follower total; values
            are coerced with ``pd.to_numeric(..., errors='coerce')``.

    Returns:
        A Matplotlib figure with the follower count plotted against date, in
        ascending date order. A placeholder figure (from
        ``create_placeholder_plot``) is returned when ``df`` is
        ``None``/empty, a required column is missing, no row has both a valid
        date and a valid count, or any exception is raised while plotting.

    Note:
        ``plt.close('all')`` runs before the function returns, so every figure
        registered with pyplot (including the returned one) is closed there.
    """
    plot_title = "Follower Growth Over Time"
    logging.info(f"Generating follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    if df is None or df.empty:
        logging.warning("Follower growth: DataFrame is empty.")
        return create_placeholder_plot(title=plot_title, message="No follower data available.")

    missing = [col for col in (date_column, count_column) if col not in df.columns]
    if missing:
        msg = f"Follower growth: Columns missing: {missing}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return create_placeholder_plot(title=plot_title, message=msg)

    try:
        # Work on a copy so the coercions below never touch the caller's frame.
        df_copy = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        df_copy[count_column] = pd.to_numeric(df_copy[count_column], errors='coerce')
        # Rows that failed either coercion cannot be plotted.
        df_copy = df_copy.dropna(subset=[date_column, count_column])

        if df_copy.empty:
            logging.info("Follower growth: DataFrame empty after NaNs dropped from date/count columns.")
            return create_placeholder_plot(title=plot_title, message="No valid data for follower growth.")

        # The line connects points in row order, so sort chronologically first.
        df_copy = df_copy.sort_values(by=date_column)

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_copy[date_column], df_copy[count_column], marker='o', linestyle='-', color='green')
        ax.set_title('Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Use the explicit axes/figure objects rather than pyplot's implicit
        # "current" axes/figure, which is shared global state.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated follower growth plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating follower growth plot: {e}", exc_info=True)
        return create_placeholder_plot(title="Follower Growth Error", message=str(e))
    finally:
        plt.close('all')
265
+
266
if __name__ == '__main__':
    # Smoke test: build small in-memory frames, run every generator once, and
    # log a confirmation line for each figure that comes back.
    sample_posts_df = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03']),
        'likes_count': [10, 5, 12, 8, 15, 3],
        'comments_count': [2, 1, 3, 1, 4, 0],
        'shares_count': [1, 0, 1, 1, 2, 0],
    })

    sample_mentions_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })

    # Example follower totals, one data point per day.
    sample_follower_stats_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']),
        'total_followers': [100, 105, 115, 120, 118],
    })

    # Frames that should steer the posts plot onto its placeholder paths:
    # one with the right column but no rows, one without the column at all.
    empty_df = pd.DataFrame(columns=['published_at'])
    df_no_col = pd.DataFrame({'some_other_date': pd.to_datetime(['2023-01-01'])})

    # Each entry pairs the confirmation message with a zero-argument builder,
    # so the checks run strictly in the listed order.
    regular_checks = [
        ("Posts activity plot generated.",
         lambda: generate_posts_activity_plot(sample_posts_df.copy(), date_column='published_at')),
        ("Engagement type plot generated.",
         lambda: generate_engagement_type_plot(sample_posts_df.copy())),
        ("Mentions activity plot generated.",
         lambda: generate_mentions_activity_plot(sample_mentions_df.copy(), date_column='date')),
        ("Mention sentiment plot generated.",
         lambda: generate_mention_sentiment_plot(sample_mentions_df.copy())),
        ("Follower growth plot generated.",
         lambda: generate_follower_growth_plot(sample_follower_stats_df.copy(), date_column='date', count_column='total_followers')),
    ]
    placeholder_checks = [
        ("Placeholder plot generated.",
         lambda: create_placeholder_plot()),
        ("Empty posts activity plot (placeholder) generated.",
         lambda: generate_posts_activity_plot(empty_df, date_column='published_at')),
        ("Posts activity with missing column (placeholder) generated.",
         lambda: generate_posts_activity_plot(df_no_col, date_column='published_at')),
    ]

    for banner, checks in (
        ("--- Testing Plot Generations ---", regular_checks),
        ("--- Testing Placeholders ---", placeholder_checks),
    ):
        logging.info(banner)
        for confirmation, build_figure in checks:
            figure = build_figure()
            if figure:
                logging.info(confirmation)

    logging.info("Test script finished.")