Spaces:
Running
Running
Create analytics_plot_generators.py
Browse files- analytics_plot_generators.py +318 -0
analytics_plot_generators.py
ADDED
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import logging
|
4 |
+
from io import BytesIO
|
5 |
+
import base64
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
# Configure logging for this module
|
9 |
+
# NOTE: this runs at import time and configures the *root* logger. basicConfig()
# does nothing when the root logger already has handlers, so an application that
# sets up logging before importing this module keeps its own configuration.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
|
10 |
+
|
11 |
+
def create_placeholder_plot(title="No Data or Plot Error", message="Data might be empty or an error occurred."):
    """Create a text-only Matplotlib figure signalling missing data or an error.

    Args:
        title: First line of the centred text.
        message: Second line of the centred text, rendered below ``title``.

    Returns:
        A Matplotlib figure with a single hidden axes that contains only the
        text. If building that figure fails, a minimal fallback figure with
        the text "Plot generation error" is returned instead.

    The returned figure is left registered with pyplot; closing it (for
    example with ``plt.close(fig)``) is the caller's responsibility.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.text(0.5, 0.5, f"{title}\n{message}", ha='center', va='center', fontsize=10, wrap=True)
        ax.axis('off')
        # Lay out this specific figure instead of pyplot's implicit "current" one.
        fig.tight_layout()
        return fig
    except Exception as e:
        logging.error(f"Error creating placeholder plot: {e}", exc_info=True)
        # Discard the half-built figure so it does not linger in pyplot's registry.
        if fig is not None:
            plt.close(fig)
        fallback_fig, fallback_ax = plt.subplots()
        fallback_ax.text(0.5, 0.5, "Plot generation error", ha='center', va='center')
        fallback_ax.axis('off')
        return fallback_fig
|
30 |
+
|
31 |
+
|
32 |
+
def generate_posts_activity_plot(df, date_column='published_at'):
    """Plot the number of posts per calendar day as a line chart.

    Args:
        df: DataFrame with one row per post, or ``None``.
        date_column: Name of the column holding each post's date. Values that
            are not already datetimes are parsed with ``pd.to_datetime``;
            unparseable entries are dropped.

    Returns:
        A Matplotlib figure. When ``df`` is ``None``/empty, the date column is
        missing, no valid dates remain, or plotting raises, a placeholder
        figure from ``create_placeholder_plot`` is returned instead.

    Every figure returned here has already been closed in pyplot so it does
    not accumulate in pyplot's registry. Only figures created by this call are
    closed; figures owned by other callers are left alone.
    """
    logging.info(f"Generating posts activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    def _placeholder(message, title="Posts Activity Over Time"):
        # Build the placeholder and immediately detach it from pyplot.
        placeholder = create_placeholder_plot(title=title, message=message)
        plt.close(placeholder)
        return placeholder

    if df is None or df.empty:
        logging.warning("Posts activity: DataFrame is empty.")
        return _placeholder("No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Posts activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return _placeholder(f"Date column '{date_column}' not found.")

    fig = None
    try:
        # Only the date column is needed, so work on that Series rather than
        # copying the whole frame. The caller's DataFrame is never modified.
        dates = df[date_column]
        if not pd.api.types.is_datetime64_any_dtype(dates):
            dates = pd.to_datetime(dates, errors='coerce')
        dates = dates.dropna()
        if dates.empty:
            logging.info("Posts activity: DataFrame empty after NaNs dropped from date column.")
            return _placeholder("No valid date entries found.")

        # Daily bins; days without posts inside the range get a count of 0.
        posts_over_time = pd.Series(1, index=pd.DatetimeIndex(dates)).resample('D').size()
        if posts_over_time.empty:
            logging.info("Posts activity: No posts after resampling by day.")
            return _placeholder("No posts in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        posts_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-')
        ax.set_title('Posts Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Posts')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Address this axes/figure explicitly instead of pyplot's global
        # "current figure", which another caller may have changed.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated posts activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating posts activity plot: {e}", exc_info=True)
        return _placeholder(str(e), title="Posts Activity Error")
    finally:
        # Close only the figure created here (also covers a half-built one on error).
        if fig is not None:
            plt.close(fig)
|
76 |
+
|
77 |
+
def generate_engagement_type_plot(df, likes_col='likes_count', comments_col='comments_count', shares_col='shares_count'):
    """Plot total likes, comments and shares as a three-bar chart.

    The input is expected to be pre-filtered by date if necessary.

    Args:
        df: DataFrame with one row per post, or ``None``.
        likes_col: Column holding per-post like counts.
        comments_col: Column holding per-post comment counts.
        shares_col: Column holding per-post share counts.

    Returns:
        A Matplotlib figure. When ``df`` is ``None``/empty, a required column
        is missing, all totals are zero, or plotting raises, a placeholder
        figure from ``create_placeholder_plot`` is returned instead.

    Every figure returned here has already been closed in pyplot so it does
    not accumulate in pyplot's registry. Only figures created by this call are
    closed; figures owned by other callers are left alone.
    """
    logging.info(f"Generating engagement type plot. Input df rows: {len(df) if df is not None else 'None'}")

    def _placeholder(message, title="Post Engagement Types"):
        # Build the placeholder and immediately detach it from pyplot.
        placeholder = create_placeholder_plot(title=title, message=message)
        plt.close(placeholder)
        return placeholder

    required_cols = [likes_col, comments_col, shares_col]
    if df is None or df.empty:
        logging.warning("Engagement type: DataFrame is empty.")
        return _placeholder("No data available for the selected period.")

    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        msg = f"Engagement type: Columns missing: {missing_cols}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return _placeholder(msg)

    fig = None
    try:
        # Coerce each column to numeric (non-numeric/NaN count as 0) and sum
        # it. Operating per column avoids copying the whole frame and never
        # touches the caller's DataFrame.
        engagement_data = {
            label: pd.to_numeric(df[col], errors='coerce').fillna(0).sum()
            for label, col in zip(('Likes', 'Comments', 'Shares'), required_cols)
        }

        if all(total == 0 for total in engagement_data.values()):
            logging.info("Engagement type: All engagement counts are zero.")
            return _placeholder("No engagement data (likes, comments, shares) in the selected period.")

        fig, ax = plt.subplots(figsize=(8, 5))
        bars = ax.bar(list(engagement_data), list(engagement_data.values()), color=['skyblue', 'lightgreen', 'salmon'])
        ax.set_title('Total Post Engagement Types')
        ax.set_xlabel('Engagement Type')
        ax.set_ylabel('Total Count')
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        # Put each value label 1% of the tallest bar above its bar.
        label_offset = 0.01 * max(engagement_data.values())
        for bar in bars:
            yval = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2.0, yval + label_offset, str(int(yval)), ha='center', va='bottom')

        # Lay out this figure explicitly rather than pyplot's global "current" one.
        fig.tight_layout()
        logging.info("Successfully generated engagement type plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating engagement type plot: {e}", exc_info=True)
        return _placeholder(str(e), title="Engagement Type Error")
    finally:
        # Close only the figure created here (also covers a half-built one on error).
        if fig is not None:
            plt.close(fig)
|
133 |
+
|
134 |
+
def generate_mentions_activity_plot(df, date_column='date'):
    """Plot the number of mentions per calendar day as a line chart.

    Args:
        df: DataFrame with one row per mention, or ``None``.
        date_column: Name of the column holding each mention's date. Values
            that are not already datetimes are parsed with ``pd.to_datetime``;
            unparseable entries are dropped.

    Returns:
        A Matplotlib figure. When ``df`` is ``None``/empty, the date column is
        missing, no valid dates remain, or plotting raises, a placeholder
        figure from ``create_placeholder_plot`` is returned instead.

    Every figure returned here has already been closed in pyplot so it does
    not accumulate in pyplot's registry. Only figures created by this call are
    closed; figures owned by other callers are left alone.
    """
    logging.info(f"Generating mentions activity plot. Date column: '{date_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    def _placeholder(message, title="Mentions Activity Over Time"):
        # Build the placeholder and immediately detach it from pyplot.
        placeholder = create_placeholder_plot(title=title, message=message)
        plt.close(placeholder)
        return placeholder

    if df is None or df.empty:
        logging.warning("Mentions activity: DataFrame is empty.")
        return _placeholder("No data available for the selected period.")
    if date_column not in df.columns:
        logging.warning(f"Mentions activity: Date column '{date_column}' is missing from DataFrame columns: {df.columns.tolist()}.")
        return _placeholder(f"Date column '{date_column}' not found.")

    fig = None
    try:
        # Only the date column is needed, so work on that Series rather than
        # copying the whole frame. The caller's DataFrame is never modified.
        dates = df[date_column]
        if not pd.api.types.is_datetime64_any_dtype(dates):
            dates = pd.to_datetime(dates, errors='coerce')
        dates = dates.dropna()
        if dates.empty:
            logging.info("Mentions activity: DataFrame empty after NaNs dropped from date column.")
            return _placeholder("No valid date entries found.")

        # Daily bins; days without mentions inside the range get a count of 0.
        mentions_over_time = pd.Series(1, index=pd.DatetimeIndex(dates)).resample('D').size()
        if mentions_over_time.empty:
            logging.info("Mentions activity: No mentions after resampling by day.")
            return _placeholder("No mentions in the selected period.")

        fig, ax = plt.subplots(figsize=(10, 5))
        mentions_over_time.plot(kind='line', ax=ax, marker='o', linestyle='-', color='purple')
        ax.set_title('Mentions Activity Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Number of Mentions')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Address this axes/figure explicitly instead of pyplot's global
        # "current figure", which another caller may have changed.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated mentions activity plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mentions activity plot: {e}", exc_info=True)
        return _placeholder(str(e), title="Mentions Activity Error")
    finally:
        # Close only the figure created here (also covers a half-built one on error).
        if fig is not None:
            plt.close(fig)
|
178 |
+
|
179 |
+
def generate_mention_sentiment_plot(df, sentiment_column='sentiment_label'):
    """Plot the distribution of mention sentiment labels as a pie chart.

    The input is expected to be pre-filtered by date if necessary.

    Args:
        df: DataFrame with one row per mention, or ``None``.
        sentiment_column: Column holding each mention's sentiment label.
            Missing values are ignored. The labels Positive, Negative,
            Neutral and Mixed (matched case-insensitively) get dedicated
            colours; any other label is drawn in grey.

    Returns:
        A Matplotlib figure. When ``df`` is ``None``/empty, the sentiment
        column is missing or holds no values, or plotting raises, a
        placeholder figure from ``create_placeholder_plot`` is returned.

    Every figure returned here has already been closed in pyplot so it does
    not accumulate in pyplot's registry. Only figures created by this call are
    closed; figures owned by other callers are left alone.
    """
    logging.info(f"Generating mention sentiment plot. Sentiment column: '{sentiment_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    def _placeholder(message, title="Mention Sentiment Distribution"):
        # Build the placeholder and immediately detach it from pyplot.
        placeholder = create_placeholder_plot(title=title, message=message)
        plt.close(placeholder)
        return placeholder

    if df is None or df.empty:
        logging.warning("Mention sentiment: DataFrame is empty.")
        return _placeholder("No data available for the selected period.")
    if sentiment_column not in df.columns:
        msg = f"Mention sentiment: Column '{sentiment_column}' is missing. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return _placeholder(msg)

    fig = None
    try:
        # value_counts() is read-only, so no defensive copy of the frame is needed.
        sentiment_counts = df[sentiment_column].value_counts()
        if sentiment_counts.empty:
            logging.info("Mention sentiment: No sentiment data after value_counts.")
            return _placeholder("No sentiment data available.")

        fig, ax = plt.subplots(figsize=(8, 5))
        # Keys are case-folded so "positive" / "POSITIVE" share the colour of "Positive".
        colors = {'positive': 'lightgreen', 'negative': 'salmon', 'neutral': 'lightskyblue', 'mixed': 'gold'}
        pie_colors = [colors.get(str(label).strip().casefold(), '#cccccc') for label in sentiment_counts.index]

        ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=pie_colors)
        ax.set_title('Mention Sentiment Distribution')
        ax.axis('equal')  # equal aspect ratio keeps the pie circular
        # Lay out this figure explicitly rather than pyplot's global "current" one.
        fig.tight_layout()
        logging.info("Successfully generated mention sentiment plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating mention sentiment plot: {e}", exc_info=True)
        return _placeholder(str(e), title="Mention Sentiment Error")
    finally:
        # Close only the figure created here (also covers a half-built one on error).
        if fig is not None:
            plt.close(fig)
|
217 |
+
|
218 |
+
def generate_follower_growth_plot(df, date_column='date', count_column='total_followers'):
    """Plot total follower count over time as a line chart.

    This function receives the *unfiltered* follower DataFrame.

    Args:
        df: DataFrame of follower statistics, or ``None``.
        date_column: Column holding the observation date. Values that are not
            already datetimes are parsed with ``pd.to_datetime``.
        count_column: Column holding the follower total; coerced to numeric.
            Rows with an unparseable date or count are dropped.

    Returns:
        A Matplotlib figure. When ``df`` is ``None``/empty, a required column
        is missing, no valid rows remain, or plotting raises, a placeholder
        figure from ``create_placeholder_plot`` is returned instead.

    Every figure returned here has already been closed in pyplot so it does
    not accumulate in pyplot's registry. Only figures created by this call are
    closed; figures owned by other callers are left alone.
    """
    logging.info(f"Generating follower growth plot. Date col: '{date_column}', Count col: '{count_column}'. Input df rows: {len(df) if df is not None else 'None'}")

    def _placeholder(message, title="Follower Growth Over Time"):
        # Build the placeholder and immediately detach it from pyplot.
        placeholder = create_placeholder_plot(title=title, message=message)
        plt.close(placeholder)
        return placeholder

    if df is None or df.empty:
        logging.warning("Follower growth: DataFrame is empty.")
        return _placeholder("No follower data available.")

    missing = [col for col in (date_column, count_column) if col not in df.columns]
    if missing:
        msg = f"Follower growth: Columns missing: {missing}. Available: {df.columns.tolist()}"
        logging.warning(msg)
        return _placeholder(msg)

    fig = None
    try:
        # Copy only the two columns that are used; the caller's frame stays untouched.
        df_copy = df[[date_column, count_column]].copy()
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')
        df_copy[count_column] = pd.to_numeric(df_copy[count_column], errors='coerce')
        df_copy = df_copy.dropna(subset=[date_column, count_column])

        if df_copy.empty:
            logging.info("Follower growth: DataFrame empty after NaNs dropped from date/count columns.")
            return _placeholder("No valid data for follower growth.")

        # Sort chronologically so the line is drawn left to right.
        df_copy = df_copy.sort_values(by=date_column)

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(df_copy[date_column], df_copy[count_column], marker='o', linestyle='-', color='green')
        ax.set_title('Follower Growth Over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel('Total Followers')
        ax.grid(True, linestyle='--', alpha=0.7)
        # Address this axes/figure explicitly instead of pyplot's global
        # "current figure", which another caller may have changed.
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        logging.info("Successfully generated follower growth plot.")
        return fig
    except Exception as e:
        logging.error(f"Error generating follower growth plot: {e}", exc_info=True)
        return _placeholder(str(e), title="Follower Growth Error")
    finally:
        # Close only the figure created here (also covers a half-built one on error).
        if fig is not None:
            plt.close(fig)
|
265 |
+
|
266 |
+
if __name__ == '__main__':
    # Manual smoke test: build every plot once from tiny in-memory frames and
    # log a line for each figure that comes back.
    sample_posts_df = pd.DataFrame({
        'published_at': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-03', '2023-01-03', '2023-01-03']),
        'likes_count': [10, 5, 12, 8, 15, 3],
        'comments_count': [2, 1, 3, 1, 4, 0],
        'shares_count': [1, 0, 1, 1, 2, 0],
    })
    sample_mentions_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-02', '2023-01-03']),
        'sentiment_label': ['Positive', 'Negative', 'Positive', 'Neutral'],
    })
    sample_follower_stats_df = pd.DataFrame({
        'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']),
        'total_followers': [100, 105, 115, 120, 118],  # Example data
    })

    # Frames that should trigger the placeholder paths.
    empty_df = pd.DataFrame(columns=['published_at'])  # Empty df with column
    df_no_col = pd.DataFrame({'some_other_date': pd.to_datetime(['2023-01-01'])})

    # Each entry pairs a zero-argument figure builder with its success message.
    plot_checks = (
        (lambda: generate_posts_activity_plot(sample_posts_df.copy(), date_column='published_at'),
         "Posts activity plot generated."),
        (lambda: generate_engagement_type_plot(sample_posts_df.copy()),
         "Engagement type plot generated."),
        (lambda: generate_mentions_activity_plot(sample_mentions_df.copy(), date_column='date'),
         "Mentions activity plot generated."),
        (lambda: generate_mention_sentiment_plot(sample_mentions_df.copy()),
         "Mention sentiment plot generated."),
        (lambda: generate_follower_growth_plot(sample_follower_stats_df.copy(), date_column='date', count_column='total_followers'),
         "Follower growth plot generated."),
    )
    placeholder_checks = (
        (lambda: create_placeholder_plot(),
         "Placeholder plot generated."),
        (lambda: generate_posts_activity_plot(empty_df, date_column='published_at'),
         "Empty posts activity plot (placeholder) generated."),
        (lambda: generate_posts_activity_plot(df_no_col, date_column='published_at'),
         "Posts activity with missing column (placeholder) generated."),
    )

    for heading, checks in (
        ("--- Testing Plot Generations ---", plot_checks),
        ("--- Testing Placeholders ---", placeholder_checks),
    ):
        logging.info(heading)
        for build_figure, success_message in checks:
            if build_figure():
                logging.info(success_message)

    logging.info("Test script finished.")
|