File size: 36,854 Bytes
35ed909
 
 
 
 
 
 
 
 
795b267
 
35ed909
795b267
 
 
 
 
 
35ed909
795b267
35ed909
795b267
35ed909
 
 
795b267
 
 
 
 
35ed909
795b267
 
 
 
 
 
 
 
 
35ed909
 
 
795b267
35ed909
795b267
35ed909
 
 
 
 
 
 
 
 
 
795b267
35ed909
795b267
 
 
 
 
 
 
 
35ed909
 
 
795b267
 
 
 
 
 
 
 
 
 
 
 
 
35ed909
795b267
 
 
 
 
 
 
 
35ed909
 
795b267
35ed909
795b267
 
35ed909
 
 
 
 
 
 
 
 
 
dce8999
35ed909
 
795b267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35ed909
 
 
795b267
35ed909
795b267
35ed909
 
dce8999
795b267
dce8999
795b267
dce8999
35ed909
795b267
35ed909
 
 
795b267
35ed909
795b267
35ed909
 
 
 
 
 
 
dce8999
35ed909
dce8999
35ed909
 
 
 
795b267
 
 
 
35ed909
 
795b267
 
35ed909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
#analytics_data_processing.py
import pandas as pd
from datetime import datetime, timedelta, time
import logging
import numpy as np

# Configure logging for this module: INFO level, with each record prefixed by
# timestamp, level name and originating module name.
# NOTE(review): this call runs at import time and targets the root logger;
# per the stdlib docs basicConfig does nothing if the root logger already has
# handlers. Confirm this module is meant to own the application's logging setup.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')


# --- CORRECTED FUNCTION START (V2) ---
def _coerce_filter_bound(value, label):
    """Return a filter bound as a naive, midnight-normalized Timestamp, or None.

    None is returned when the bound is missing/falsy or cannot be parsed as a
    single date, so callers can treat it as "no bound on this side".
    """
    if not value:
        return None

    parsed = pd.to_datetime(value, errors='coerce')
    # NaT does not implement .normalize(); treat anything that is not a real
    # Timestamp (NaT, list-likes, ...) as an absent bound instead of crashing.
    if not isinstance(parsed, pd.Timestamp) or pd.isna(parsed):
        logging.warning(f"Filter by date: could not parse {label} bound {value!r}; ignoring it.")
        return None

    # The column is compared as naive UTC dates, so the bound must be naive too.
    if parsed.tzinfo is not None:
        parsed = parsed.tz_convert('UTC').tz_localize(None)
    return parsed.normalize()


def filter_dataframe_by_date(df, date_column, start_date, end_date):
    """
    Filters a DataFrame by a date column within an inclusive date range.

    Handles both daily ('YYYY-MM-DD') and monthly ('YYYY-MM') data using a
    two-pass detection:

    * Pass 1: every non-null value is a 'YYYY-MM' string.
    * Pass 2: after parsing, every date falls on the 1st of a month.

    Monthly rows are kept when their whole month overlaps the filter range;
    daily rows are kept when start_date <= date <= end_date.

    Args:
        df: Input DataFrame (not modified; a copy is filtered).
        date_column: Name of the column holding the dates.
        start_date: Lower bound (anything pd.to_datetime accepts) or None/falsy
            for no lower bound. Unparseable values are ignored with a warning.
        end_date: Upper bound, same conventions as start_date.

    Returns:
        A DataFrame whose date column has been converted to naive,
        midnight-normalized datetimes (timezone-aware values are expressed in
        UTC first) with unparseable rows dropped. An empty DataFrame is
        returned when the input is None/empty, the column is missing, or the
        column cannot be processed.
    """
    if df is None or df.empty or not date_column:
        logging.warning("Filter by date: DataFrame is None, empty, or no date_column provided.")
        return pd.DataFrame()

    if date_column not in df.columns:
        logging.warning(f"Filter by date: Date column '{date_column}' not found in DataFrame columns: {df.columns.tolist()}.")
        return pd.DataFrame()

    df_copy = df.copy()

    # --- TWO-PASS DETECTION LOGIC ---
    use_month_logic = False

    # Pass 1: check that all non-null values are 'YYYY-MM' strings.
    # infer_dtype looks at the actual values, so object columns holding
    # date objects or mixed types skip the .str accessor (which would raise
    # or yield NaN matches) and fall through to Pass 2 instead.
    non_null_values = df_copy[date_column].dropna()
    if not non_null_values.empty and pd.api.types.infer_dtype(non_null_values, skipna=True) == 'string':
        # This regex ensures the entire string is just 'YYYY-MM'
        if non_null_values.str.match(r'^\d{4}-\d{2}$').all():
            use_month_logic = True
            logging.info(f"Filter by date (Pass 1): Detected 'YYYY-MM' string format for column '{date_column}'.")

    # Standardize column to naive, normalized datetimes for filtering and Pass 2.
    try:
        if not pd.api.types.is_datetime64_any_dtype(df_copy[date_column]):
            df_copy[date_column] = pd.to_datetime(df_copy[date_column], errors='coerce')

        df_copy.dropna(subset=[date_column], inplace=True)

        if df_copy.empty:
            logging.info(f"Filter by date: DataFrame empty after to_datetime and dropna for column '{date_column}'.")
            return pd.DataFrame()

        dates = df_copy[date_column]
        # Drop the timezone BEFORE normalizing: normalizing first and then
        # converting to UTC would shift values off midnight again.
        if dates.dt.tz is not None:
            dates = dates.dt.tz_convert('UTC').dt.tz_localize(None)
        df_copy[date_column] = dates.dt.normalize()
    except Exception as e:
        # Best-effort boundary: an unprocessable column yields an empty result.
        logging.error(f"Error processing date column '{date_column}': {e}", exc_info=True)
        return pd.DataFrame()

    # Pass 2: If not detected by string format, check if all dates are the 1st of the month.
    # NOTE(review): daily data whose rows all happen to fall on the 1st (e.g. a
    # single row) is also treated as monthly here — confirm this is acceptable.
    if not use_month_logic and (df_copy[date_column].dt.day == 1).all():
        use_month_logic = True
        logging.info(f"Filter by date (Pass 2): All dates in '{date_column}' are 1st of the month. Applying month-range filtering.")
    # --- END OF DETECTION LOGIC ---

    # Convert filter start/end dates to normalized, naive Timestamps
    start_dt_obj = _coerce_filter_bound(start_date, 'start')
    end_dt_obj = _coerce_filter_bound(end_date, 'end')

    if start_dt_obj is None and end_dt_obj is None:
        return df_copy

    # Perform the filtering based on the detected format
    if use_month_logic:
        logging.info(f"Applying month-overlap filtering for column '{date_column}'.")
        # For monthly data, include a row if its month overlaps with the filter range.
        # The month end is kept in a standalone Series so an input column that
        # happens to be called 'end_of_month' is never overwritten or dropped.
        month_end = df_copy[date_column] + pd.offsets.MonthEnd(1)
        filter_start = start_dt_obj if start_dt_obj is not None else pd.Timestamp.min
        filter_end = end_dt_obj if end_dt_obj is not None else pd.Timestamp.max

        mask = (df_copy[date_column] <= filter_end) & (month_end >= filter_start)
        df_filtered_final = df_copy[mask].copy()
    else:
        logging.info(f"Applying standard daily filtering for column '{date_column}'.")
        # Standard filtering for daily ('YYYY-MM-DD') data
        df_filtered_final = df_copy
        if start_dt_obj is not None:
            df_filtered_final = df_filtered_final[df_filtered_final[date_column] >= start_dt_obj]
        if end_dt_obj is not None:
            df_filtered_final = df_filtered_final[df_filtered_final[date_column] <= end_dt_obj]

    if df_filtered_final.empty:
        logging.info(f"Filter by date: DataFrame became empty after applying date range to column '{date_column}'.")

    return df_filtered_final
# --- CORRECTED FUNCTION END (V2) ---


def prepare_filtered_analytics_data(token_state_value, date_filter_option, custom_start_date, custom_end_date):
    """
    Retrieves data from token_state, determines the date range, and filters
    posts, mentions, and follower time-series data. Posts are merged with
    post stats before filtering.

    Args:
        token_state_value: Dict-like state holding the "bubble_*_df" DataFrames
            and the optional "config_date_col_*" column-name overrides.
            Missing or None DataFrame entries are treated as empty.
        date_filter_option: "Ultimi 7 Giorni", "Ultimi 30 Giorni" or
            "Intervallo Personalizzato"; any other value leaves the start of
            the range open and ends it today.
        custom_start_date: Start of the custom range (used only for
            "Intervallo Personalizzato"); unparseable values mean "no start".
        custom_end_date: End of the custom range; unparseable values fall
            back to today.

    Returns:
        Tuple of (filtered merged posts, filtered mentions, date-filtered
        follower stats, unfiltered follower stats, start of range, end of range).
    """
    logging.info(f"Preparing filtered analytics data. Filter: {date_filter_option}, Custom Start: {custom_start_date}, Custom End: {custom_end_date}")
    token_state_value = token_state_value or {}

    def _state_df(key):
        # A key stored as None (or any non-DataFrame) behaves like "no data";
        # real frames are copied so the stored state is never mutated here.
        stored = token_state_value.get(key)
        return stored.copy() if isinstance(stored, pd.DataFrame) else pd.DataFrame()

    posts_df = _state_df("bubble_posts_df")
    mentions_df = _state_df("bubble_mentions_df")
    follower_stats_df = _state_df("bubble_follower_stats_df")
    post_stats_df = _state_df("bubble_post_stats_df")
    date_column_posts = token_state_value.get("config_date_col_posts", "published_at")
    date_column_mentions = token_state_value.get("config_date_col_mentions", "date")
    date_column_followers = token_state_value.get("config_date_col_followers", "date")

    # --- PRE-PROCESSING STEP FOR FOLLOWER STATS ---
    # This block handles the case where date information is in the 'category_name' column.
    if not follower_stats_df.empty and 'category_name' in follower_stats_df.columns:
        logging.info("Pre-processing follower_stats_df: Checking 'category_name' for dates.")
        # 'coerce' turns any non-date value into NaT (Not a Time).
        # NOTE(review): any category value that pandas can parse as a date is
        # treated as one — confirm non-date categories can never look date-like.
        category_as_dates = pd.to_datetime(follower_stats_df['category_name'], errors='coerce')

        # Rows where the conversion was successful.
        valid_dates_mask = category_as_dates.notna()

        # If any dates were found, write them into the follower date column.
        if valid_dates_mask.any():
            logging.info(f"Found {valid_dates_mask.sum()} date-like values in 'category_name'. Consolidating them into the '{date_column_followers}' column.")
            follower_stats_df.loc[valid_dates_mask, date_column_followers] = category_as_dates[valid_dates_mask]
    # --- END OF PRE-PROCESSING STEP ---

    # Determine date range for filtering (bounds are midnight-normalized).
    current_datetime_obj = datetime.now()
    current_time_normalized = current_datetime_obj.replace(hour=0, minute=0, second=0, microsecond=0)

    end_dt_filter = current_time_normalized
    start_dt_filter = None

    if date_filter_option == "Ultimi 7 Giorni":
        # 6 days back + today = 7 calendar days, inclusive.
        start_dt_filter = current_time_normalized - timedelta(days=6)
    elif date_filter_option == "Ultimi 30 Giorni":
        start_dt_filter = current_time_normalized - timedelta(days=29)
    elif date_filter_option == "Intervallo Personalizzato":
        start_dt_filter_temp = pd.to_datetime(custom_start_date, errors='coerce')
        start_dt_filter = start_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(start_dt_filter_temp) else None

        end_dt_filter_temp = pd.to_datetime(custom_end_date, errors='coerce')
        end_dt_filter = end_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(end_dt_filter_temp) else current_time_normalized

    logging.info(f"Date range for filtering: Start: {start_dt_filter}, End: {end_dt_filter}")

    # Merge posts_df and post_stats_df
    expected_stat_cols = ['engagement', 'impressionCount', 'clickCount', 'likeCount', 'commentCount', 'shareCount']
    merged_posts_df = pd.DataFrame()
    if not posts_df.empty:
        has_stats = not post_stats_df.empty
        can_merge = has_stats and 'id' in posts_df.columns and 'post_id' in post_stats_df.columns
        if can_merge:
            merged_posts_df = pd.merge(posts_df, post_stats_df, left_on='id', right_on='post_id', how='left')
        else:
            if has_stats:
                logging.warning("Cannot merge posts_df and post_stats_df due to missing 'id' or 'post_id' columns.")
            merged_posts_df = posts_df
            # Whenever stats could not be merged in, add the stat columns as
            # missing values so both fallback paths expose the same columns.
            for col in expected_stat_cols:
                if col not in merged_posts_df.columns:
                    merged_posts_df[col] = pd.NA

    # Filter DataFrames by date (using the pre-processed follower_stats_df)
    filtered_merged_posts_data = filter_dataframe_by_date(merged_posts_df, date_column_posts, start_dt_filter, end_dt_filter)
    filtered_mentions_data = filter_dataframe_by_date(mentions_df, date_column_mentions, start_dt_filter, end_dt_filter)

    date_filtered_follower_stats_df = pd.DataFrame()
    # "Raw" means not date-filtered: this copy is taken after the
    # pre-processing step above, so it already carries the consolidated dates.
    raw_follower_stats_df = follower_stats_df.copy()
    if not follower_stats_df.empty:
        date_filtered_follower_stats_df = filter_dataframe_by_date(follower_stats_df, date_column_followers, start_dt_filter, end_dt_filter)

    logging.info(f"Processed - Filtered Merged Posts: {len(filtered_merged_posts_data)} rows, Filtered Mentions: {len(filtered_mentions_data)} rows, Date-Filtered Follower Stats: {len(date_filtered_follower_stats_df)} rows.")

    return filtered_merged_posts_data, filtered_mentions_data, date_filtered_follower_stats_df, raw_follower_stats_df, start_dt_filter, end_dt_filter

# --- Helper function to generate textual data summaries for chatbot ---
def generate_chatbot_data_summaries(
    plot_configs_list,
    filtered_merged_posts_df,
    filtered_mentions_df,
    date_filtered_follower_stats_df, # Expected to contain 'follower_gains_monthly'
    raw_follower_stats_df,          # Expected to contain other demographics like 'follower_geo', 'follower_industry'
    token_state_value
    ):
    """
    Build one short textual data summary per plot for the chatbot.

    Args:
        plot_configs_list: Iterable of dicts; each must provide "id" and "label".
        filtered_merged_posts_df: Posts DataFrame (or None). Used for the
            engagement/reach/impressions/likes/clicks/shares/comments weekly
            summaries, comment sentiment, post frequency, format and topic
            breakdowns.
        filtered_mentions_df: Mentions DataFrame (or None). Used for weekly
            mention volume and the 'sentiment_label' breakdown.
        date_filtered_follower_stats_df: Follower stats DataFrame (or None).
            Only rows whose 'follower_count_type' equals
            'follower_gains_monthly' are used; their 'category_name' is parsed
            as the date.
        raw_follower_stats_df: Follower stats DataFrame (or None) used for the
            demographic breakdowns ('follower_geo', 'follower_function',
            'follower_industry', 'follower_seniority').
        token_state_value: Mapping read with ``.get`` for the optional keys
            "config_date_col_posts", "config_media_type_col",
            "config_eb_labels_col" and "config_date_col_mentions".

    Returns:
        dict mapping each plot id to its summary string. A plot id that is not
        recognised gets a generic "no summary available" message; an error
        while summarising one plot is logged and replaced by an error message
        for that plot only.
    """
    data_summaries = {}

    # --- Column names (configurable ones come from token_state) ---
    date_col_posts = token_state_value.get("config_date_col_posts", "published_at")
    media_type_col_name = token_state_value.get("config_media_type_col", "media_type")
    eb_labels_col_name = token_state_value.get("config_eb_labels_col", "li_eb_label")
    date_col_mentions = token_state_value.get("config_date_col_mentions", "date")
    mentions_sentiment_col = "sentiment_label"
    comment_sentiment_col_posts = "sentiment"

    follower_count_organic_col = "follower_count_organic"
    follower_count_paid_col = "follower_count_paid"

    # The same two columns serve both follower views: 'follower_count_type'
    # names the record kind and 'category_name' holds either a demographic
    # category or, for monthly gains, a date string.
    follower_demographics_type_col = "follower_count_type"
    follower_demographics_category_col = "category_name"
    follower_gains_type_col = "follower_count_type"
    follower_gains_date_col = "category_name"

    # --- Prepare DataFrames (copy and convert dates) ---
    posts_df = _copy_with_parsed_dates(filtered_merged_posts_df, date_col_posts, "posts_df")
    mentions_df = _copy_with_parsed_dates(filtered_mentions_df, date_col_mentions, "mentions_df")

    # Monthly follower gains
    if date_filtered_follower_stats_df is not None and not date_filtered_follower_stats_df.empty:
        follower_monthly_df = date_filtered_follower_stats_df.copy()
        if follower_gains_type_col in follower_monthly_df.columns:
            follower_monthly_df = follower_monthly_df[
                follower_monthly_df[follower_gains_type_col] == 'follower_gains_monthly'
            ].copy()

        if follower_gains_date_col in follower_monthly_df.columns:
            follower_monthly_df['datetime_obj'] = pd.to_datetime(
                follower_monthly_df[follower_gains_date_col], errors='coerce'
            )
            follower_monthly_df = follower_monthly_df.dropna(subset=['datetime_obj'])

            if not _add_organic_paid_total(
                follower_monthly_df, follower_count_organic_col, follower_count_paid_col, 'total_monthly_gains'
            ):
                logging.warning(f"Neither '{follower_count_organic_col}' nor '{follower_count_paid_col}' found in follower_monthly_df for total gains calculation.")
                follower_monthly_df['total_monthly_gains'] = 0 # Avoid KeyError later
        else:
            logging.warning(f"Date column '{follower_gains_date_col}' (from category_name) not found in follower_monthly_df for chatbot summary.")
            if 'datetime_obj' not in follower_monthly_df.columns:
                follower_monthly_df['datetime_obj'] = pd.NaT
            if 'total_monthly_gains' not in follower_monthly_df.columns:
                follower_monthly_df['total_monthly_gains'] = 0
    else:
        follower_monthly_df = pd.DataFrame(columns=[follower_gains_date_col, 'total_monthly_gains', 'datetime_obj'])

    # Follower demographics
    if raw_follower_stats_df is not None and not raw_follower_stats_df.empty:
        follower_demographics_df = raw_follower_stats_df.copy()
        if not _add_organic_paid_total(
            follower_demographics_df, follower_count_organic_col, follower_count_paid_col, 'total_follower_count'
        ):
            logging.warning(f"Neither '{follower_count_organic_col}' nor '{follower_count_paid_col}' found in follower_demographics_df for total count calculation.")
            if 'total_follower_count' not in follower_demographics_df.columns:
                follower_demographics_df['total_follower_count'] = 0
    else:
        follower_demographics_df = pd.DataFrame()

    # The monthly frame is only read (never mutated) inside the loop, so its
    # usability can be decided once.
    monthly_ready = (
        not follower_monthly_df.empty
        and 'total_monthly_gains' in follower_monthly_df.columns
        and 'datetime_obj' in follower_monthly_df.columns
        and not follower_monthly_df['datetime_obj'].isnull().all()
    )

    demographic_type_map = {
        "followers_by_location": "follower_geo",
        "followers_by_role": "follower_function",
        "followers_by_industry": "follower_industry",
        "followers_by_seniority": "follower_seniority"
    }

    # plot id -> (metric column, display column name or None, title,
    #             aggregation, rounding decimals or None, noun for the
    #             "... data is unavailable" message)
    weekly_metric_specs = {
        "engagement_rate": ("engagement", None, "Engagement Rate Over Time (Weekly Avg %)", "mean", 2, "Engagement rate"),
        "reach_over_time": ("reach", None, "Reach Over Time (Weekly Sum)", "sum", None, "Reach"),
        "impressions_over_time": ("impressionCount", "Impressions", "Impressions Over Time (Weekly Sum)", "sum", None, "Impressions"),
        "likes_over_time": ("likeCount", "Likes", "Likes Over Time (Weekly Sum)", "sum", None, "Likes"),
        "clicks_over_time": ("clickCount", "Clicks", "Clicks Over Time (Weekly Sum)", "sum", None, "Clicks"),
        "shares_over_time": ("shareCount", "Shares", "Shares Over Time (Weekly Sum)", "sum", None, "Shares"),
        "comments_over_time": ("commentCount", "Comments", "Comments Over Time (Weekly Sum)", "sum", None, "Comments"),
    }

    for plot_cfg in plot_configs_list:
        plot_id = plot_cfg["id"]
        plot_label = plot_cfg["label"]
        summary_text = f"No specific data summary available for '{plot_label}' for the selected period."

        try:
            # --- FOLLOWER STATS ---
            if plot_id == "followers_count":
                if monthly_ready:
                    df_summary = follower_monthly_df[['datetime_obj', 'total_monthly_gains']].copy()
                    df_summary['datetime_obj'] = df_summary['datetime_obj'].dt.strftime('%Y-%m-%d')
                    df_summary.rename(columns={'datetime_obj': 'Date', 'total_monthly_gains': 'Total Monthly Gains'}, inplace=True)
                    summary_text = f"Follower Count (Total Monthly Gains):\n{df_summary.sort_values(by='Date').tail(5).to_string(index=False)}"
                else:
                    summary_text = f"Follower count data (total monthly gains) is unavailable or incomplete for '{plot_label}'."

            elif plot_id == "followers_growth_rate":
                if monthly_ready:
                    df_calc = follower_monthly_df.sort_values(by='datetime_obj').copy()
                    df_calc['total_monthly_gains'] = pd.to_numeric(df_calc['total_monthly_gains'], errors='coerce')
                    if len(df_calc) >= 2:
                        # NOTE(review): pct_change is applied to the monthly
                        # gains column itself, i.e. this is the month-over-month
                        # change of the gains, not the growth of a cumulative
                        # follower count. The original author kept it this way
                        # to stay consistent with the plot logic - confirm.
                        growth = (df_calc['total_monthly_gains'].pct_change() * 100).round(2)
                        # A previous value of 0 yields +/-inf; treat it as "no rate".
                        df_calc['growth_rate_monthly'] = growth.replace([np.inf, -np.inf], np.nan)

                        df_summary = df_calc[['datetime_obj', 'growth_rate_monthly']].dropna().copy()
                        df_summary['datetime_obj'] = df_summary['datetime_obj'].dt.strftime('%Y-%m-%d')
                        df_summary.rename(columns={'datetime_obj': 'Date', 'growth_rate_monthly': 'Growth Rate (%)'}, inplace=True)
                        if not df_summary.empty:
                            summary_text = f"Follower Growth Rate (Monthly % based on Total Follower Count/Gains):\n{df_summary.sort_values(by='Date').tail(5).to_string(index=False)}"
                        else:
                            summary_text = f"Not enough data points or valid transitions to calculate follower growth rate for '{plot_label}'."
                    else:
                        summary_text = f"Not enough data points (need at least 2) to calculate follower growth rate for '{plot_label}'."
                else:
                    summary_text = f"Follower growth rate data (total monthly gains) is unavailable or incomplete for '{plot_label}'."

            elif plot_id in demographic_type_map:
                current_demographic_type = demographic_type_map[plot_id]
                required_cols = (
                    follower_demographics_type_col,
                    follower_demographics_category_col,
                    'total_follower_count',
                )
                if not follower_demographics_df.empty and all(
                    col in follower_demographics_df.columns for col in required_cols
                ):
                    df_filtered_demographics = follower_demographics_df[
                        follower_demographics_df[follower_demographics_type_col] == current_demographic_type
                    ]
                    if not df_filtered_demographics.empty:
                        df_summary = df_filtered_demographics.groupby(follower_demographics_category_col)['total_follower_count'].sum().reset_index()
                        df_summary.rename(columns={follower_demographics_category_col: 'Category', 'total_follower_count': 'Total Follower Count'}, inplace=True)
                        top_5 = df_summary.nlargest(5, 'Total Follower Count')
                        summary_text = f"Top 5 {plot_label} (Total Followers):\n{top_5.to_string(index=False)}"
                    else:
                        summary_text = f"No data available for demographic type '{current_demographic_type}' in '{plot_label}'."
                else:
                    summary_text = f"Follower demographic data columns (including total_follower_count) are missing or incomplete for '{plot_label}'."

            # --- POSTS STATS ---
            elif plot_id in weekly_metric_specs:
                metric_col, display_name, title, agg, decimals, noun = weekly_metric_specs[plot_id]
                weekly_text = _weekly_metric_summary(
                    posts_df, date_col_posts, metric_col, title,
                    display_name=display_name, agg=agg, decimals=decimals,
                )
                if weekly_text is not None:
                    summary_text = weekly_text
                elif plot_id == "shares_over_time" and not posts_df.empty and 'shareCount' not in posts_df.columns:
                    summary_text = f"Shares data column ('shareCount') not found for '{plot_label}'."
                else:
                    summary_text = f"{noun} data is unavailable for '{plot_label}'."

            elif plot_id == "comments_sentiment":
                if not posts_df.empty and comment_sentiment_col_posts in posts_df.columns:
                    sentiment_counts = _value_counts_table(posts_df[comment_sentiment_col_posts], 'Sentiment')
                    summary_text = f"Comments Sentiment Breakdown (Posts Data):\n{sentiment_counts.to_string(index=False)}"
                else:
                    summary_text = f"Comment sentiment data ('{comment_sentiment_col_posts}') is unavailable for '{plot_label}'."

            elif plot_id == "post_frequency_cs":
                post_counts_weekly = _weekly_row_counts(posts_df, date_col_posts, 'Posts')
                if post_counts_weekly is not None:
                    summary_text = f"Post Frequency (Weekly):\n{post_counts_weekly.sort_values(by='Week').tail(5).to_string(index=False)}"
                else:
                    summary_text = f"Post frequency data is unavailable for '{plot_label}'."

            elif plot_id == "content_format_breakdown_cs":
                if not posts_df.empty and media_type_col_name in posts_df.columns:
                    format_counts = _value_counts_table(posts_df[media_type_col_name], 'Format')
                    summary_text = f"Content Format Breakdown:\n{format_counts.nlargest(5, 'Count').to_string(index=False)}"
                else:
                    summary_text = f"Content format data ('{media_type_col_name}') is unavailable for '{plot_label}'."

            elif plot_id == "content_topic_breakdown_cs":
                if not posts_df.empty and eb_labels_col_name in posts_df.columns:
                    try:
                        topic_labels = posts_df[eb_labels_col_name]
                        # Skip the list check / explode entirely when the column is all NaN.
                        if topic_labels.notna().any():
                            if topic_labels.apply(lambda x: isinstance(x, list)).any():
                                # One row per label when a post carries a list of labels.
                                topic_labels = posts_df.explode(eb_labels_col_name)[eb_labels_col_name]
                            topic_counts = _value_counts_table(topic_labels, 'Topic')
                            summary_text = f"Content Topic Breakdown (Top 5):\n{topic_counts.nlargest(5, 'Count').to_string(index=False)}"
                        else:
                            summary_text = f"Content topic data ('{eb_labels_col_name}') contains no valid topics for '{plot_label}'."
                    except Exception as e_topic:
                        logging.warning(f"Could not process topic breakdown for '{eb_labels_col_name}': {e_topic}")
                        summary_text = f"Content topic data ('{eb_labels_col_name}') could not be processed for '{plot_label}'."
                else:
                    summary_text = f"Content topic data ('{eb_labels_col_name}') is unavailable for '{plot_label}'."

            # --- MENTIONS STATS ---
            elif plot_id == "mention_analysis_volume":
                mentions_over_time = _weekly_row_counts(mentions_df, date_col_mentions, 'Mentions')
                if mentions_over_time is None:
                    summary_text = f"Mentions volume data is unavailable for '{plot_label}'."
                elif not mentions_over_time.empty:
                    summary_text = f"Mentions Volume (Weekly):\n{mentions_over_time.sort_values(by='Week').tail(5).to_string(index=False)}"
                else:
                    summary_text = f"No mention activity found for '{plot_label}' in the selected period."

            elif plot_id == "mention_analysis_sentiment":
                if not mentions_df.empty and mentions_sentiment_col in mentions_df.columns:
                    sentiment_counts = _value_counts_table(mentions_df[mentions_sentiment_col], 'Sentiment')
                    summary_text = f"Mentions Sentiment Breakdown:\n{sentiment_counts.to_string(index=False)}"
                else:
                    summary_text = f"Mention sentiment data ('{mentions_sentiment_col}') is unavailable for '{plot_label}'."

            data_summaries[plot_id] = summary_text
        except KeyError as e:
            logging.warning(f"KeyError generating summary for {plot_id} ('{plot_label}'): {e}. Using default summary.")
            data_summaries[plot_id] = f"Data summary generation error for '{plot_label}' (missing column: {e})."
        except Exception as e:
            logging.error(f"Error generating summary for {plot_id} ('{plot_label}'): {e}", exc_info=True)
            data_summaries[plot_id] = f"Error generating data summary for '{plot_label}'."

    return data_summaries


def _copy_with_parsed_dates(df, date_col, frame_name):
    """Return a copy of *df* with *date_col* parsed to datetimes (unparsable -> NaT).

    None or an empty frame yields an empty DataFrame; a missing date column is
    logged and the copy is returned unchanged.
    """
    if df is None or df.empty:
        return pd.DataFrame()
    parsed = df.copy()
    if date_col in parsed.columns:
        parsed[date_col] = pd.to_datetime(parsed[date_col], errors='coerce')
    else:
        logging.warning(f"Date column '{date_col}' not found in {frame_name} for chatbot summary.")
    return parsed


def _add_organic_paid_total(df, organic_col, paid_col, total_col):
    """Coerce whichever of the two count columns exist to numbers (NaN -> 0) and
    store their sum in *total_col*, in place.

    Returns False, leaving *df* untouched, when neither column exists.
    """
    present = [col for col in (organic_col, paid_col) if col in df.columns]
    if not present:
        return False
    for col in present:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
    total = df[present[0]]
    for col in present[1:]:
        total = total + df[col]
    df[total_col] = total
    return True


def _weekly_metric_summary(posts_df, date_col, metric_col, title,
                           display_name=None, agg="sum", decimals=None):
    """Summarise *metric_col* per week ('W' resample) as text, last 5 weeks.

    Values are coerced to numbers BEFORE aggregating so that string-typed
    counts are added arithmetically instead of being concatenated, and rows
    without a valid date are dropped first.

    Returns None when the frame is empty, a required column is missing, or the
    metric has no numeric value on any dated row (so an all-missing column is
    reported as unavailable rather than as weeks of zeros).
    """
    if posts_df.empty or metric_col not in posts_df.columns or date_col not in posts_df.columns:
        return None

    dated = posts_df.dropna(subset=[date_col])
    values = pd.to_numeric(dated.set_index(date_col)[metric_col], errors='coerce')
    if not values.notna().any():
        return None

    resampler = values.resample('W')
    weekly = (resampler.mean() if agg == "mean" else resampler.sum()).reset_index()
    if decimals is not None:
        weekly[metric_col] = weekly[metric_col].round(decimals)

    df_summary = weekly[[date_col, metric_col]].dropna().copy()
    df_summary[date_col] = df_summary[date_col].dt.strftime('%Y-%m-%d')
    if display_name is not None:
        df_summary.rename(columns={metric_col: display_name}, inplace=True)
    return f"{title}:\n{df_summary.sort_values(by=date_col).tail(5).to_string(index=False)}"


def _weekly_row_counts(df, date_col, count_label):
    """Count rows per week ('W' resample) into a frame with columns 'Week' and
    *count_label*; 'Week' is formatted as a string.

    Returns None when the frame is empty, lacks *date_col*, or has no valid date.
    """
    if df.empty or date_col not in df.columns:
        return None
    dated = df.dropna(subset=[date_col])
    if dated.empty:
        return None
    weekly = dated.set_index(date_col).resample('W').size().reset_index(name=count_label)
    weekly.rename(columns={date_col: 'Week'}, inplace=True)
    weekly['Week'] = weekly['Week'].dt.strftime('%Y-%m-%d (Week of)')
    return weekly


def _value_counts_table(series, key_label):
    """Return the value counts of *series* as a two-column frame [key_label, 'Count']."""
    counts = series.value_counts().reset_index()
    counts.columns = [key_label, 'Count']
    return counts