GuglielmoTor committed on
Commit
dce8999
·
verified ·
1 Parent(s): 0cc5a7d

Update data_processing/analytics_data_processing.py

Browse files
data_processing/analytics_data_processing.py CHANGED
@@ -62,7 +62,6 @@ def filter_dataframe_by_date(df, date_column, start_date, end_date):
62
 
63
  return df_filtered_final
64
 
65
-
66
  def prepare_filtered_analytics_data(token_state_value, date_filter_option, custom_start_date, custom_end_date):
67
  """
68
  Retrieves data from token_state, determines date range, filters posts, mentions, and follower time-series data.
@@ -80,11 +79,10 @@ def prepare_filtered_analytics_data(token_state_value, date_filter_option, custo
80
  posts_df = token_state_value.get("bubble_posts_df", pd.DataFrame()).copy()
81
  mentions_df = token_state_value.get("bubble_mentions_df", pd.DataFrame()).copy()
82
  follower_stats_df = token_state_value.get("bubble_follower_stats_df", pd.DataFrame()).copy()
83
- post_stats_df = token_state_value.get("bubble_post_stats_df", pd.DataFrame()).copy() # Fetch post_stats_df
84
 
85
  date_column_posts = token_state_value.get("config_date_col_posts", "published_at")
86
  date_column_mentions = token_state_value.get("config_date_col_mentions", "date")
87
- # Assuming follower_stats_df has a 'date' column for time-series data
88
  date_column_followers = token_state_value.get("config_date_col_followers", "date")
89
 
90
  # Determine date range for filtering
@@ -94,47 +92,48 @@ def prepare_filtered_analytics_data(token_state_value, date_filter_option, custo
94
  end_dt_filter = current_time_normalized
95
  start_dt_filter = None
96
 
97
- if date_filter_option == "Last 7 Days":
 
 
 
98
  start_dt_filter = current_time_normalized - timedelta(days=6)
99
- elif date_filter_option == "Last 30 Days":
100
  start_dt_filter = current_time_normalized - timedelta(days=29)
101
- elif date_filter_option == "Custom Range":
102
  start_dt_filter_temp = pd.to_datetime(custom_start_date, errors='coerce')
103
  start_dt_filter = start_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(start_dt_filter_temp) else None
104
 
105
  end_dt_filter_temp = pd.to_datetime(custom_end_date, errors='coerce')
 
106
  end_dt_filter = end_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(end_dt_filter_temp) else current_time_normalized
 
107
 
108
  logging.info(f"Date range for filtering: Start: {start_dt_filter}, End: {end_dt_filter}")
109
 
110
  # Merge posts_df and post_stats_df
111
  merged_posts_df = pd.DataFrame()
112
  if not posts_df.empty and not post_stats_df.empty:
113
- # Assuming posts_df has 'id' and post_stats_df has 'post_id' for merging
114
  if 'id' in posts_df.columns and 'post_id' in post_stats_df.columns:
115
  merged_posts_df = pd.merge(posts_df, post_stats_df, left_on='id', right_on='post_id', how='left')
116
  logging.info(f"Merged posts_df ({len(posts_df)} rows) and post_stats_df ({len(post_stats_df)} rows) into merged_posts_df ({len(merged_posts_df)} rows).")
117
  else:
118
  logging.warning("Cannot merge posts_df and post_stats_df due to missing 'id' or 'post_id' columns.")
119
- # Fallback to using posts_df if merge fails but provide an empty df for stats-dependent plots
120
- merged_posts_df = posts_df # Or handle as an error / empty DF for those plots
121
  elif not posts_df.empty:
122
- logging.warning("post_stats_df is empty. Proceeding with posts_df only for plots that don't require stats.")
123
- merged_posts_df = posts_df # Create necessary columns with NaN if they are expected by plots
124
- # For columns expected from post_stats_df, add them with NaNs if not present
125
  expected_stat_cols = ['engagement', 'impressionCount', 'clickCount', 'likeCount', 'commentCount', 'shareCount']
126
  for col in expected_stat_cols:
127
  if col not in merged_posts_df.columns:
128
  merged_posts_df[col] = pd.NA
129
 
130
-
131
  # Filter DataFrames by date
132
  filtered_merged_posts_data = pd.DataFrame()
133
  if not merged_posts_df.empty and date_column_posts in merged_posts_df.columns:
134
  filtered_merged_posts_data = filter_dataframe_by_date(merged_posts_df, date_column_posts, start_dt_filter, end_dt_filter)
135
  elif not merged_posts_df.empty:
136
  logging.warning(f"Date column '{date_column_posts}' not found in merged_posts_df. Returning unfiltered merged posts data.")
137
- filtered_merged_posts_data = merged_posts_df # Or apply other logic
138
 
139
  filtered_mentions_data = pd.DataFrame()
140
  if not mentions_df.empty and date_column_mentions in mentions_df.columns:
@@ -144,14 +143,13 @@ def prepare_filtered_analytics_data(token_state_value, date_filter_option, custo
144
  filtered_mentions_data = mentions_df
145
 
146
  date_filtered_follower_stats_df = pd.DataFrame()
147
- raw_follower_stats_df = follower_stats_df.copy() # For demographic plots, use raw (or latest snapshot logic)
148
 
149
  if not follower_stats_df.empty and date_column_followers in follower_stats_df.columns:
150
  date_filtered_follower_stats_df = filter_dataframe_by_date(follower_stats_df, date_column_followers, start_dt_filter, end_dt_filter)
151
  elif not follower_stats_df.empty:
152
  logging.warning(f"Date column '{date_column_followers}' not found in follower_stats_df. Time-series follower plots might be empty or use unfiltered data.")
153
- # Decide if date_filtered_follower_stats_df should be raw_follower_stats_df or empty
154
- date_filtered_follower_stats_df = follower_stats_df # Or pd.DataFrame() if strict filtering is required
155
 
156
  logging.info(f"Processed - Filtered Merged Posts: {len(filtered_merged_posts_data)} rows, Filtered Mentions: {len(filtered_mentions_data)} rows, Date-Filtered Follower Stats: {len(date_filtered_follower_stats_df)} rows.")
157
 
 
62
 
63
  return df_filtered_final
64
 
 
65
  def prepare_filtered_analytics_data(token_state_value, date_filter_option, custom_start_date, custom_end_date):
66
  """
67
  Retrieves data from token_state, determines date range, filters posts, mentions, and follower time-series data.
 
79
  posts_df = token_state_value.get("bubble_posts_df", pd.DataFrame()).copy()
80
  mentions_df = token_state_value.get("bubble_mentions_df", pd.DataFrame()).copy()
81
  follower_stats_df = token_state_value.get("bubble_follower_stats_df", pd.DataFrame()).copy()
82
+ post_stats_df = token_state_value.get("bubble_post_stats_df", pd.DataFrame()).copy()
83
 
84
  date_column_posts = token_state_value.get("config_date_col_posts", "published_at")
85
  date_column_mentions = token_state_value.get("config_date_col_mentions", "date")
 
86
  date_column_followers = token_state_value.get("config_date_col_followers", "date")
87
 
88
  # Determine date range for filtering
 
92
  end_dt_filter = current_time_normalized
93
  start_dt_filter = None
94
 
95
+ # --- FIX STARTS HERE ---
96
+ # The filter option strings from the UI must exactly match the strings being checked here.
97
+ # The original code checked for "Last 7 Days" but the UI sent "Ultimi 7 Giorni".
98
+ if date_filter_option == "Ultimi 7 Giorni":
99
  start_dt_filter = current_time_normalized - timedelta(days=6)
100
+ elif date_filter_option == "Ultimi 30 Giorni":
101
  start_dt_filter = current_time_normalized - timedelta(days=29)
102
+ elif date_filter_option == "Intervallo Personalizzato":
103
  start_dt_filter_temp = pd.to_datetime(custom_start_date, errors='coerce')
104
  start_dt_filter = start_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(start_dt_filter_temp) else None
105
 
106
  end_dt_filter_temp = pd.to_datetime(custom_end_date, errors='coerce')
107
+ # If end date is specified, use it. Otherwise, default to today.
108
  end_dt_filter = end_dt_filter_temp.replace(hour=0, minute=0, second=0, microsecond=0) if pd.notna(end_dt_filter_temp) else current_time_normalized
109
+ # --- FIX ENDS HERE ---
110
 
111
  logging.info(f"Date range for filtering: Start: {start_dt_filter}, End: {end_dt_filter}")
112
 
113
  # Merge posts_df and post_stats_df
114
  merged_posts_df = pd.DataFrame()
115
  if not posts_df.empty and not post_stats_df.empty:
 
116
  if 'id' in posts_df.columns and 'post_id' in post_stats_df.columns:
117
  merged_posts_df = pd.merge(posts_df, post_stats_df, left_on='id', right_on='post_id', how='left')
118
  logging.info(f"Merged posts_df ({len(posts_df)} rows) and post_stats_df ({len(post_stats_df)} rows) into merged_posts_df ({len(merged_posts_df)} rows).")
119
  else:
120
  logging.warning("Cannot merge posts_df and post_stats_df due to missing 'id' or 'post_id' columns.")
121
+ merged_posts_df = posts_df
 
122
  elif not posts_df.empty:
123
+ logging.warning("post_stats_df is empty. Proceeding with posts_df only.")
124
+ merged_posts_df = posts_df
 
125
  expected_stat_cols = ['engagement', 'impressionCount', 'clickCount', 'likeCount', 'commentCount', 'shareCount']
126
  for col in expected_stat_cols:
127
  if col not in merged_posts_df.columns:
128
  merged_posts_df[col] = pd.NA
129
 
 
130
  # Filter DataFrames by date
131
  filtered_merged_posts_data = pd.DataFrame()
132
  if not merged_posts_df.empty and date_column_posts in merged_posts_df.columns:
133
  filtered_merged_posts_data = filter_dataframe_by_date(merged_posts_df, date_column_posts, start_dt_filter, end_dt_filter)
134
  elif not merged_posts_df.empty:
135
  logging.warning(f"Date column '{date_column_posts}' not found in merged_posts_df. Returning unfiltered merged posts data.")
136
+ filtered_merged_posts_data = merged_posts_df
137
 
138
  filtered_mentions_data = pd.DataFrame()
139
  if not mentions_df.empty and date_column_mentions in mentions_df.columns:
 
143
  filtered_mentions_data = mentions_df
144
 
145
  date_filtered_follower_stats_df = pd.DataFrame()
146
+ raw_follower_stats_df = follower_stats_df.copy()
147
 
148
  if not follower_stats_df.empty and date_column_followers in follower_stats_df.columns:
149
  date_filtered_follower_stats_df = filter_dataframe_by_date(follower_stats_df, date_column_followers, start_dt_filter, end_dt_filter)
150
  elif not follower_stats_df.empty:
151
  logging.warning(f"Date column '{date_column_followers}' not found in follower_stats_df. Time-series follower plots might be empty or use unfiltered data.")
152
+ date_filtered_follower_stats_df = follower_stats_df
 
153
 
154
  logging.info(f"Processed - Filtered Merged Posts: {len(filtered_merged_posts_data)} rows, Filtered Mentions: {len(filtered_mentions_data)} rows, Date-Filtered Follower Stats: {len(date_filtered_follower_stats_df)} rows.")
155