GuglielmoTor committed on
Commit
fed4e5b
·
verified ·
1 Parent(s): f252ea9

Create state_manager.py

Browse files
Files changed (1) hide show
  1. state_manager.py +222 -0
state_manager.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # state_manager.py
2
+ """
3
+ Manages the application state, including token processing,
4
+ initial data loading from Bubble, and determining sync requirements.
5
+ """
6
+ import pandas as pd
7
+ import logging
8
+ import os
9
+ from datetime import datetime, timedelta, timezone # Added timezone to ensure it's available
10
+ import gradio as gr
11
+
12
+ # Assuming Bubble_API_Calls contains fetch_linkedin_token_from_bubble and fetch_linkedin_posts_data_from_bubble
13
+ from Bubble_API_Calls import (
14
+ fetch_linkedin_token_from_bubble,
15
+ fetch_linkedin_posts_data_from_bubble
16
+ )
17
+ # Assuming config.py contains all necessary constants
18
+ from config import (
19
+ DEFAULT_INITIAL_FETCH_COUNT, BUBBLE_POST_DATE_COLUMN_NAME, BUBBLE_POSTS_TABLE_NAME,
20
+ BUBBLE_MENTIONS_TABLE_NAME, BUBBLE_MENTIONS_DATE_COLUMN_NAME,
21
+ BUBBLE_FOLLOWER_STATS_TABLE_NAME, FOLLOWER_STATS_TYPE_COLUMN, FOLLOWER_STATS_CATEGORY_COLUMN,
22
+ LINKEDIN_CLIENT_ID_ENV_VAR
23
+ )
24
+
25
def check_token_status(token_state) -> str:
    """Return a human-readable status string for the LinkedIn token.

    Args:
        token_state: The application state mapping (or a falsy value such as
            ``None`` / ``{}`` when no state exists yet). Only its ``"token"``
            entry is inspected.

    Returns:
        ``"✅ Token available"`` when ``token_state`` is truthy and holds a
        truthy ``"token"`` entry, otherwise ``"❌ Token not available"``.
    """
    has_token = bool(token_state and token_state.get("token"))
    return "✅ Token available" if has_token else "❌ Token not available"
28
+
29
def _fetch_linkedin_token(url_user_token):
    """Exchange the URL user token for a LinkedIn token dict via Bubble.

    Returns the parsed token dict when it contains ``"access_token"``,
    otherwise ``None``. The user token and the raw response are never written
    to the log because both may contain credentials.
    """
    # The upstream caller signals lookup failures by embedding these phrases
    # in the value it passes as the token, so treat them as "no token".
    if not url_user_token or "not found" in url_user_token or "Could not access" in url_user_token:
        logging.info("No valid URL user token provided for LinkedIn token fetch, or an error was indicated.")
        return None

    logging.info("Attempting to fetch LinkedIn token from Bubble with the provided user token.")
    try:
        parsed_linkedin_token = fetch_linkedin_token_from_bubble(url_user_token)
    except Exception as e:
        logging.error(f"❌ Exception while fetching LinkedIn token from Bubble: {e}", exc_info=True)
        return None

    if isinstance(parsed_linkedin_token, dict) and "access_token" in parsed_linkedin_token:
        logging.info("✅ LinkedIn Token successfully fetched from Bubble.")
        return parsed_linkedin_token

    logging.warning(
        "❌ Failed to fetch a valid LinkedIn token from Bubble. "
        f"Response type: {type(parsed_linkedin_token).__name__}"
    )
    return None


def _fetch_bubble_table(org_urn, table_name, label) -> pd.DataFrame:
    """Fetch one Bubble table for ``org_urn``; return an empty DataFrame on any failure.

    ``label`` is only used to make log messages readable (e.g. ``"posts"``).
    """
    logging.info(f"Attempting to fetch {label} from Bubble for org_urn: {org_urn}")
    try:
        fetched_df, error_message = fetch_linkedin_posts_data_from_bubble(org_urn, table_name)
    except Exception as e:
        logging.error(f"❌ Error fetching {label} from Bubble: {e}.", exc_info=True)
        return pd.DataFrame()

    if error_message:
        logging.warning(f"Error fetching {table_name} from Bubble: {error_message}.")
        return pd.DataFrame()
    if fetched_df is None:
        return pd.DataFrame()
    return fetched_df


def _days_since(timestamp_utc) -> int:
    """Return whole calendar days (UTC) between ``timestamp_utc`` and today."""
    today_utc = pd.Timestamp.now(tz='UTC').normalize()
    return (today_utc - timestamp_utc.normalize()).days


def _determine_posts_fetch_count(posts_df) -> int:
    """Decide how many posts to request from the LinkedIn API.

    Returns ``DEFAULT_INITIAL_FETCH_COUNT`` when Bubble holds no usable post
    dates (or date parsing fails), ``0`` when the newest post is less than
    seven days old, and otherwise ten posts per full week of staleness.
    """
    if posts_df.empty:
        logging.info(f"ℹ️ No posts in Bubble. Setting to fetch initial {DEFAULT_INITIAL_FETCH_COUNT} posts.")
        return DEFAULT_INITIAL_FETCH_COUNT

    try:
        if (BUBBLE_POST_DATE_COLUMN_NAME not in posts_df.columns
                or posts_df[BUBBLE_POST_DATE_COLUMN_NAME].isnull().all()):
            logging.warning(
                f"Date column '{BUBBLE_POST_DATE_COLUMN_NAME}' for posts missing or all null values. "
                "Triggering initial fetch."
            )
            return DEFAULT_INITIAL_FETCH_COUNT

        post_dates = pd.to_datetime(posts_df[BUBBLE_POST_DATE_COLUMN_NAME], errors='coerce', utc=True)
        last_post_date_utc = post_dates.dropna().max()
        if pd.isna(last_post_date_utc):
            logging.warning("No valid post dates found after conversion. Triggering initial fetch.")
            return DEFAULT_INITIAL_FETCH_COUNT

        days_diff = _days_since(last_post_date_utc)
        if days_diff >= 7:
            fetch_count = max(1, days_diff // 7) * 10
            logging.info(f"Posts data is {days_diff} days old. Setting fetch count to {fetch_count}.")
            return fetch_count

        logging.info("Posts data is recent. No new posts fetch needed based on date.")
        return 0
    except Exception as e:
        logging.error(f"Error processing post dates: {e}. Defaulting to initial fetch for posts.", exc_info=True)
        return DEFAULT_INITIAL_FETCH_COUNT


def _mentions_need_sync(mentions_df) -> bool:
    """Return True when mentions stored in Bubble are missing, undated, or at least 7 days old."""
    if mentions_df.empty:
        logging.info("Mentions need sync: Bubble mentions DF is empty.")
        return True

    if (BUBBLE_MENTIONS_DATE_COLUMN_NAME not in mentions_df.columns
            or mentions_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME].isnull().all()):
        logging.info(
            f"Mentions need sync: Date column '{BUBBLE_MENTIONS_DATE_COLUMN_NAME}' missing or all null values."
        )
        return True

    # Guarded like the posts path: a parsing failure must not abort the whole
    # state update, it should simply request a sync.
    try:
        mention_dates = pd.to_datetime(mentions_df[BUBBLE_MENTIONS_DATE_COLUMN_NAME], errors='coerce', utc=True)
        last_mention_date_utc = mention_dates.dropna().max()
        is_stale = pd.isna(last_mention_date_utc) or _days_since(last_mention_date_utc) >= 7
    except Exception as e:
        logging.error(f"Error processing mention dates: {e}. Defaulting to mentions sync.", exc_info=True)
        return True

    if is_stale:
        logging.info(f"Mentions need sync: Last mention date {last_mention_date_utc} is old or invalid.")
        return True

    logging.info(f"Mentions up-to-date. Last mention: {last_mention_date_utc}")
    return False


def _follower_stats_need_sync(fs_df) -> bool:
    """Return True when follower stats in Bubble are missing or out of date.

    A sync is requested when the frame is empty, the type column is absent,
    there are no ``'follower_gains_monthly'`` rows, the newest monthly gain is
    dated before the first day of the current UTC month, or there are no rows
    of any other type (demographic data).
    """
    if fs_df.empty:
        logging.info("Follower stats need sync: Bubble follower stats DF is empty.")
        return True

    # Without the type column neither monthly gains nor demographics can be
    # identified; indexing it blindly would raise KeyError.
    if FOLLOWER_STATS_TYPE_COLUMN not in fs_df.columns:
        logging.info(f"Follower stats need sync: Type column '{FOLLOWER_STATS_TYPE_COLUMN}' missing.")
        return True

    need_sync = False
    is_monthly_gain = fs_df[FOLLOWER_STATS_TYPE_COLUMN] == 'follower_gains_monthly'
    monthly_gains_df = fs_df[is_monthly_gain]

    if monthly_gains_df.empty:
        need_sync = True
        logging.info("Follower stats need sync: No monthly gains data in Bubble.")
    elif FOLLOWER_STATS_CATEGORY_COLUMN not in monthly_gains_df.columns:
        need_sync = True
        logging.info(
            f"Follower stats need sync: Date column '{FOLLOWER_STATS_CATEGORY_COLUMN}' missing in monthly gains."
        )
    else:
        try:
            # Parse into a local Series rather than writing back into the
            # frame, so the stored data keeps its original values and dtype.
            gain_dates = pd.to_datetime(
                monthly_gains_df[FOLLOWER_STATS_CATEGORY_COLUMN], errors='coerce'
            ).dt.normalize()
            last_gain_date = gain_dates.dropna().max()
        except Exception as e:
            logging.error(
                f"Error processing follower gain dates: {e}. Defaulting to follower stats sync.",
                exc_info=True,
            )
            need_sync = True
        else:
            if pd.isna(last_gain_date):
                need_sync = True
                logging.info("Follower stats need sync: No valid dates in monthly gains after conversion.")
            else:
                if last_gain_date.tzinfo is None:
                    last_gain_date = last_gain_date.tz_localize('UTC')  # Localize naive to UTC
                else:
                    last_gain_date = last_gain_date.tz_convert('UTC')  # Convert aware to UTC

                start_of_current_month = pd.Timestamp.now(tz='UTC').normalize().replace(day=1)
                if last_gain_date < start_of_current_month:
                    need_sync = True
                    logging.info(
                        f"Follower stats need sync: Last gain date {last_gain_date} "
                        f"is before current month start {start_of_current_month}."
                    )
                else:
                    logging.info(f"Follower monthly gains up-to-date. Last gain recorded on: {last_gain_date}")

    if fs_df[~is_monthly_gain].empty:
        need_sync = True
        logging.info("Follower stats need sync: Demographic data (non-monthly types) missing.")

    return need_sync


def process_and_store_bubble_token(url_user_token, org_urn, token_state):
    """Build the application state from the URL token and the data stored in Bubble.

    Steps performed:
      1. Copy ``token_state`` (or start a new state) and record ``org_urn``
         and the URL user token.
      2. Read the LinkedIn client id from the environment variable named by
         ``LINKEDIN_CLIENT_ID_ENV_VAR``.
      3. Fetch the LinkedIn token from Bubble using ``url_user_token``.
      4. Fetch posts, mentions and follower stats from Bubble for ``org_urn``
         (empty DataFrames when ``org_urn`` is falsy or a fetch fails).
      5. Decide, per data type, whether a sync with LinkedIn is needed.
      6. Build the Gradio update for the sync button.

    Args:
        url_user_token: User token taken from the URL, or a message containing
            ``"not found"`` / ``"Could not access"`` when it was unavailable.
        org_urn: Organisation URN used to query Bubble; may be falsy.
        token_state: Previous state dict, or a falsy value on first call. It
            is not mutated; a shallow copy is updated and returned.

    Returns:
        A 3-tuple ``(token_status_message, new_state, button_update)`` where
        ``button_update`` is a ``gr.update(...)`` for the sync button: hidden
        without a token, enabled with a label listing pending sync actions, or
        disabled with an up-to-date label.
    """
    # Never log the token value itself; it is a credential.
    logging.info(
        f"Processing token (URL user token provided: {bool(url_user_token)}), Org URN: '{org_urn}'"
    )

    new_state = token_state.copy() if token_state else {}
    new_state["org_urn"] = org_urn
    new_state["url_user_token_temp_storage"] = url_user_token

    client_id = os.environ.get(LINKEDIN_CLIENT_ID_ENV_VAR)
    new_state["client_id"] = client_id if client_id else "ENV VAR MISSING"
    if not client_id:
        logging.error(f"CRITICAL ERROR: '{LINKEDIN_CLIENT_ID_ENV_VAR}' environment variable not set.")

    new_state["token"] = _fetch_linkedin_token(url_user_token)

    # Fetch existing data from Bubble if Org URN is available
    if org_urn:
        new_state["bubble_posts_df"] = _fetch_bubble_table(org_urn, BUBBLE_POSTS_TABLE_NAME, "posts")
        new_state["bubble_mentions_df"] = _fetch_bubble_table(org_urn, BUBBLE_MENTIONS_TABLE_NAME, "mentions")
        new_state["bubble_follower_stats_df"] = _fetch_bubble_table(
            org_urn, BUBBLE_FOLLOWER_STATS_TABLE_NAME, "follower stats"
        )
    else:
        logging.warning("Org URN not available in state. Cannot fetch data from Bubble.")
        new_state["bubble_posts_df"] = pd.DataFrame()
        new_state["bubble_mentions_df"] = pd.DataFrame()
        new_state["bubble_follower_stats_df"] = pd.DataFrame()

    new_state["fetch_count_for_api"] = _determine_posts_fetch_count(new_state["bubble_posts_df"])
    mentions_need_sync = _mentions_need_sync(new_state["bubble_mentions_df"])
    follower_stats_need_sync = _follower_stats_need_sync(new_state["bubble_follower_stats_df"])

    # Update Sync Button based on token and needed actions
    sync_actions = []
    if new_state["fetch_count_for_api"] > 0:
        sync_actions.append(f"{new_state['fetch_count_for_api']} Posts")
    if mentions_need_sync:
        sync_actions.append("Mentions")
    if follower_stats_need_sync:
        sync_actions.append("Follower Stats")

    if not new_state["token"]:
        # Without a LinkedIn token nothing can be synced, so hide the button.
        button_update = gr.update(visible=False, interactive=False)
    elif sync_actions:
        button_label = f"🔄 Sync LinkedIn Data ({', '.join(sync_actions)})"
        button_update = gr.update(value=button_label, visible=True, interactive=True)
    else:
        button_update = gr.update(value="✅ Data Up-to-Date", visible=True, interactive=False)

    token_status_message = check_token_status(new_state)
    logging.info(
        f"Token processing complete. Status: {token_status_message}. "
        f"Button: {button_update}. Sync actions: {sync_actions}"
    )
    return token_status_message, new_state, button_update