Trisha Tomy committed
Commit c24ca69 · 1 Parent(s): cbcd3e0

trying fixes for loading

Files changed (1)
  1. src/proxy_lite/browser/browser.py +45 -101
src/proxy_lite/browser/browser.py CHANGED
@@ -179,116 +179,60 @@ class BrowserSession:
     # re-run for cases of mid-run redirects
     @retry(
         wait=wait_exponential(multiplier=1, min=1, max=10),
-        stop=stop_after_delay(5),
+        stop=stop_after_delay(5),  # This retry is for the entire update_poi method, in case of intermittent issues
         reraise=True,
         before_sleep=before_sleep_log(logger, logging.ERROR),
     )
     async def update_poi(self) -> None:
         try:
-            # Added for robustness based on previous discussions
-            await self.current_page.wait_for_load_state("networkidle", timeout=180000)
+            # Step 1: Wait for network to be idle. This indicates that initial requests have settled.
+            logger.debug("Attempting wait_for_load_state('networkidle')...")
+            await self.current_page.wait_for_load_state("networkidle", timeout=180000)  # Increased timeout
             logger.debug("wait_for_load_state('networkidle') completed.")

-            # This is the line that was previously timing out, now with increased timeout.
-            # Adding explicit try/except around it for specific debugging.
+            # Step 2: Wait for the 'loading' class to disappear from the body.
+            # This is a common and effective way to detect when SPAs like Salesforce are visually ready.
+            logger.debug("Attempting wait_for_selector('body:not(.loading)')...")
+            # Removed state="visible" as it's often too strict for 'body' in SPAs,
+            # and 'not(.loading)' implies it should become visible eventually.
+            await self.current_page.wait_for_selector("body:not(.loading)", timeout=180000)
+            logger.debug("wait_for_selector('body:not(.loading)') completed.")
+
+            # Optional Step 3 (Highly Recommended): If the above still times out,
+            # uncomment and replace with a reliable selector for an interactive element
+            # that only appears after the Salesforce UI is fully loaded and ready for user input.
+            # Example: await self.current_page.wait_for_selector("#some_salesforce_specific_id", timeout=180000, state="visible")
+            # Example: await self.current_page.wait_for_selector("text=App Launcher", timeout=180000, state="visible")
+            # For now, we'll rely on the 'body:not(.loading)' as the primary indicator.
+
+        except PlaywrightTimeoutError as e:
+            # --- START TEMPORARY DEBUGGING CODE ---
+            # This block captures state specifically when a Playwright timeout occurs
+            current_url = self.current_page.url if self.current_page else "N/A"
+            logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check at URL: {current_url}")
+
+            html_content = None
             try:
-                await self.current_page.wait_for_selector("body", timeout=180000, state="visible")
-                logger.debug("wait_for_selector('body', state='visible') completed.")
-            except PlaywrightTimeoutError as e:
-                # --- START TEMPORARY DEBUGGING CODE ---
-                current_url = self.current_page.url if self.current_page else "N/A"
-                logger.error(f"DEBUGGING: Playwright Timeout (180s) on body selector at URL: {current_url}")
-
-                html_content = None
-                try:
-                    if self.current_page:
-                        html_content = await self.current_page.content()
-                        # Log only a snippet of HTML to avoid excessively large logs
-                        logger.error(f"DEBUGGING: HTML Content (first 1000 chars) when timeout occurred:\n{html_content[:1000]}...")
-                except Exception as html_e:
-                    logger.error(f"DEBUGGING: Could not get HTML content: {html_e}")
-
-                screenshot_b64 = "N/A"
-                try:
-                    if self.current_page:
-                        # Capture screenshot at lower quality to keep log size manageable
-                        screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
-                        screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
-                        # Log only a very short snippet of base64 string
-                        logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
-                except Exception as ss_e:
-                    logger.error(f"DEBUGGING: Could not take screenshot: {ss_e}")
-
-                # Re-raise the original exception to ensure the task still fails,
-                # but now with crucial debugging information in the logs.
-                raise e
-                # --- END TEMPORARY DEBUGGING CODE ---
-
-        except PlaywrightTimeoutError:  # This outer catch is for the wait_for_load_state timeout
-            logger.error(f"Timeout waiting for website load state (networkidle): {self.current_url}")
-            raise  # Re-raise if load_state itself times out
-
-        except Exception as e:
-            logger.error(f"An unexpected error occurred during page readiness check: {e}")
-            raise
-
-        # Run the bounding box javascript code to highlight the points of interest on the page
-        page_info = await self.current_page.evaluate(
-            """() => {
-                overwriteDefaultSelectConvergence();
-                return findPOIsConvergence();
-            }""",
-        )
-        # Get the points of interest on the page
-        self.poi_elements = page_info["element_descriptions"]
-        element_centroids = page_info["element_centroids"]
-        try:
-            # Select all iframes on the page
-            iframes = await self.current_page.query_selector_all("iframe")
-
-            max_iframes = 10
-
-            # Define an asynchronous function to process and filter each iframe
-            tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
+                if self.current_page:
+                    html_content = await self.current_page.content()
+                    logger.error(f"DEBUGGING: HTML Content (first 1000 chars) when timeout occurred:\n{html_content[:1000]}...")
+            except Exception as html_e:
+                logger.error(f"DEBUGGING: Could not get HTML content for debug: {html_e}")

-            results = await asyncio.gather(*tasks)
-
-            filtered_results = [result for result in results if result is not None]
-
-            iframes_pois = []
-            iframe_offsets = []
-
-            for poi, offset in filtered_results:
-                iframes_pois.append(poi)
-                iframe_offsets.append(offset)
-
-            # Combine the points of interest from the iframes with the main page and adjust the centroids
-            for index, iframe_poi in enumerate(iframes_pois):
-                self.poi_elements.extend(iframe_poi["element_descriptions"])
-                for centroid in iframe_poi["element_centroids"]:
-                    centroid["x"] += iframe_offsets[index]["x"]
-                    centroid["y"] += iframe_offsets[index]["y"]
-                    centroid["left"] += iframe_offsets[index]["x"]
-                    centroid["top"] += iframe_offsets[index]["y"]
-                    centroid["right"] += iframe_offsets[index]["x"]
-                    centroid["bottom"] += iframe_offsets[index]["y"]
-                element_centroids.extend(iframe_poi["element_centroids"])
-
-        except Exception as e:
-            logger.error(f"Error in finding iframes: {e}")
-
-        # Get the centroids of the points of interest
-        self.poi_centroids = [Point(x=xy["x"], y=xy["y"]) for xy in element_centroids]
-        self.bounding_boxes = [BoundingBox(**xy, label=str(i)) for i, xy in enumerate(element_centroids)]
-        self.pois = [
-            POI(info=info, element_centroid=centroid, bounding_box=bbox)
-            for info, centroid, bbox in zip(
-                self.poi_elements,
-                self.poi_centroids,
-                self.bounding_boxes,
-                strict=False,
-            )
-        ]
+            screenshot_b64 = "N/A"
+            try:
+                if self.current_page:
+                    # Capture screenshot at lower quality (e.g., 50) to keep log size manageable.
+                    # Higher quality might make logs too large for some platforms.
+                    screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
+                    screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
+                    # Log only a very short snippet of base64 string to confirm it's there
+                    logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
+
+                    # If you want to view the full screenshot locally during development, you can save it:
+                    # with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
+                    #     f.write(screenshot_bytes)
+                    # logger.error("DEBUGGING: Full screenshot saved to debug_timeout

     @property
     def poi_text(self) -> str:
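
For anyone reproducing the loading behaviour outside proxy_lite, a minimal standalone sketch of the same wait strategy this commit introduces (networkidle, then body:not(.loading)); the target URL and screenshot path below are placeholders, not values from this repo:

# Sketch only: assumes Playwright is installed; URL and output path are placeholders.
import asyncio

from playwright.async_api import TimeoutError as PlaywrightTimeoutError, async_playwright


async def check_readiness(url: str) -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        try:
            await page.goto(url, timeout=180_000)
            # Step 1: wait for network requests to settle.
            await page.wait_for_load_state("networkidle", timeout=180_000)
            # Step 2: wait for the 'loading' class to drop off <body>.
            await page.wait_for_selector("body:not(.loading)", timeout=180_000)
            print("page reported ready")
        except PlaywrightTimeoutError:
            # Mirror the commit's debug path: keep a low-quality screenshot for inspection.
            await page.screenshot(path="debug_timeout.jpeg", type="jpeg", quality=50)
            raise
        finally:
            await browser.close()


if __name__ == "__main__":
    asyncio.run(check_readiness("https://example.my.salesforce.com"))  # placeholder URL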