Trisha Tomy committed on
Commit
e9a2867
·
1 Parent(s): c24ca69

trying fixes for loading

Browse files
Files changed (1) hide show
  1. src/proxy_lite/browser/browser.py +89 -8
src/proxy_lite/browser/browser.py CHANGED
@@ -15,7 +15,7 @@ from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential
15
  from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
16
  from proxy_lite.logger import logger
17
 
18
- import base64
19
 
20
  SELF_CONTAINED_TAGS = [
21
  # many of these are non-interactive but keeping them anyway
@@ -107,8 +107,10 @@ class BrowserSession:
107
  viewport={"width": self.viewport_width, "height": self.viewport_height},
108
  )
109
  await self.context.new_page()
110
- self.context.set_default_timeout(60_000)
111
- self.current_page.set_default_timeout(60_000)
 
 
112
  await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
113
  await self.context.add_init_script(
114
  path=Path(__file__).with_name("add_custom_select.js"),
@@ -142,7 +144,7 @@ class BrowserSession:
142
  # re-run for cases of mid-run redirects
143
  @retry(
144
  wait=wait_exponential(multiplier=1, min=1, max=10),
145
- stop=stop_after_delay(5),
146
  reraise=True,
147
  before_sleep=before_sleep_log(logger, logging.ERROR),
148
  )
@@ -193,8 +195,6 @@ class BrowserSession:
193
  # Step 2: Wait for the 'loading' class to disappear from the body.
194
  # This is a common and effective way to detect when SPAs like Salesforce are visually ready.
195
  logger.debug("Attempting wait_for_selector('body:not(.loading)')...")
196
- # Removed state="visible" as it's often too strict for 'body' in SPAs,
197
- # and 'not(.loading)' implies it should become visible eventually.
198
  await self.current_page.wait_for_selector("body:not(.loading)", timeout=180000)
199
  logger.debug("wait_for_selector('body:not(.loading)') completed.")
200
 
@@ -232,7 +232,88 @@ class BrowserSession:
232
  # If you want to view the full screenshot locally during development, you can save it:
233
  # with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
234
  # f.write(screenshot_bytes)
235
- # logger.error("DEBUGGING: Full screenshot saved to debug_timeout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  @property
238
  def poi_text(self) -> str:
@@ -383,4 +464,4 @@ if __name__ == "__main__":
383
  with open("output.png", "wb") as f:
384
  f.write(annotated_image)
385
 
386
- asyncio.run(dummy_test())
 
15
  from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
16
  from proxy_lite.logger import logger
17
 
18
+ import base64 # IMPORTANT: Make sure this import is present!
19
 
20
  SELF_CONTAINED_TAGS = [
21
  # many of these are non-interactive but keeping them anyway
 
107
  viewport={"width": self.viewport_width, "height": self.viewport_height},
108
  )
109
  await self.context.new_page()
110
+ # Set default timeouts for context and page
111
+ self.context.set_default_timeout(180_000) # Increased based on previous discussions
112
+ self.current_page.set_default_timeout(180_000) # Increased based on previous discussions
113
+
114
  await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
115
  await self.context.add_init_script(
116
  path=Path(__file__).with_name("add_custom_select.js"),
 
144
  # re-run for cases of mid-run redirects
145
  @retry(
146
  wait=wait_exponential(multiplier=1, min=1, max=10),
147
+ stop=stop_after_delay(5), # This retry is for the entire update_poi method, in case of intermittent issues
148
  reraise=True,
149
  before_sleep=before_sleep_log(logger, logging.ERROR),
150
  )
 
195
  # Step 2: Wait for the 'loading' class to disappear from the body.
196
  # This is a common and effective way to detect when SPAs like Salesforce are visually ready.
197
  logger.debug("Attempting wait_for_selector('body:not(.loading)')...")
 
 
198
  await self.current_page.wait_for_selector("body:not(.loading)", timeout=180000)
199
  logger.debug("wait_for_selector('body:not(.loading)') completed.")
200
 
 
232
  # If you want to view the full screenshot locally during development, you can save it:
233
  # with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
234
  # f.write(screenshot_bytes)
235
+ # logger.error("DEBUGGING: Full screenshot saved to debug_timeout_full_screenshot.jpeg for local inspection.")
236
+
237
+ except Exception as ss_e:
238
+ logger.error(f"DEBUGGING: Could not take screenshot for debug: {ss_e}")
239
+
240
+ # It's crucial to re-raise the exception so the upstream code knows the operation failed.
241
+ raise e
242
+ # --- END TEMPORARY DEBUGGING CODE ---
243
+
244
+ except Exception as e:
245
+ # This catches any other unexpected errors during the page readiness checks
246
+ logger.error(f"An unexpected error occurred during page readiness check: {e}")
247
+ raise # Re-raise to propagate
248
+
249
+ # --- Code below this point will only execute if the page readiness checks pass ---
250
+ # Ensure this block is at the SAME INDENTATION LEVEL as the 'try' and 'except' above.
251
+
252
+ # Run the bounding box javascript code to highlight the points of interest on the page
253
+ # This part assumes the page is now ready for interaction and content extraction.
254
+ page_info = await self.current_page.evaluate(
255
+ """() => {
256
+ overwriteDefaultSelectConvergence();
257
+ return findPOIsConvergence();
258
+ }""",
259
+ )
260
+ # Get the points of interest on the page
261
+ self.poi_elements = page_info["element_descriptions"]
262
+ element_centroids = page_info["element_centroids"]
263
+ try: # This is a new try block for iframe processing
264
+ # Select all iframes on the page
265
+ iframes = await self.current_page.query_selector_all("iframe")
266
+
267
+ max_iframes = 10 # Limit the number of iframes to process for performance
268
+
269
+ # Schedule one asynchronous task per iframe (capped at max_iframes) to process and filter it
270
+ tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
271
+
272
+ # Gather results from iframe processing tasks concurrently
273
+ results = await asyncio.gather(*tasks)
274
+
275
+ # Filter out any None results from iframe processing errors or non-visible iframes
276
+ filtered_results = [result for result in results if result is not None]
277
+
278
+ iframes_pois = []
279
+ iframe_offsets = []
280
+
281
+ for poi, offset in filtered_results:
282
+ iframes_pois.append(poi)
283
+ iframe_offsets.append(offset)
284
+
285
+ # Combine the points of interest from the iframes with the main page and adjust the centroids
286
+ for index, iframe_poi in enumerate(iframes_pois):
287
+ self.poi_elements.extend(iframe_poi["element_descriptions"])
288
+ for centroid in iframe_poi["element_centroids"]:
289
+ # Adjust iframe POI coordinates based on iframe's position on the main page
290
+ centroid["x"] += iframe_offsets[index]["x"]
291
+ centroid["y"] += iframe_offsets[index]["y"]
292
+ centroid["left"] += iframe_offsets[index]["x"]
293
+ centroid["top"] += iframe_offsets[index]["y"]
294
+ centroid["right"] += iframe_offsets[index]["x"]
295
+ centroid["bottom"] += iframe_offsets[index]["y"]
296
+ element_centroids.extend(iframe_poi["element_centroids"])
297
+
298
+ except Exception as e:
299
+ logger.error(f"Error in finding iframes: {e}")
300
+ # Do not re-raise here unless iframe parsing is critical for the main task
301
+ # Failing to find or process iframes is often not a fatal error for core functionality.
302
+
303
+ # Get the centroids of the points of interest
304
+ self.poi_centroids = [Point(x=xy["x"], y=xy["y"]) for xy in element_centroids]
305
+ # Create BoundingBox objects for annotation
306
+ self.bounding_boxes = [BoundingBox(**xy, label=str(i)) for i, xy in enumerate(element_centroids)]
307
+ # Create POI objects which combine info, centroid, and bounding box
308
+ self.pois = [
309
+ POI(info=info, element_centroid=centroid, bounding_box=bbox)
310
+ for info, centroid, bbox in zip(
311
+ self.poi_elements,
312
+ self.poi_centroids,
313
+ self.bounding_boxes,
314
+ strict=False, # Use strict=False if lengths might genuinely differ slightly
315
+ )
316
+ ]
317
 
318
  @property
319
  def poi_text(self) -> str:
 
464
  with open("output.png", "wb") as f:
465
  f.write(annotated_image)
466
 
467
+ asyncio.run(dummy_test())