Duibonduil committed on
Commit
7433f0d
·
verified ·
1 Parent(s): f75ed7d

Upload 4 files

Browse files
examples/tools/browsers/__init__.py ADDED
File without changes
examples/tools/browsers/async_browser.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+
4
+ import asyncio
5
+ import base64
6
+ import json
7
+ import os
8
+ import subprocess
9
+ import traceback
10
+ from importlib import resources
11
+ from pathlib import Path
12
+ from typing import Any, Dict, Tuple, List
13
+
14
+ from examples.tools.common import package
15
+ from examples.tools.tool_action import BrowserAction
16
+ from aworld.core.common import Observation, ActionModel, ActionResult
17
+ from aworld.logs.util import logger
18
+ from aworld.core.tool.base import action_executor, ToolFactory, AsyncTool
19
+ from aworld.utils.import_package import is_package_installed
20
+ from examples.tools.browsers.action.executor import BrowserToolActionExecutor
21
+ from examples.tools.browsers.util.dom import DomTree
22
+ from examples.tools.conf import BrowserToolConfig
23
+ from examples.tools.browsers.util.dom_build import async_build_dom_tree
24
+ from aworld.utils import import_package
25
+ from aworld.tools.utils import build_observation
26
+
27
# Length limit constant; not referenced in the visible code (presumably a URL length cap).
URL_MAX_LENGTH = 4096
# Every character with code point 0..55289, joined into a single string.
UTF8 = "".join(map(chr, range(55290)))
# Characters with code points 32..127 (space through DEL), joined into a single string.
ASCII = "".join(map(chr, range(32, 128)))
30
+
31
+
32
+ @ToolFactory.register(name="browser",
33
+ desc="browser",
34
+ asyn=True,
35
+ supported_action=BrowserAction,
36
+ conf_file_name=f'browser_tool.yaml')
37
+ class BrowserTool(AsyncTool):
38
def __init__(self, conf: BrowserToolConfig, **kwargs) -> None:
    """Set up tool state, load the DOM-building script and make sure playwright is installed.

    Args:
        conf: Browser tool configuration; optional keys are read through ``self.conf.get``.
        **kwargs: Accepted but not forwarded to the base class initializer.
    """
    super(BrowserTool, self).__init__(conf)

    self.initialized = False
    self._finish = False
    # Tracing is switched on by the presence of a ``working_dir`` setting.
    self.record_trace = self.conf.get("working_dir", False)
    self.sleep_after_init = self.conf.get("sleep_after_init", False)
    self.cur_observation = None

    # A readable user-supplied script wins; otherwise use the packaged buildDomTree.js.
    custom_js = self.conf.get('dom_js_path')
    if not custom_js or not os.path.exists(custom_js):
        self.js_code = resources.read_text(f'{package}.browsers.script',
                                           'buildDomTree.js')
    else:
        with open(custom_js, 'r') as handle:
            self.js_code = handle.read()

    if is_package_installed('playwright'):
        return

    # playwright is missing: import/install the package, then fetch the browser binaries.
    import_package("playwright")
    logger.info("playwright install...")
    try:
        subprocess.check_call('playwright install', shell=True, timeout=300)
    except Exception as e:
        logger.error(f"Fail to auto execute playwright install, you can install manually\n {e}")
60
+
61
async def init(self) -> None:
    """Start playwright and create the browser, context, page and action executor.

    Returns immediately when the tool has already been initialized.
    """
    from playwright.async_api import async_playwright

    if not self.initialized:
        self.context_manager = async_playwright()
        self.playwright = await self.context_manager.start()

        self.browser = await self._create_browser()
        self.context = await self._create_browser_context()

        if self.record_trace:
            await self.context.tracing.start(screenshots=True, snapshots=True)

        self.page = await self.context.new_page()
        # A truthy ``custom_executor`` setting selects the browser-specific executor;
        # otherwise the shared module-level ``action_executor`` is used.
        use_custom = self.conf.get("custom_executor")
        self.action_executor = BrowserToolActionExecutor(self) if use_custom else action_executor
        self.initialized = True
82
+
83
+ async def _create_browser(self):
84
+ browse_name = self.conf.get("browse_name", "chromium")
85
+ browse = getattr(self.playwright, browse_name)
86
+ cdp_url = self.conf.get("cdp_url")
87
+ wss_url = self.conf.get("wss_url")
88
+ if cdp_url:
89
+ if browse_name != "chromium":
90
+ logger.warning(f"{browse_name} unsupported CDP, will use chromium browser")
91
+ browse = self.playwright.chromium
92
+ logger.info(f"Connecting to remote browser via CDP {cdp_url}")
93
+ browser = await browse.connect_over_cdp(cdp_url)
94
+ elif wss_url:
95
+ logger.info(f"Connecting to remote browser via wss {wss_url}")
96
+ browser = await browse.connect(wss_url)
97
+ else:
98
+ headless = self.conf.get("headless", False)
99
+ slow_mo = self.conf.get("slow_mo", 0)
100
+ disable_security_args = []
101
+ if self.conf.get('disable_security', False):
102
+ disable_security_args = ['--disable-web-security',
103
+ '--disable-site-isolation-trials',
104
+ '--disable-features=IsolateOrigins,site-per-process']
105
+ args = ['--no-sandbox',
106
+ '--disable-crash-reporte',
107
+ '--disable-blink-features=AutomationControlled',
108
+ '--disable-infobars',
109
+ '--disable-background-timer-throttling',
110
+ '--disable-popup-blocking',
111
+ '--disable-backgrounding-occluded-windows',
112
+ '--disable-renderer-backgrounding',
113
+ '--disable-window-activation',
114
+ '--disable-focus-on-load',
115
+ '--no-first-run',
116
+ '--no-default-browser-check',
117
+ '--no-startup-window',
118
+ '--window-position=0,0',
119
+ '--window-size=1280,720'] + disable_security_args
120
+ browser = await browse.launch(
121
+ headless=headless,
122
+ slow_mo=slow_mo,
123
+ args=args,
124
+ proxy=self.conf.get('proxy'),
125
+ )
126
+ return browser
127
+
128
async def _create_browser_context(self):
    """Create (or reuse) a browser context and apply the configured extras.

    When connected through ``cdp_url`` and the browser already has contexts, the first one
    is reused; otherwise a new context is created from the configuration. Afterwards the
    method grants camera/microphone permissions on chromium, optionally starts tracing,
    loads cookies from ``cookies_file`` if that file exists, and installs the stealth
    init script when ``private`` is set.

    Returns:
        The playwright browser context.
    """
    from playwright.async_api import ViewportSize

    reuse_existing = bool(self.conf.get("cdp_url")) and len(self.browser.contexts) > 0
    if reuse_existing:
        context = self.browser.contexts[0]
    else:
        size = ViewportSize(width=self.conf.get("width", 1280),
                            height=self.conf.get("height", 720))
        insecure = self.conf.get('disable_security', False)
        options = dict(
            viewport=size,
            no_viewport=False,
            user_agent=self.conf.get('user_agent'),
            java_script_enabled=True,
            bypass_csp=insecure,
            ignore_https_errors=insecure,
            record_video_dir=self.conf.get('working_dir'),
            record_video_size=size,
            locale=self.conf.get('locale'),
            storage_state=self.conf.get("storage_state", None),
            geolocation=self.conf.get("geolocation", None),
            device_scale_factor=1,
        )
        context = await self.browser.new_context(**options)

    if self.conf.get("browse_name", "chromium") == "chromium":
        await context.grant_permissions(['camera', 'microphone'])

    # NOTE(review): init() also starts tracing when ``record_trace`` is set; if both
    # settings are enabled tracing would be started twice - confirm this is intended.
    if self.conf.get('trace_path'):
        await context.tracing.start(screenshots=True, snapshots=True, sources=True)

    cookie_file = self.conf.get('cookies_file')
    if cookie_file and os.path.exists(cookie_file):
        with open(cookie_file, 'r') as fp:
            cookies = json.load(fp)
        await context.add_cookies(cookies)
        logger.info(f'Cookies load from {cookie_file} finished')

    if self.conf.get('private'):
        stealth_js = resources.read_text(f"{package}.browsers.script", "stealth.min.js")
        await context.add_init_script(stealth_js)

    return context
170
+
171
async def get_cur_page(self):
    """Return the page object currently stored on the tool."""
    current = self.page
    return current
173
+
174
async def screenshot(self, full_page: bool = False) -> str:
    """Return a base64-encoded screenshot of the current page.

    Args:
        full_page: When true, capture the full scrollable page instead of only the
            currently visible viewport.

    Returns:
        Base64 text of the screenshot bytes.
    """
    page = await self.get_cur_page()

    try:
        await page.bring_to_front()
        await page.wait_for_load_state(timeout=2000)
    except Exception:
        # Best effort: a failed focus/load wait must not prevent the screenshot.
        # ``Exception`` (not a bare except) lets task cancellation and interpreter
        # shutdown signals propagate instead of being swallowed here.
        logger.warning("bring to front load timeout")

    raw = await page.screenshot(
        full_page=full_page,
        animations='disabled',
        timeout=600000
    )
    logger.info("page screenshot finished")
    return base64.b64encode(raw).decode('utf-8')
200
+
201
async def _get_observation(self, info: Dict[str, Any] | None = None) -> Observation:
    """Build an observation (DOM tree, screenshot, scroll info) for the current page.

    Args:
        info: Optional dict. A truthy ``'exception'`` entry short-circuits into an error
            observation. On success the dict is updated in place with ``pixels_above``,
            ``pixels_below`` and ``url`` and attached to the observation. A fresh dict is
            used when omitted, so calls no longer share one default dict.

    Returns:
        The observation; when building fails twice, an observation whose ``action_result``
        carries the formatted traceback.
    """
    if info is None:
        info = {}

    fail_error = info.get('exception')
    if fail_error:
        return Observation(observer=self.name(), action_result=[ActionResult(error=fail_error)])

    async def snapshot() -> Observation:
        # Collect everything from whatever ``self.page`` is at call time.
        dom_tree = await self._parse_dom_tree()
        image = await self.screenshot()
        pixels_above, pixels_below = await self._scroll_info()
        info.update({"pixels_above": pixels_above,
                     "pixels_below": pixels_below,
                     "url": self.page.url})
        return Observation(observer=self.name(), dom_tree=dom_tree, image=image, info=info)

    try:
        return await snapshot()
    except Exception:
        # First attempt failed: go back one page (or open a new page if that fails too)
        # and retry once.
        try:
            try:
                await self.page.go_back()
            except Exception:
                logger.warning("current page abnormal, new page to use.")
                self.page = await self.context.new_page()
            return await snapshot()
        except Exception:
            error = traceback.format_exc()
            logger.warning(f"build observation fail, {error}")
            return Observation(observer=self.name(), action_result=[ActionResult(error=error)])
231
+
232
async def _parse_dom_tree(self) -> DomTree:
    """Run the DOM-building script against the current page and wrap the result.

    Returns:
        A ``DomTree`` holding the element tree and element map returned by
        ``async_build_dom_tree``.
    """
    conf = self.conf
    script_args = dict(
        doHighlightElements=conf.get("do_highlight", True),
        focusHighlightIndex=conf.get("focus_highlight", -1),
        viewportExpansion=conf.get("viewport_expansion", 0),
        # 10 is the numeric value of the standard DEBUG logging level.
        debugMode=logger.getEffectiveLevel() == 10,
    )
    tree, mapping = await async_build_dom_tree(self.page, self.js_code, script_args)
    return DomTree(element_tree=tree, element_map=mapping)
241
+
242
+ async def _scroll_info(self) -> tuple[int, int]:
243
+ """Get scroll position information for the current page."""
244
+ scroll_y = await self.page.evaluate('window.scrollY')
245
+ viewport_height = await self.page.evaluate('window.innerHeight')
246
+ total_height = await self.page.evaluate('document.documentElement.scrollHeight')
247
+ pixels_above = scroll_y
248
+ pixels_below = total_height - (scroll_y + viewport_height)
249
+ return pixels_above, pixels_below
250
+
251
async def reset(self, *, seed: int | None = None, options: Dict[str, str] | None = None) -> Tuple[
        Observation, Dict[str, Any]]:
    """Reset the tool and return the starting observation.

    An already initialized tool just produces a fresh observation. Otherwise the tool is
    closed, initialized again, optionally sleeps ``sleep_after_init`` seconds, and then
    observes; in that case the observation's ``ability`` is set to an empty string.

    Returns:
        ``(observation, {})`` where the observation carries a single 'start' action result.
    """
    await super().reset(seed=seed, options=options)

    needs_init = not self.initialized
    if needs_init:
        await self.close()
        await self.init()
        if self.sleep_after_init > 0:
            await asyncio.sleep(self.sleep_after_init)

    observation = await self._get_observation()
    observation.action_result = [ActionResult(content='start', keep=True)]
    if needs_init:
        observation.ability = ''
    self.cur_observation = observation
    return observation, {}
271
+
272
+ async def save_trace(self, trace_path: str | Path) -> None:
273
+ if self.record_trace:
274
+ await self.context.tracing.stop(path=trace_path)
275
+
276
@property
async def finished(self) -> bool:
    """Return the ``_finish`` flag (set by ``do_step`` when an action result is done).

    Because the getter is a coroutine function, reading the property yields an awaitable.
    """
    done = self._finish
    return done
279
+
280
async def close(self) -> None:
    """Release the context, browser and playwright driver and mark the tool uninitialized.

    Each handle is attempted even if an earlier one fails to close, and every handle is
    cleared afterwards so a second ``close()`` is a no-op and a later ``reset()``
    re-initializes instead of reusing closed objects.

    Raises:
        Exception: the first error raised while closing, re-raised after all handles
            have been attempted.
    """
    first_error = None

    # Same order as before: context, then browser, then the playwright driver.
    for attr, method in (('context', 'close'), ('browser', 'close'), ('playwright', 'stop')):
        handle = getattr(self, attr, None)
        if not handle:
            continue
        try:
            await getattr(handle, method)()
        except Exception as err:
            if first_error is None:
                first_error = err
        finally:
            setattr(self, attr, None)

    if self.initialized:
        try:
            await self.context_manager.__aexit__()
        except Exception as err:
            if first_error is None:
                first_error = err
        finally:
            self.initialized = False

    if first_error is not None:
        raise first_error
289
+
290
async def do_step(self, action: List[ActionModel], **kwargs) -> Tuple[
        Observation, float, bool, bool, Dict[str, Any]]:
    """Execute browser actions and return ``(observation, reward, terminated, truncated, info)``.

    Args:
        action: Actions to run. Entries whose ``tool_name`` is not ``'browser'`` are
            replaced by ``None`` in the list before execution.
        **kwargs: Forwarded to the executor; ``terminated`` and ``truncated`` provide the
            default flag values of the returned tuple.

    Returns:
        The step tuple. ``reward`` is 1 when the executor returned without raising, else 0.
        ``info['exception']`` holds the executor's exception text (empty if none).

    Raises:
        RuntimeError: if the tool has not been initialized.
    """
    if not self.initialized:
        raise RuntimeError("Call init first before calling step.")

    if not action:
        logger.warning(f"{self.name()} has no action")
        return build_observation(observer=self.name(), ability='', content='no action'), 0., False, False, {}

    # Null out actions addressed to a different tool (entries that are already None are skipped).
    for i, act in enumerate(action):
        if act is not None and act.tool_name != 'browser':
            logger.warning(f"tool {act.tool_name} is not a browser!")
            action[i] = None

    reward = 0
    fail_error = ""
    action_result = None
    try:
        action_result, self.page = await self.action_executor.async_execute_action(
            action,
            observation=self.cur_observation,
            llm_config=self.conf.llm_config,
            **kwargs)
        reward = 1
    except Exception as e:
        fail_error = str(e)

    info = {"exception": fail_error}
    terminated = kwargs.get("terminated", False)
    truncated = kwargs.get("truncated", False)

    # The executor may have raised before producing results; iterate an empty list then
    # instead of crashing on None.
    results = action_result or []
    for res in results:
        if res.is_done:
            terminated = res.is_done
            info['done'] = True
            self._finish = True
        if res.error:
            fail_error += res.error

    valid_actions = [act for act in action if act]
    if any(act.action_name == BrowserAction.WRITE_TO_FILE.value.name for act in valid_actions):
        # write_to_file observation: content is that of the last action result, if any.
        msg = results[-1].content if results else ""
        return (Observation(content=msg, action_result=action_result, info=info),
                reward, terminated, truncated, info)

    if fail_error:
        # failed error observation; when the executor raised, surface its message as an
        # ActionResult the same way _get_observation reports failures.
        if action_result is None:
            action_result = [ActionResult(error=fail_error)]
        return (Observation(action_result=action_result, observer=self.name()),
                reward, terminated, truncated, info)

    # normal observation
    observation = await self._get_observation(info)
    observation.action_result = action_result
    # The last entry may have been nulled out above, so use the last valid action.
    observation.ability = valid_actions[-1].action_name if valid_actions else ''
    self.cur_observation = observation
    return observation, reward, terminated, truncated, info
examples/tools/browsers/browser.py ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+
4
+ import base64
5
+ import json
6
+ import os
7
+ import subprocess
8
+ import time
9
+ import traceback
10
+ from importlib import resources
11
+ from pathlib import Path
12
+ from typing import Any, Dict, Tuple, List, Union
13
+
14
+ from aworld.config import ConfigDict
15
+ from examples.tools.common import package
16
+ from examples.tools.tool_action import BrowserAction
17
+ from aworld.core.common import Observation, ActionModel, ActionResult
18
+ from aworld.logs.util import logger
19
+ from aworld.core.tool.base import action_executor, ToolFactory
20
+ from aworld.core.tool.base import Tool
21
+ from aworld.utils.import_package import is_package_installed
22
+ from examples.tools.browsers.action.executor import BrowserToolActionExecutor
23
+ from examples.tools.browsers.util.dom import DomTree
24
+ from examples.tools.conf import BrowserToolConfig
25
+ from examples.tools.browsers.util.dom_build import build_dom_tree
26
+ from aworld.utils import import_package
27
+ from aworld.tools.utils import build_observation
28
+
29
# Length limit constant; not referenced in the visible code (presumably a URL length cap).
URL_MAX_LENGTH = 4096
# Every character with code point 0..55289, joined into a single string.
UTF8 = "".join(map(chr, range(55290)))
# Characters with code points 32..127 (space through DEL), joined into a single string.
ASCII = "".join(map(chr, range(32, 128)))

# Tool name used for registration and for matching ``ActionModel.tool_name`` in do_step.
BROWSER = "browser"
34
+
35
+
36
+ @ToolFactory.register(name=BROWSER,
37
+ desc="browser",
38
+ supported_action=BrowserAction,
39
+ conf_file_name=f'browser_tool.yaml')
40
+ class BrowserTool(Tool):
41
def __init__(self, conf: Union[ConfigDict, BrowserToolConfig], **kwargs) -> None:
    """Set up tool state, load the DOM-building script and make sure playwright is installed.

    Args:
        conf: Browser tool configuration; optional keys are read through ``self.conf.get``.
        **kwargs: Forwarded to the base class initializer.
    """
    super(BrowserTool, self).__init__(conf, **kwargs)

    self.initialized = False
    self._finish = False
    self.record_trace = self.conf.get("enable_recording", False)
    self.sleep_after_init = self.conf.get("sleep_after_init", False)
    self.cur_observation = None

    # A readable user-supplied script wins; otherwise use the packaged buildDomTree.js.
    custom_js = self.conf.get('dom_js_path')
    if not custom_js or not os.path.exists(custom_js):
        self.js_code = resources.read_text(f'{package}.browsers.script',
                                           'buildDomTree.js')
    else:
        with open(custom_js, 'r') as handle:
            self.js_code = handle.read()

    if is_package_installed('playwright'):
        return

    # playwright is missing: import/install the package, then fetch the browser binaries.
    import_package("playwright")
    logger.info("playwright install...")
    try:
        subprocess.check_call('playwright install', shell=True, timeout=300)
    except Exception as e:
        logger.error(f"Fail to auto execute playwright install, you can install manually\n {e}")
64
+
65
def init(self) -> None:
    """Start playwright and create the browser, context, page and action executor.

    Returns immediately when the tool has already been initialized.
    """
    from playwright.sync_api import sync_playwright

    if not self.initialized:
        self.context_manager = sync_playwright()
        self.playwright = self.context_manager.start()
        self.browser = self._create_browser()
        self.context = self._create_browser_context()

        if self.record_trace:
            self.context.tracing.start(screenshots=True, snapshots=True)

        self.page = self.context.new_page()
        # A truthy ``custom_executor`` setting selects the browser-specific executor;
        # otherwise the shared module-level ``action_executor`` is used.
        use_custom = self.conf.get("custom_executor")
        self.action_executor = BrowserToolActionExecutor(self) if use_custom else action_executor
        self.initialized = True
85
+
86
+ def _create_browser(self):
87
+ browse_name = self.conf.get("browse_name", "chromium")
88
+ browse = getattr(self.playwright, browse_name)
89
+ cdp_url = self.conf.get("cdp_url")
90
+ wss_url = self.conf.get("wss_url")
91
+ if cdp_url:
92
+ if browse_name != "chromium":
93
+ logger.warning(f"{browse_name} unsupported CDP, will use chromium browser")
94
+ browse = self.playwright.chromium
95
+ logger.info(f"Connecting to remote browser via CDP {cdp_url}")
96
+ browser = browse.connect_over_cdp(cdp_url)
97
+ elif wss_url:
98
+ logger.info(f"Connecting to remote browser via wss {wss_url}")
99
+ browser = browse.connect(wss_url)
100
+ else:
101
+ headless = self.conf.get("headless", False)
102
+ slow_mo = self.conf.get("slow_mo", 0)
103
+ disable_security_args = []
104
+ if self.conf.get('disable_security', False):
105
+ disable_security_args = ['--disable-web-security',
106
+ '--disable-site-isolation-trials',
107
+ '--disable-features=IsolateOrigins,site-per-process']
108
+ args = ['--no-sandbox',
109
+ '--disable-crash-reporte',
110
+ '--disable-blink-features=AutomationControlled',
111
+ '--disable-infobars',
112
+ '--disable-background-timer-throttling',
113
+ '--disable-popup-blocking',
114
+ '--disable-backgrounding-occluded-windows',
115
+ '--disable-renderer-backgrounding',
116
+ '--disable-window-activation',
117
+ '--disable-focus-on-load',
118
+ '--no-first-run',
119
+ '--no-default-browser-check',
120
+ '--no-startup-window',
121
+ '--window-position=0,0',
122
+ '--window-size=1280,720'] + disable_security_args
123
+ browser = browse.launch(
124
+ headless=headless,
125
+ slow_mo=slow_mo,
126
+ args=args,
127
+ proxy=self.conf.get('proxy'),
128
+ )
129
+ return browser
130
+
131
def _create_browser_context(self):
    """Create (or reuse) a browser context and apply the configured extras.

    When connected through ``cdp_url`` and the browser already has contexts, the first one
    is reused; otherwise a new context is created from the configuration. Afterwards the
    method grants camera/microphone permissions on chromium, optionally starts tracing,
    loads cookies from ``cookies_file`` if that file exists, and installs the stealth
    init script when ``private`` is set.

    Returns:
        The playwright browser context.
    """
    from playwright.sync_api import ViewportSize

    reuse_existing = bool(self.conf.get("cdp_url")) and len(self.browser.contexts) > 0
    if reuse_existing:
        context = self.browser.contexts[0]
    else:
        size = ViewportSize(width=self.conf.get("width", 1280),
                            height=self.conf.get("height", 720))
        insecure = self.conf.get('disable_security', False)
        options = dict(
            viewport=size,
            no_viewport=False,
            user_agent=self.conf.get('user_agent'),
            java_script_enabled=True,
            bypass_csp=insecure,
            ignore_https_errors=insecure,
            record_video_dir=self.conf.get('working_dir'),
            record_video_size=size,
            locale=self.conf.get('locale'),
            storage_state=self.conf.get("storage_state", None),
            geolocation=self.conf.get("geolocation", None),
            device_scale_factor=1,
        )
        context = self.browser.new_context(**options)

    if self.conf.get("browse_name", "chromium") == "chromium":
        context.grant_permissions(['camera', 'microphone'])

    # NOTE(review): init() also starts tracing when ``record_trace`` is set; if both
    # settings are enabled tracing would be started twice - confirm this is intended.
    if self.conf.get('working_dir'):
        context.tracing.start(screenshots=True, snapshots=True, sources=True)

    cookie_file = self.conf.get('cookies_file')
    if cookie_file and os.path.exists(cookie_file):
        with open(cookie_file, 'r') as fp:
            cookies = json.load(fp)
        context.add_cookies(cookies)
        logger.info(f'Cookies load from {cookie_file} finished')

    if self.conf.get('private'):
        stealth_js = resources.read_text(f"{package}.browsers.script", "stealth.min.js")
        context.add_init_script(stealth_js)

    return context
173
+
174
def get_cur_page(self):
    """Return the page object currently stored on the tool."""
    current = self.page
    return current
176
+
177
def screenshot(self, full_page: bool = False) -> str:
    """Return a base64-encoded screenshot of the current page.

    Args:
        full_page: When true, capture the full scrollable page instead of only the
            currently visible viewport.

    Returns:
        Base64 text of the screenshot bytes.
    """
    page = self.get_cur_page()

    try:
        page.bring_to_front()
        page.wait_for_load_state(timeout=2000)
    except Exception:
        # Best effort: a failed focus/load wait must not prevent the screenshot.
        # ``Exception`` (not a bare except) keeps KeyboardInterrupt/SystemExit from
        # being swallowed here.
        logger.warning("bring to front load timeout")

    raw = page.screenshot(
        full_page=full_page,
        animations='disabled',
        timeout=600000
    )
    logger.info("page screenshot finished")
    return base64.b64encode(raw).decode('utf-8')
203
+
204
def _get_observation(self, info: Dict[str, Any] | None = None) -> Observation:
    """Build an observation (DOM tree, screenshot, scroll info) for the current page.

    Args:
        info: Optional dict. A truthy ``'exception'`` entry short-circuits into an error
            observation. On success the dict is updated in place with ``pixels_above``,
            ``pixels_below`` and ``url`` and attached to the observation. A fresh dict is
            used when omitted, so calls no longer share one default dict.

    Returns:
        The observation; when building fails twice (or the page cannot be recovered), an
        observation whose ``action_result`` carries the formatted traceback.
    """
    if info is None:
        info = {}

    fail_error = info.get('exception')
    if fail_error:
        return Observation(observer=self.name(), action_result=[ActionResult(error=fail_error)])

    def snapshot() -> Observation:
        # Collect everything from whatever ``self.page`` is at call time.
        dom_tree = self._parse_dom_tree()
        image = self.screenshot()
        pixels_above, pixels_below = self._scroll_info()
        info.update({"pixels_above": pixels_above,
                     "pixels_below": pixels_below,
                     "url": self.page.url})
        return Observation(observer=self.name(),
                           dom_tree=dom_tree,
                           image=image,
                           info=info)

    try:
        return snapshot()
    except Exception:
        # First attempt failed: go back one page (or open a new page if that fails too)
        # and retry once. Opening the new page is inside the guarded region so that a
        # failure there is reported as an error observation rather than raised.
        try:
            try:
                self.page.go_back()
            except Exception:
                logger.warning("current page abnormal, new page to use.")
                self.page = self.context.new_page()
            return snapshot()
        except Exception:
            error = traceback.format_exc()
            logger.warning(f"build observation fail, {error}")
            return Observation(observer=self.name(), action_result=[ActionResult(error=error)])
237
+
238
def _parse_dom_tree(self) -> DomTree:
    """Run the DOM-building script against the current page and wrap the result.

    Returns:
        A ``DomTree`` holding the element tree and element map returned by
        ``build_dom_tree``.
    """
    conf = self.conf
    script_args = dict(
        doHighlightElements=conf.get("do_highlight", True),
        focusHighlightIndex=conf.get("focus_highlight", -1),
        viewportExpansion=conf.get("viewport_expansion", 0),
        # 10 is the numeric value of the standard DEBUG logging level.
        debugMode=logger.getEffectiveLevel() == 10,
    )
    tree, mapping = build_dom_tree(self.page, self.js_code, script_args)
    return DomTree(element_tree=tree, element_map=mapping)
247
+
248
+ def _scroll_info(self) -> tuple[int, int]:
249
+ """Get scroll position information for the current page."""
250
+ scroll_y = self.page.evaluate('window.scrollY')
251
+ viewport_height = self.page.evaluate('window.innerHeight')
252
+ total_height = self.page.evaluate('document.documentElement.scrollHeight')
253
+ pixels_above = scroll_y
254
+ pixels_below = total_height - (scroll_y + viewport_height)
255
+ return pixels_above, pixels_below
256
+
257
def reset(self, *, seed: int | None = None, options: Dict[str, str] | None = None) -> Tuple[
        Observation, Dict[str, Any]]:
    """Reset the tool and return the starting observation.

    An already initialized tool just produces a fresh observation. Otherwise the tool is
    closed, initialized again, optionally sleeps ``sleep_after_init`` seconds, and then
    observes.

    Returns:
        ``(observation, {})`` where the observation carries a single 'start' action result.
    """
    super().reset(seed=seed, options=options)

    if not self.initialized:
        self.close()
        self.init()
        if self.sleep_after_init > 0:
            time.sleep(self.sleep_after_init)

    observation = self._get_observation()
    observation.action_result = [ActionResult(content='start', keep=True)]
    self.cur_observation = observation
    return observation, {}
276
+
277
@property
def finished(self) -> bool:
    """Return the ``_finish`` flag (set by ``do_step`` when an action result is done)."""
    done = self._finish
    return done
280
+
281
+ def save_trace(self, trace_path: str | Path) -> None:
282
+ if self.record_trace:
283
+ self.context.tracing.stop(path=trace_path)
284
+
285
def close(self) -> None:
    """Release the context, browser and playwright driver and mark the tool uninitialized.

    Each handle is attempted even if an earlier one fails to close, and every handle is
    cleared afterwards so a second ``close()`` is a no-op and a later ``reset()``
    re-initializes instead of reusing closed objects.

    Raises:
        Exception: the first error raised while closing, re-raised after all handles
            have been attempted.
    """
    first_error = None

    # Same order as before: context, then browser, then the playwright driver.
    for attr, method in (('context', 'close'), ('browser', 'close'), ('playwright', 'stop')):
        handle = getattr(self, attr, None)
        if not handle:
            continue
        try:
            getattr(handle, method)()
        except Exception as err:
            if first_error is None:
                first_error = err
        finally:
            setattr(self, attr, None)

    if self.initialized:
        try:
            self.context_manager.__exit__()
        except Exception as err:
            if first_error is None:
                first_error = err
        finally:
            self.initialized = False

    if first_error is not None:
        raise first_error
295
+
296
def do_step(self, action: List[ActionModel], **kwargs) -> Tuple[
        Observation, float, bool, bool, Dict[str, Any]]:
    """Execute browser actions and return ``(observation, reward, terminated, truncated, info)``.

    Args:
        action: Actions to run. Entries whose ``tool_name`` is not ``BROWSER`` are
            replaced by ``None`` in the list before execution.
        **kwargs: Forwarded to the executor; ``terminated`` and ``truncated`` provide the
            default flag values of the returned tuple.

    Returns:
        The step tuple. ``reward`` is 1 when the executor returned without raising, else 0.
        ``info['exception']`` holds the executor's exception text (empty if none).

    Raises:
        RuntimeError: if the tool has not been initialized.
    """
    if not self.initialized:
        raise RuntimeError("Call init first before calling step.")

    if not action:
        logger.warning(f"{self.name()} has no action")
        return build_observation(observer=self.name(), ability='', content='no action'), 0., False, False, {}

    # Null out actions addressed to a different tool (entries that are already None are skipped).
    for i, act in enumerate(action):
        if act is not None and act.tool_name != BROWSER:
            logger.warning(f"tool {act.tool_name} is not a browser!")
            action[i] = None

    reward = 0
    fail_error = ""
    action_result = None
    try:
        action_result, self.page = self.action_executor.execute_action(
            action,
            observation=self.cur_observation,
            llm_config=self.conf.llm_config,
            **kwargs)
        reward = 1
    except Exception as e:
        fail_error = str(e)

    info = {"exception": fail_error}
    terminated = kwargs.get("terminated", False)
    truncated = kwargs.get("truncated", False)

    # The executor may have raised before producing results; iterate an empty list then.
    results = action_result or []
    for res in results:
        if res.is_done:
            terminated = res.is_done
            info['done'] = True
            self._finish = True
        if res.error:
            fail_error += res.error

    valid_actions = [act for act in action if act]
    if any(act.action_name == BrowserAction.WRITE_TO_FILE.value.name for act in valid_actions):
        # write_to_file observation: content is that of the last action result, if any
        # (previously this loop crashed when the executor had raised).
        msg = results[-1].content if results else ""
        return (Observation(content=msg, action_result=action_result, info=info),
                reward, terminated, truncated, info)

    if fail_error:
        # failed error observation; when the executor raised, surface its message as an
        # ActionResult the same way _get_observation reports failures.
        if action_result is None:
            action_result = [ActionResult(error=fail_error)]
        return (Observation(action_result=action_result, observer=self.name()),
                reward, terminated, truncated, info)

    # normal observation
    observation = self._get_observation(info)
    # The last entry may have been nulled out above, so use the last valid action.
    observation.ability = valid_actions[-1].action_name if valid_actions else ''
    observation.action_result = action_result
    self.cur_observation = observation
    return observation, reward, terminated, truncated, info
examples/tools/browsers/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ playwright
2
+ markdownify