Duibonduil committed on
Commit
f75ed7d
·
verified ·
1 Parent(s): 093728b

Upload 2 files

Browse files
examples/tools/browsers/util/dom.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional, Dict, List
5
+
6
+ from pydantic import BaseModel
7
+
8
+
9
# A single point with integer x and y components; CoordinateSet uses it for
# each of its corner and center fields.
class Coordinates(BaseModel):
    x: int
    y: int
12
+
13
+
14
# Groups four corner points, a center point and an integer width/height.
# DOMElementNode declares two optional fields of this type
# (viewport_coordinates and page_coordinates).
class CoordinateSet(BaseModel):
    top_left: Coordinates
    top_right: Coordinates
    bottom_left: Coordinates
    bottom_right: Coordinates
    center: Coordinates
    width: int
    height: int
22
+
23
+
24
# Integer width and height of a viewport; stored on DOMElementNode.viewport_info.
# NOTE(review): units are presumably CSS pixels as reported by the page - confirm.
class ViewportInfo(BaseModel):
    width: int
    height: int
27
+
28
+
29
@dataclass
class HashedDomElement:
    """
    Hash of the dom element to be used as a unique identifier
    """

    # NOTE(review): the three values are presumably digests derived from the
    # element's ancestor path, its attributes and its xpath respectively; the
    # code that computes them is not visible here - confirm against the caller.
    branch_path_hash: str
    attributes_hash: str
    xpath_hash: str
38
+
39
+
40
@dataclass(frozen=False)
class DOMBaseNode:
    """Fields shared by the text and element nodes of the DOM tree."""

    is_visible: bool
    # Required field (it has no dataclass default): callers pass parent=None at
    # construction and assign the real parent later, which avoids circular
    # reference issues while the tree is being built.
    parent: Optional['DOMElementNode']
45
+
46
+
47
@dataclass(frozen=False)
class DOMTextNode(DOMBaseNode):
    """Text leaf of the DOM tree; its parent, when set, is an element node."""

    text: str
    type: str = 'TEXT_NODE'

    def has_parent_with_highlight_index(self) -> bool:
        """Return True when any ancestor element has a highlight index set."""
        ancestor = self.parent
        # Climb until we run out of ancestors or reach a highlighted one
        # (highlighted elements are rendered separately by the caller).
        while ancestor is not None and ancestor.highlight_index is None:
            ancestor = ancestor.parent
        return ancestor is not None

    def is_parent_in_viewport(self) -> bool:
        """Return the parent's ``is_in_viewport`` flag, or False without a parent."""
        owner = self.parent
        return False if owner is None else owner.is_in_viewport

    def is_parent_top_element(self) -> bool:
        """Return the parent's ``is_top_element`` flag, or False without a parent."""
        owner = self.parent
        return False if owner is None else owner.is_top_element
71
+
72
+
73
@dataclass(frozen=False)
class DOMElementNode(DOMBaseNode):
    """
    Element node of the DOM tree.

    xpath: the xpath of the element from the last root node (shadow root or iframe OR document if no shadow root or iframe).
    To properly reference the element we need to recursively switch the root node until we find the element (work your way up the tree with `.parent`)
    """

    tag_name: str
    xpath: str
    attributes: Dict[str, str]
    children: List[DOMBaseNode]
    is_interactive: bool = False
    is_top_element: bool = False
    is_in_viewport: bool = False
    shadow_root: bool = False
    highlight_index: Optional[int] = None
    viewport_coordinates: Optional[CoordinateSet] = None
    page_coordinates: Optional[CoordinateSet] = None
    viewport_info: Optional[ViewportInfo] = None

    def __repr__(self) -> str:
        """Return the opening tag with its attributes plus a bracketed list of flags."""
        attrs = ''.join(f' {key}="{value}"' for key, value in self.attributes.items())
        tag_str = f'<{self.tag_name}{attrs}>'

        # Add extra info
        extras = []
        if self.is_interactive:
            extras.append('interactive')
        if self.is_top_element:
            extras.append('top')
        if self.shadow_root:
            extras.append('shadow-root')
        if self.highlight_index is not None:
            extras.append(f'highlight:{self.highlight_index}')
        if self.is_in_viewport:
            extras.append('in-viewport')

        if extras:
            tag_str += f' [{", ".join(extras)}]'

        return tag_str

    def get_all_text_till_next_clickable_element(self, max_depth: int = -1) -> str:
        """Collect the text below this element, stopping at highlighted descendants.

        Args:
            max_depth: Deepest level (this element is depth 0) to descend into;
                -1 means no limit.

        Returns:
            The collected text-node contents joined with newlines and stripped.
        """
        text_parts = []

        def collect_text(node: DOMBaseNode, current_depth: int) -> None:
            if max_depth != -1 and current_depth > max_depth:
                return

            # Skip this branch if we hit a highlighted element (except for the
            # current node). Identity is used on purpose: the dataclass-generated
            # __eq__ compares every field, including parent and children, so it
            # is costly and could confuse two distinct nodes with equal fields.
            if isinstance(node, DOMElementNode) and node is not self and node.highlight_index is not None:
                return

            if isinstance(node, DOMTextNode):
                text_parts.append(node.text)
            elif isinstance(node, DOMElementNode):
                for child in node.children:
                    collect_text(child, current_depth + 1)

        collect_text(self, 0)
        return '\n'.join(text_parts).strip()

    def clickable_elements_to_string(self, include_attributes: list[str] | None = None) -> str:
        """Render highlighted elements and free-standing visible text as lines.

        Each highlighted element becomes ``[index]<tag attrs>text/>``; visible
        text nodes with no highlighted ancestor are emitted as-is.

        Args:
            include_attributes: Attribute names whose values are included for
                highlighted elements; None or empty includes none.

        Returns:
            The generated lines joined with newlines.
        """
        formatted_text = []

        def process_node(node: DOMBaseNode) -> None:
            if isinstance(node, DOMElementNode):
                # Add element with highlight_index
                if node.highlight_index is not None:
                    attributes_str = ''
                    text = node.get_all_text_till_next_clickable_element()
                    if include_attributes:
                        # dict.fromkeys removes duplicate values while keeping
                        # the attribute order, so the output is deterministic.
                        values = list(
                            dict.fromkeys(
                                str(value)
                                for key, value in node.attributes.items()
                                if key in include_attributes and value != node.tag_name
                            )
                        )
                        # Do not repeat the element text as an attribute value.
                        if text in values:
                            values.remove(text)
                        attributes_str = ';'.join(values)
                    line = f'[{node.highlight_index}]<{node.tag_name} '
                    if attributes_str:
                        line += f'{attributes_str}'
                    if text:
                        if attributes_str:
                            line += f'>{text}'
                        else:
                            line += f'{text}'
                    line += '/>'
                    formatted_text.append(line)

                # Process children regardless
                for child in node.children:
                    process_node(child)

            elif isinstance(node, DOMTextNode):
                # Add text only if it doesn't have a highlighted parent
                if not node.has_parent_with_highlight_index() and node.is_visible:
                    formatted_text.append(f'{node.text}')

        process_node(self)
        return '\n'.join(formatted_text)

    def get_file_upload_element(self, check_siblings: bool = True) -> Optional['DOMElementNode']:
        """Find an ``<input type="file">`` at, below, or next to this element.

        Args:
            check_siblings: When True, also search the subtrees of this
                element's siblings; recursive calls pass False.

        Returns:
            The first matching element, or None if there is none.
        """
        # Check if current element is a file input
        if self.tag_name == 'input' and self.attributes.get('type') == 'file':
            return self

        # Check children
        for child in self.children:
            if isinstance(child, DOMElementNode):
                result = child.get_file_upload_element(check_siblings=False)
                if result:
                    return result

        # Check siblings only for the initial call
        if check_siblings and self.parent:
            for sibling in self.parent.children:
                if sibling is not self and isinstance(sibling, DOMElementNode):
                    result = sibling.get_file_upload_element(check_siblings=False)
                    if result:
                        return result

        return None
206
+
207
+
208
# Pairs the root element of a DOM tree with a mapping from integer keys to
# element nodes.
# NOTE(review): the keys are presumably highlight indices, matching the selector
# map returned by the DOM builder - confirm against the code that fills it.
class DomTree(BaseModel):
    element_tree: DOMElementNode
    element_map: Dict[int, DOMElementNode]
examples/tools/browsers/util/dom_build.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+
3
+ # Derived from browser_use DomService; used here as a utility that supports both sync and async pages.
4
+
5
+ import gc
6
+ import json
7
+
8
+ from typing import Dict, Any, Tuple, Optional
9
+
10
+ from aworld.utils.async_func import async_func
11
+ from examples.tools.browsers.util.dom import DOMElementNode, DOMBaseNode, DOMTextNode, ViewportInfo
12
+ from aworld.logs.util import logger
13
+
14
+
15
async def async_build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """Evaluate ``js_code`` on an async page and build the DOM tree from its result.

    Args:
        page: Object whose awaitable ``evaluate`` runs JavaScript in the page.
        js_code: JavaScript source that extracts the DOM information.
        args: Argument passed to the script; a truthy ``debugMode`` entry
            enables logging of the returned ``perfMetrics``.

    Returns:
        The root element node and the map of highlight index to element node.

    Raises:
        ValueError: If the page fails the ``1+1`` sanity evaluation.
    """
    sanity_result = await page.evaluate('1+1')
    if sanity_result != 2:
        raise ValueError('The page cannot evaluate javascript code properly')

    # NOTE: The script runs inside the browser; the mapping it returns describes
    # the DOM nodes and how they relate to each other.
    try:
        eval_page = await page.evaluate(js_code, args)
    except Exception as exc:
        logger.error('Error evaluating JavaScript: %s', exc)
        raise

    # Performance metrics are only reported when debug mode was requested.
    if args.get("debugMode") and 'perfMetrics' in eval_page:
        metrics_json = json.dumps(eval_page['perfMetrics'], indent=2)
        logger.debug('DOM Tree Building Performance Metrics:\n%s', metrics_json)

    construct = async_func(_construct_dom_tree)
    return await construct(eval_page)
33
+
34
+
35
def build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """Evaluate ``js_code`` on a synchronous page and build the DOM tree from its result.

    Args:
        page: Object whose ``evaluate`` runs JavaScript in the page and returns the result.
        js_code: JavaScript source that extracts the DOM information.
        args: Argument passed to the script; a truthy ``debugMode`` entry
            enables logging of the returned ``perfMetrics``.

    Returns:
        The root element node and the map of highlight index to element node.

    Raises:
        ValueError: If the page fails the ``1+1`` sanity evaluation.
    """
    sanity_result = page.evaluate('1+1')
    if sanity_result != 2:
        raise ValueError('The page cannot evaluate javascript code properly')

    # NOTE: The script runs inside the browser; the mapping it returns describes
    # the DOM nodes and how they relate to each other.
    try:
        eval_page = page.evaluate(js_code, args)
    except Exception as exc:
        logger.error('Error evaluating JavaScript: %s', exc)
        raise

    # Performance metrics are only reported when debug mode was requested.
    if args.get("debugMode") and 'perfMetrics' in eval_page:
        metrics_json = json.dumps(eval_page['perfMetrics'], indent=2)
        logger.debug('DOM Tree Building Performance Metrics:\n%s', metrics_json)

    return _construct_dom_tree(eval_page)
53
+
54
+
55
def _construct_dom_tree(eval_page: dict) -> tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """Turn the script result into linked DOM nodes.

    Args:
        eval_page: Mapping with a ``map`` entry (node id -> raw node data) and
            a ``rootId`` entry naming the root node.

    Returns:
        The root element node and a map of highlight index to element node.

    Raises:
        ValueError: If the root id does not resolve to a parsed element node.
    """
    js_node_map = eval_page['map']
    js_root_id = eval_page['rootId']

    selector_map = {}
    node_map = {}

    for node_id, node_data in js_node_map.items():
        node, children_ids = _parse_node(node_data)
        if node is None:
            continue

        node_map[node_id] = node

        if not isinstance(node, DOMElementNode):
            continue

        if node.highlight_index is not None:
            selector_map[node.highlight_index] = node

        # NOTE: We know that we are building the tree bottom up
        #       and all children are already processed.
        for child_id in children_ids:
            child_node = node_map.get(child_id)
            if child_node is None:
                # Child was skipped by the parser or is unknown; ignore it.
                continue

            child_node.parent = node
            node.children.append(child_node)

    # .get() so that a missing or skipped root reaches the ValueError below
    # instead of surfacing as a bare KeyError.
    html_to_dict = node_map.get(str(js_root_id))

    # Drop the intermediate maps eagerly; they can be large.
    del node_map
    del js_node_map
    del js_root_id

    gc.collect()

    if html_to_dict is None or not isinstance(html_to_dict, DOMElementNode):
        raise ValueError('Failed to parse HTML to dictionary')

    return html_to_dict, selector_map
96
+
97
+
98
def _parse_node(node_data: dict) -> Tuple[Optional[DOMBaseNode], list[int]]:
    """Convert one raw node record into a DOM node plus its child ids.

    Args:
        node_data: Raw record for a single node as returned by the page script.

    Returns:
        ``(None, [])`` for an empty record, ``(text_node, [])`` for a text
        record, otherwise ``(element_node, children_ids)``. Nodes are returned
        with ``parent=None``; elements start with an empty ``children`` list.
    """
    if not node_data:
        return None, []

    # Text records carry no children and are returned immediately.
    if node_data.get('type') == 'TEXT_NODE':
        leaf = DOMTextNode(
            text=node_data['text'],
            is_visible=node_data['isVisible'],
            parent=None,
        )
        return leaf, []

    # Viewport dimensions are optional on element records.
    viewport_info = (
        ViewportInfo(
            width=node_data['viewport']['width'],
            height=node_data['viewport']['height'],
        )
        if 'viewport' in node_data
        else None
    )

    element = DOMElementNode(
        tag_name=node_data['tagName'],
        xpath=node_data['xpath'],
        attributes=node_data.get('attributes', {}),
        children=[],
        is_visible=node_data.get('isVisible', False),
        is_interactive=node_data.get('isInteractive', False),
        is_top_element=node_data.get('isTopElement', False),
        is_in_viewport=node_data.get('isInViewport', False),
        highlight_index=node_data.get('highlightIndex'),
        shadow_root=node_data.get('shadowRoot', False),
        parent=None,
        viewport_info=viewport_info,
    )

    return element, node_data.get('children', [])