Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
examples/tools/browsers/util/dom.py
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding: utf-8
|
2 |
+
|
3 |
+
from dataclasses import dataclass
|
4 |
+
from typing import Optional, Dict, List
|
5 |
+
|
6 |
+
from pydantic import BaseModel
|
7 |
+
|
8 |
+
|
9 |
+
class Coordinates(BaseModel):
    """A single two-dimensional point described by integer ``x`` and ``y`` values."""

    x: int
    y: int
|
12 |
+
|
13 |
+
|
14 |
+
class CoordinateSet(BaseModel):
    """Four corner points, a center point and the integer size of a rectangular region."""

    # Corner points of the rectangle.
    top_left: Coordinates
    top_right: Coordinates
    bottom_left: Coordinates
    bottom_right: Coordinates

    # Center point of the rectangle.
    center: Coordinates

    # Extent of the rectangle.
    width: int
    height: int
|
22 |
+
|
23 |
+
|
24 |
+
class ViewportInfo(BaseModel):
    """Integer width and height of a viewport."""

    width: int
    height: int
|
27 |
+
|
28 |
+
|
29 |
+
@dataclass
class HashedDomElement:
    """
    Group of hash strings for a DOM element, intended to serve as its unique identifier.
    """

    branch_path_hash: str
    attributes_hash: str
    xpath_hash: str
|
38 |
+
|
39 |
+
|
40 |
+
@dataclass
class DOMBaseNode:
    """Shared base of DOM tree nodes: a visibility flag and a link to the parent element."""

    is_visible: bool
    # Pass None at construction time and assign the real parent afterwards;
    # this avoids circular reference issues while the tree is being built.
    parent: Optional['DOMElementNode']
|
45 |
+
|
46 |
+
|
47 |
+
@dataclass
class DOMTextNode(DOMBaseNode):
    """Text node of the DOM tree: its text content plus a fixed ``type`` tag."""

    text: str
    type: str = 'TEXT_NODE'

    def has_parent_with_highlight_index(self) -> bool:
        """Return True when any ancestor element has a highlight index set."""
        ancestor = self.parent
        # Climb until a highlighted ancestor is found (it is handled
        # separately by the caller) or the chain of parents runs out.
        while ancestor is not None and ancestor.highlight_index is None:
            ancestor = ancestor.parent
        return ancestor is not None

    def is_parent_in_viewport(self) -> bool:
        """Return the parent's ``is_in_viewport`` flag, or False without a parent."""
        owner = self.parent
        return False if owner is None else owner.is_in_viewport

    def is_parent_top_element(self) -> bool:
        """Return the parent's ``is_top_element`` flag, or False without a parent."""
        owner = self.parent
        return False if owner is None else owner.is_top_element
|
71 |
+
|
72 |
+
|
73 |
+
@dataclass(frozen=False)
class DOMElementNode(DOMBaseNode):
    """
    Element node of the DOM tree.

    xpath: the xpath of the element from the last root node (shadow root or iframe OR document if no shadow root or iframe).
    To properly reference the element we need to recursively switch the root node until we find the element (work your way up the tree with `.parent`)
    """

    tag_name: str
    xpath: str
    attributes: Dict[str, str]
    children: List[DOMBaseNode]
    is_interactive: bool = False
    is_top_element: bool = False
    is_in_viewport: bool = False
    shadow_root: bool = False
    highlight_index: Optional[int] = None
    viewport_coordinates: Optional[CoordinateSet] = None
    page_coordinates: Optional[CoordinateSet] = None
    viewport_info: Optional[ViewportInfo] = None

    def __repr__(self) -> str:
        """Return a compact ``<tag attr="value">`` string followed by the flags that are set."""
        attrs_str = ''.join(f' {key}="{value}"' for key, value in self.attributes.items())
        tag_str = f'<{self.tag_name}{attrs_str}>'

        # Add extra info
        extras = []
        if self.is_interactive:
            extras.append('interactive')
        if self.is_top_element:
            extras.append('top')
        if self.shadow_root:
            extras.append('shadow-root')
        if self.highlight_index is not None:
            extras.append(f'highlight:{self.highlight_index}')
        if self.is_in_viewport:
            extras.append('in-viewport')

        if extras:
            tag_str += f' [{", ".join(extras)}]'

        return tag_str

    def get_all_text_till_next_clickable_element(self, max_depth: int = -1) -> str:
        """
        Collect the text of all descendant text nodes, joined with newlines and stripped.

        Subtrees rooted at another element with a highlight index are skipped.

        Args:
            max_depth: Maximum depth to descend below this node; ``-1`` means no limit.

        Returns:
            The collected text, or an empty string when nothing was collected.
        """
        text_parts: List[str] = []

        def collect_text(node: DOMBaseNode, current_depth: int) -> None:
            if max_depth != -1 and current_depth > max_depth:
                return

            if isinstance(node, DOMTextNode):
                text_parts.append(node.text)
                return

            if not isinstance(node, DOMElementNode):
                return

            # Skip this branch if we hit a highlighted element (except for the current node).
            # Identity is compared on purpose: the dataclass-generated __eq__ would
            # recursively compare parents and children of both nodes.
            if node is not self and node.highlight_index is not None:
                return

            for child in node.children:
                collect_text(child, current_depth + 1)

        collect_text(self, 0)
        return '\n'.join(text_parts).strip()

    def clickable_elements_to_string(self, include_attributes: list[str] | None = None) -> str:
        """
        Convert the processed DOM content to a line-based, HTML-like string.

        Every element with a highlight index becomes one line of the form
        ``[index]<tag attributes>text/>``; visible text nodes that have no
        highlighted ancestor are emitted as plain lines.

        Args:
            include_attributes: Attribute names whose values are included on
                highlighted elements. When empty or None no attributes are shown.

        Returns:
            The generated lines joined with newlines.
        """
        formatted_text: List[str] = []

        def process_node(node: DOMBaseNode) -> None:
            if isinstance(node, DOMTextNode):
                # Add text only if it doesn't have a highlighted parent
                if not node.has_parent_with_highlight_index() and node.is_visible:
                    formatted_text.append(f'{node.text}')
                return

            if not isinstance(node, DOMElementNode):
                return

            # Add element with highlight_index
            if node.highlight_index is not None:
                attributes_str = ''
                text = node.get_all_text_till_next_clickable_element()
                if include_attributes:
                    # De-duplicate values while keeping their first-seen order so
                    # the output is deterministic between runs.
                    attributes = list(
                        dict.fromkeys(
                            str(value)
                            for key, value in node.attributes.items()
                            if key in include_attributes and value != node.tag_name
                        )
                    )
                    # An attribute value equal to the element text adds no information.
                    if text in attributes:
                        attributes.remove(text)
                    attributes_str = ';'.join(attributes)

                line = f'[{node.highlight_index}]<{node.tag_name} '
                if attributes_str:
                    line += f'{attributes_str}'
                if text:
                    if attributes_str:
                        line += f'>{text}'
                    else:
                        line += f'{text}'
                line += '/>'
                formatted_text.append(line)

            # Process children regardless
            for child in node.children:
                process_node(child)

        process_node(self)
        return '\n'.join(formatted_text)

    def get_file_upload_element(self, check_siblings: bool = True) -> Optional['DOMElementNode']:
        """
        Find an ``<input type="file">`` element at or near this node.

        The node itself is checked first, then its descendants, and finally
        (only when ``check_siblings`` is true) the subtrees of its siblings.

        Args:
            check_siblings: Whether to search sibling subtrees; recursive calls pass False.

        Returns:
            The first matching element found, or None.
        """
        # Check if current element is a file input
        if self.tag_name == 'input' and self.attributes.get('type') == 'file':
            return self

        # Check children
        for child in self.children:
            if isinstance(child, DOMElementNode):
                result = child.get_file_upload_element(check_siblings=False)
                if result:
                    return result

        # Check siblings only for the initial call
        if check_siblings and self.parent:
            for sibling in self.parent.children:
                if sibling is not self and isinstance(sibling, DOMElementNode):
                    result = sibling.get_file_upload_element(check_siblings=False)
                    if result:
                        return result

        return None
|
206 |
+
|
207 |
+
|
208 |
+
class DomTree(BaseModel):
    """Pairs a root element node with an integer-keyed lookup of element nodes."""

    # Root element of the tree.
    element_tree: DOMElementNode
    # Integer-keyed element lookup.
    # NOTE(review): keys are presumably highlight indices - confirm against callers.
    element_map: Dict[int, DOMElementNode]
|
examples/tools/browsers/util/dom_build.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding: utf-8
|
2 |
+
|
3 |
+
# Derived from browser_use's DomService; used here as a utility that supports both sync and async callers.
|
4 |
+
|
5 |
+
import gc
|
6 |
+
import json
|
7 |
+
|
8 |
+
from typing import Dict, Any, Tuple, Optional
|
9 |
+
|
10 |
+
from aworld.utils.async_func import async_func
|
11 |
+
from examples.tools.browsers.util.dom import DOMElementNode, DOMBaseNode, DOMTextNode, ViewportInfo
|
12 |
+
from aworld.logs.util import logger
|
13 |
+
|
14 |
+
|
15 |
+
async def async_build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """
    Evaluate ``js_code`` on ``page`` asynchronously and build the DOM tree from its result.

    Args:
        page: Object exposing an awaitable ``evaluate`` method.
        js_code: JavaScript source that is evaluated with ``args``.
        args: Arguments handed to the script; a truthy ``debugMode`` entry
            enables logging of the returned ``perfMetrics``.

    Returns:
        The root element node and the mapping from highlight index to element node.

    Raises:
        ValueError: If the page fails the ``1+1`` sanity evaluation.
        Exception: Any error raised while evaluating ``js_code`` is logged and re-raised.
    """
    sanity_result = await page.evaluate('1+1')
    if sanity_result != 2:
        raise ValueError('The page cannot evaluate javascript code properly')

    # NOTE: The JS code runs in the browser and extracts the important DOM
    # information; the returned hash map describes the DOM tree and the
    # relationship between the DOM elements.
    try:
        eval_page = await page.evaluate(js_code, args)
    except Exception as e:
        logger.error('Error evaluating JavaScript: %s', e)
        raise

    # Performance metrics are only logged in debug mode.
    if args.get("debugMode"):
        if 'perfMetrics' in eval_page:
            metrics_dump = json.dumps(eval_page['perfMetrics'], indent=2)
            logger.debug('DOM Tree Building Performance Metrics:\n%s', metrics_dump)

    # NOTE(review): async_func presumably makes the sync builder awaitable - confirm.
    build_tree = async_func(_construct_dom_tree)
    return await build_tree(eval_page)
|
33 |
+
|
34 |
+
|
35 |
+
def build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """
    Evaluate ``js_code`` on ``page`` synchronously and build the DOM tree from its result.

    Args:
        page: Object exposing a synchronous ``evaluate`` method.
        js_code: JavaScript source that is evaluated with ``args``.
        args: Arguments handed to the script; a truthy ``debugMode`` entry
            enables logging of the returned ``perfMetrics``.

    Returns:
        The root element node and the mapping from highlight index to element node.

    Raises:
        ValueError: If the page fails the ``1+1`` sanity evaluation.
        Exception: Any error raised while evaluating ``js_code`` is logged and re-raised.
    """
    sanity_result = page.evaluate('1+1')
    if sanity_result != 2:
        raise ValueError('The page cannot evaluate javascript code properly')

    # NOTE: The JS code runs in the browser and extracts the important DOM
    # information; the returned hash map describes the DOM tree and the
    # relationship between the DOM elements.
    try:
        eval_page = page.evaluate(js_code, args)
    except Exception as e:
        logger.error('Error evaluating JavaScript: %s', e)
        raise

    # Performance metrics are only logged in debug mode.
    if args.get("debugMode"):
        if 'perfMetrics' in eval_page:
            metrics_dump = json.dumps(eval_page['perfMetrics'], indent=2)
            logger.debug('DOM Tree Building Performance Metrics:\n%s', metrics_dump)

    return _construct_dom_tree(eval_page)
|
53 |
+
|
54 |
+
|
55 |
+
def _construct_dom_tree(eval_page: dict, ) -> tuple[DOMElementNode, Dict[int, DOMElementNode]]:
    """
    Build the node tree from the result of the DOM-extraction script.

    Args:
        eval_page: Mapping with a ``'map'`` entry (node id -> raw node data)
            and a ``'rootId'`` entry naming the root node.

    Returns:
        The root element node and a mapping from highlight index to element node.

    Raises:
        ValueError: If the root node is missing from the parsed nodes or is
            not an element node.
    """
    js_node_map = eval_page['map']
    js_root_id = eval_page['rootId']

    selector_map = {}
    node_map = {}

    for node_id, node_data in js_node_map.items():
        node, children_ids = _parse_node(node_data)
        if node is None:
            continue

        node_map[node_id] = node

        if not isinstance(node, DOMElementNode):
            continue

        if node.highlight_index is not None:
            selector_map[node.highlight_index] = node

        # NOTE: We know that we are building the tree bottom up
        #       and all children are already processed.
        for child_id in children_ids:
            child_node = node_map.get(child_id)
            if child_node is None:
                # Child was skipped by the parser (or not seen yet); leave it out.
                continue

            child_node.parent = node
            node.children.append(child_node)

    # Use .get so that a missing root falls through to the ValueError below
    # instead of surfacing as an unrelated KeyError.
    html_to_dict = node_map.get(str(js_root_id))

    del node_map
    del js_node_map
    del js_root_id

    gc.collect()

    if html_to_dict is None or not isinstance(html_to_dict, DOMElementNode):
        raise ValueError('Failed to parse HTML to dictionary')

    return html_to_dict, selector_map
|
96 |
+
|
97 |
+
|
98 |
+
def _parse_node(node_data: dict, ) -> Tuple[Optional[DOMBaseNode], list[int]]:
    """
    Convert one raw node dictionary into a DOM node object.

    Args:
        node_data: Raw node description produced by the extraction script.

    Returns:
        A ``(node, children_ids)`` pair. ``node`` is None for empty input;
        text nodes always come with an empty list of child ids.
    """
    if not node_data:
        return None, []

    # Text nodes have no children and can be returned right away.
    if node_data.get('type') == 'TEXT_NODE':
        return DOMTextNode(
            text=node_data['text'],
            is_visible=node_data['isVisible'],
            parent=None,
        ), []

    # Viewport information is optional on element nodes.
    viewport_info = (
        ViewportInfo(
            width=node_data['viewport']['width'],
            height=node_data['viewport']['height'],
        )
        if 'viewport' in node_data
        else None
    )

    # The parent link and the children list are filled in later by the tree builder.
    element_node = DOMElementNode(
        is_visible=node_data.get('isVisible', False),
        parent=None,
        tag_name=node_data['tagName'],
        xpath=node_data['xpath'],
        attributes=node_data.get('attributes', {}),
        children=[],
        is_interactive=node_data.get('isInteractive', False),
        is_top_element=node_data.get('isTopElement', False),
        is_in_viewport=node_data.get('isInViewport', False),
        shadow_root=node_data.get('shadowRoot', False),
        highlight_index=node_data.get('highlightIndex'),
        viewport_info=viewport_info,
    )

    return element_node, node_data.get('children', [])
|