Final_Assignment_AWorld

Sleeping

App Files Files Community

Duibonduil committed on Jun 30

Commit

f75ed7d

verified ·

1 Parent(s): 093728b

Upload 2 files

Browse files

Files changed (2) hide show

examples/tools/browsers/util/dom.py +210 -0
examples/tools/browsers/util/dom_build.py +138 -0

examples/tools/browsers/util/dom.py ADDED Viewed

	@@ -0,0 +1,210 @@

+# coding: utf-8
+from dataclasses import dataclass
+from typing import Optional, Dict, List
+from pydantic import BaseModel
+class Coordinates(BaseModel):
+    """A single point described by an integer ``x`` and an integer ``y``."""
+    x: int
+    y: int
+class CoordinateSet(BaseModel):
+    """Four corner points and a center point, plus an integer width and height.
+
+    NOTE(review): presumably the bounding box of an element; confirm the
+    coordinate system and units against the code that fills it in.
+    """
+    top_left: Coordinates
+    top_right: Coordinates
+    bottom_left: Coordinates
+    bottom_right: Coordinates
+    center: Coordinates
+    width: int
+    height: int
+class ViewportInfo(BaseModel):
+    """Integer width and height of a viewport."""
+    width: int
+    height: int
+@dataclass
+class HashedDomElement:
+    """
+    Hash of the dom element to be used as a unique identifier.
+
+    Holds three separate string hashes.
+    NOTE(review): judging by the field names they are derived from the
+    element's branch path, its attributes and its xpath - confirm against
+    the code that produces them.
+    """
+    branch_path_hash: str
+    attributes_hash: str
+    xpath_hash: str
+@dataclass(frozen=False)
+class DOMBaseNode:
+    """Fields shared by every DOM node: a visibility flag and a link to the parent element."""
+    is_visible: bool
+    # Required field (there is no default): pass None while the parent is not
+    # known yet and assign it afterwards. The string forward reference avoids a
+    # circular definition, since DOMElementNode is declared after this class.
+    parent: Optional['DOMElementNode']
+@dataclass(frozen=False)
+class DOMTextNode(DOMBaseNode):
+    text: str
+    type: str = 'TEXT_NODE'
+    def has_parent_with_highlight_index(self) -> bool:
+        current = self.parent
+        while current is not None:
+            # stop if the element has a highlight index (will be handled separately)
+            if current.highlight_index is not None:
+                return True
+            current = current.parent
+        return False
+    def is_parent_in_viewport(self) -> bool:
+        if self.parent is None:
+            return False
+        return self.parent.is_in_viewport
+    def is_parent_top_element(self) -> bool:
+        if self.parent is None:
+            return False
+        return self.parent.is_top_element
+@dataclass(frozen=False)
+class DOMElementNode(DOMBaseNode):
+    """
+    xpath: the xpath of the element from the last root node (shadow root or iframe OR document if no shadow root or iframe).
+    To properly reference the element we need to recursively switch the root node until we find the element (work you way up the tree with `.parent`)
+    """
+    tag_name: str
+    xpath: str
+    attributes: Dict[str, str]
+    children: List[DOMBaseNode]
+    is_interactive: bool = False
+    is_top_element: bool = False
+    is_in_viewport: bool = False
+    shadow_root: bool = False
+    highlight_index: Optional[int] = None
+    viewport_coordinates: Optional[CoordinateSet] = None
+    page_coordinates: Optional[CoordinateSet] = None
+    viewport_info: Optional[ViewportInfo] = None
+    def __repr__(self) -> str:
+        tag_str = f'<{self.tag_name}'
+        # Add attributes
+        for key, value in self.attributes.items():
+            tag_str += f' {key}="{value}"'
+        tag_str += '>'
+        # Add extra info
+        extras = []
+        if self.is_interactive:
+            extras.append('interactive')
+        if self.is_top_element:
+            extras.append('top')
+        if self.shadow_root:
+            extras.append('shadow-root')
+        if self.highlight_index is not None:
+            extras.append(f'highlight:{self.highlight_index}')
+        if self.is_in_viewport:
+            extras.append('in-viewport')
+        if extras:
+            tag_str += f' [{", ".join(extras)}]'
+        return tag_str
+    def get_all_text_till_next_clickable_element(self, max_depth: int = -1) -> str:
+        text_parts = []
+        def collect_text(node: DOMBaseNode, current_depth: int) -> None:
+            if max_depth != -1 and current_depth > max_depth:
+                return
+            # Skip this branch if we hit a highlighted element (except for the current node)
+            if isinstance(node, DOMElementNode) and node != self and node.highlight_index is not None:
+                return
+            if isinstance(node, DOMTextNode):
+                text_parts.append(node.text)
+            elif isinstance(node, DOMElementNode):
+                for child in node.children:
+                    collect_text(child, current_depth + 1)
+        collect_text(self, 0)
+        return '\n'.join(text_parts).strip()
+    def clickable_elements_to_string(self, include_attributes: list[str] | None = None) -> str:
+        """Convert the processed DOM content to HTML."""
+        formatted_text = []
+        def process_node(node: DOMBaseNode, depth: int) -> None:
+            if isinstance(node, DOMElementNode):
+                # Add element with highlight_index
+                if node.highlight_index is not None:
+                    attributes_str = ''
+                    text = node.get_all_text_till_next_clickable_element()
+                    if include_attributes:
+                        attributes = list(
+                            set(
+                                [
+                                    str(value)
+                                    for key, value in node.attributes.items()
+                                    if key in include_attributes and value != node.tag_name
+                                ]
+                            )
+                        )
+                        if text in attributes:
+                            attributes.remove(text)
+                        attributes_str = ';'.join(attributes)
+                    line = f'[{node.highlight_index}]<{node.tag_name} '
+                    if attributes_str:
+                        line += f'{attributes_str}'
+                    if text:
+                        if attributes_str:
+                            line += f'>{text}'
+                        else:
+                            line += f'{text}'
+                    line += '/>'
+                    formatted_text.append(line)
+                # Process children regardless
+                for child in node.children:
+                    process_node(child, depth + 1)
+            elif isinstance(node, DOMTextNode):
+                # Add text only if it doesn't have a highlighted parent
+                if not node.has_parent_with_highlight_index() and node.is_visible:  # and node.is_parent_top_element()
+                    formatted_text.append(f'{node.text}')
+        process_node(self, 0)
+        return '\n'.join(formatted_text)
+    def get_file_upload_element(self, check_siblings: bool = True) -> Optional['DOMElementNode']:
+        # Check if current element is a file input
+        if self.tag_name == 'input' and self.attributes.get('type') == 'file':
+            return self
+        # Check children
+        for child in self.children:
+            if isinstance(child, DOMElementNode):
+                result = child.get_file_upload_element(check_siblings=False)
+                if result:
+                    return result
+        # Check siblings only for the initial call
+        if check_siblings and self.parent:
+            for sibling in self.parent.children:
+                if sibling is not self and isinstance(sibling, DOMElementNode):
+                    result = sibling.get_file_upload_element(check_siblings=False)
+                    if result:
+                        return result
+        return None
+class DomTree(BaseModel):
+    """A root element node together with a mapping from integer keys to element nodes.
+
+    NOTE(review): the integer keys are presumably highlight indices - confirm
+    against the code that builds this model.
+    """
+    element_tree: DOMElementNode
+    element_map: Dict[int, DOMElementNode]

examples/tools/browsers/util/dom_build.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# coding: utf-8
+# Derived from browser_use DomService, we use it as a utility method, and supports sync and async.
+import gc
+import json
+from typing import Dict, Any, Tuple, Optional
+from aworld.utils.async_func import async_func
+from examples.tools.browsers.util.dom import DOMElementNode, DOMBaseNode, DOMTextNode, ViewportInfo
+from aworld.logs.util import logger
+async def async_build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
+    if await page.evaluate('1+1') != 2:
+        raise ValueError('The page cannot evaluate javascript code properly')
+    # NOTE: We execute JS code in the browser to extract important DOM information.
+    #       The returned hash map contains information about the DOM tree and the
+    #       relationship between the DOM elements.
+    try:
+        eval_page = await page.evaluate(js_code, args)
+    except Exception as e:
+        logger.error('Error evaluating JavaScript: %s', e)
+        raise
+    # Only log performance metrics in debug mode
+    if args.get("debugMode") and 'perfMetrics' in eval_page:
+        logger.debug('DOM Tree Building Performance Metrics:\n%s', json.dumps(eval_page['perfMetrics'], indent=2))
+    return await async_func(_construct_dom_tree)(eval_page)
+def build_dom_tree(page, js_code: str, args: Dict[str, Any]) -> Tuple[DOMElementNode, Dict[int, DOMElementNode]]:
+    if page.evaluate('1+1') != 2:
+        raise ValueError('The page cannot evaluate javascript code properly')
+    # NOTE: We execute JS code in the browser to extract important DOM information.
+    #       The returned hash map contains information about the DOM tree and the
+    #       relationship between the DOM elements.
+    try:
+        eval_page = page.evaluate(js_code, args)
+    except Exception as e:
+        logger.error('Error evaluating JavaScript: %s', e)
+        raise
+    # Only log performance metrics in debug mode
+    if args.get("debugMode") and 'perfMetrics' in eval_page:
+        logger.debug('DOM Tree Building Performance Metrics:\n%s', json.dumps(eval_page['perfMetrics'], indent=2))
+    return _construct_dom_tree(eval_page)
+def _construct_dom_tree(eval_page: dict, ) -> tuple[DOMElementNode, Dict[int, DOMElementNode]]:
+    js_node_map = eval_page['map']
+    js_root_id = eval_page['rootId']
+    selector_map = {}
+    node_map = {}
+    for id, node_data in js_node_map.items():
+        node, children_ids = _parse_node(node_data)
+        if node is None:
+            continue
+        node_map[id] = node
+        if isinstance(node, DOMElementNode) and node.highlight_index is not None:
+            selector_map[node.highlight_index] = node
+        # NOTE: We know that we are building the tree bottom up
+        #       and all children are already processed.
+        if isinstance(node, DOMElementNode):
+            for child_id in children_ids:
+                if child_id not in node_map:
+                    continue
+                child_node = node_map[child_id]
+                child_node.parent = node
+                node.children.append(child_node)
+    html_to_dict = node_map[str(js_root_id)]
+    del node_map
+    del js_node_map
+    del js_root_id
+    gc.collect()
+    if html_to_dict is None or not isinstance(html_to_dict, DOMElementNode):
+        raise ValueError('Failed to parse HTML to dictionary')
+    return html_to_dict, selector_map
+def _parse_node(node_data: dict, ) -> Tuple[Optional[DOMBaseNode], list[int]]:
+    if not node_data:
+        return None, []
+    # Process text nodes immediately
+    if node_data.get('type') == 'TEXT_NODE':
+        text_node = DOMTextNode(
+            text=node_data['text'],
+            is_visible=node_data['isVisible'],
+            parent=None,
+        )
+        return text_node, []
+    # Process coordinates if they exist for element nodes
+    viewport_info = None
+    if 'viewport' in node_data:
+        viewport_info = ViewportInfo(
+            width=node_data['viewport']['width'],
+            height=node_data['viewport']['height'],
+        )
+    element_node = DOMElementNode(
+        tag_name=node_data['tagName'],
+        xpath=node_data['xpath'],
+        attributes=node_data.get('attributes', {}),
+        children=[],
+        is_visible=node_data.get('isVisible', False),
+        is_interactive=node_data.get('isInteractive', False),
+        is_top_element=node_data.get('isTopElement', False),
+        is_in_viewport=node_data.get('isInViewport', False),
+        highlight_index=node_data.get('highlightIndex'),
+        shadow_root=node_data.get('shadowRoot', False),
+        parent=None,
+        viewport_info=viewport_info,
+    )
+    children_ids = node_data.get('children', [])
+    return element_node, children_ids