Final_Assignment_AWorld

Sleeping

App Files Files Community

Final_Assignment_AWorld / examples /tools /browsers /util /dom.py

Duibonduil

Upload 2 files

f75ed7d verified about 2 months ago

raw

history blame contribute delete

6.98 kB

	# coding: utf-8

	from dataclasses import dataclass
	from typing import Optional, Dict, List

	from pydantic import BaseModel


	class Coordinates(BaseModel):
	    # An (x, y) pair of integer coordinates.
	    x: int
	    y: int


	class CoordinateSet(BaseModel):
	    # Four corner points and a center point of a box, plus its integer
	    # width and height.
	    top_left: Coordinates
	    top_right: Coordinates
	    bottom_left: Coordinates
	    bottom_right: Coordinates
	    center: Coordinates
	    width: int
	    height: int


	class ViewportInfo(BaseModel):
	    # Integer width and height (presumably of the browser viewport --
	    # confirm against the code that fills this in).
	    width: int
	    height: int


	@dataclass
	class HashedDomElement:
	    """
	    Hash of the dom element to be used as a unique identifier
	    """

	    # Three string hashes; how each one is computed is not shown in this file.
	    branch_path_hash: str
	    attributes_hash: str
	    xpath_hash: str


	@dataclass(frozen=False)
	class DOMBaseNode:
	    """Fields shared by every DOM node: a visibility flag and a parent link."""

	    is_visible: bool
	    # No default is declared, so callers always pass `parent` explicitly.
	    # Pass None for a node without a parent, or when the parent is attached
	    # after construction (the original note: this avoids circular reference
	    # issues while the tree is being built).
	    parent: Optional['DOMElementNode']


	@dataclass(frozen=False)
	class DOMTextNode(DOMBaseNode):
	    """Text node of the DOM tree; ``type`` defaults to ``'TEXT_NODE'``."""

	    text: str
	    type: str = 'TEXT_NODE'

	    def has_parent_with_highlight_index(self) -> bool:
	        """Return True if any ancestor has a non-None ``highlight_index``."""
	        ancestor = self.parent
	        while ancestor is not None:
	            # stop if the element has a highlight index (will be handled separately)
	            if ancestor.highlight_index is not None:
	                return True
	            ancestor = ancestor.parent
	        return False

	    def is_parent_in_viewport(self) -> bool:
	        """Return the parent's ``is_in_viewport`` flag, or False without a parent."""
	        if self.parent is None:
	            return False
	        return self.parent.is_in_viewport

	    def is_parent_top_element(self) -> bool:
	        """Return the parent's ``is_top_element`` flag, or False without a parent."""
	        if self.parent is None:
	            return False
	        return self.parent.is_top_element


	@dataclass(frozen=False)
	class DOMElementNode(DOMBaseNode):
	    """
	    xpath: the xpath of the element from the last root node (shadow root or iframe OR document if no shadow root or iframe).
	    To properly reference the element we need to recursively switch the root node until we find the element (work your way up the tree with `.parent`)
	    """

	    tag_name: str
	    xpath: str
	    attributes: Dict[str, str]
	    children: List[DOMBaseNode]
	    is_interactive: bool = False
	    is_top_element: bool = False
	    is_in_viewport: bool = False
	    shadow_root: bool = False
	    highlight_index: Optional[int] = None
	    viewport_coordinates: Optional[CoordinateSet] = None
	    page_coordinates: Optional[CoordinateSet] = None
	    viewport_info: Optional[ViewportInfo] = None

	    def __repr__(self) -> str:
	        """Render as ``<tag key="value" ...>`` plus a bracketed list of set flags."""
	        attrs = ''.join(f' {key}="{value}"' for key, value in self.attributes.items())
	        tag_str = f'<{self.tag_name}{attrs}>'

	        # Flags are listed in a fixed order; only the ones that are set appear.
	        extras = []
	        if self.is_interactive:
	            extras.append('interactive')
	        if self.is_top_element:
	            extras.append('top')
	        if self.shadow_root:
	            extras.append('shadow-root')
	        if self.highlight_index is not None:
	            extras.append(f'highlight:{self.highlight_index}')
	        if self.is_in_viewport:
	            extras.append('in-viewport')

	        if extras:
	            tag_str += f' [{", ".join(extras)}]'

	        return tag_str

	    def get_all_text_till_next_clickable_element(self, max_depth: int = -1) -> str:
	        """Collect the text below this element, stopping at other highlighted elements.

	        Args:
	            max_depth: Deepest level to visit, counting this element as 0.
	                ``-1`` means no limit.

	        Returns:
	            The text of the visited text nodes joined with newlines, with
	            leading and trailing whitespace stripped.
	        """
	        text_parts: List[str] = []

	        def collect_text(node: DOMBaseNode, current_depth: int) -> None:
	            if max_depth != -1 and current_depth > max_depth:
	                return

	            # Skip this branch if we hit a highlighted element (except for the
	            # current node). Identity is what is meant here: `!=` would run the
	            # dataclass field-by-field comparison, which follows parent links.
	            if (
	                isinstance(node, DOMElementNode)
	                and node is not self
	                and node.highlight_index is not None
	            ):
	                return

	            if isinstance(node, DOMTextNode):
	                text_parts.append(node.text)
	            elif isinstance(node, DOMElementNode):
	                for child in node.children:
	                    collect_text(child, current_depth + 1)

	        collect_text(self, 0)
	        return '\n'.join(text_parts).strip()

	    def clickable_elements_to_string(self, include_attributes: Optional[List[str]] = None) -> str:
	        """Convert the processed DOM content to HTML.

	        Each element with a highlight index becomes one line of the form
	        ``[index]<tag attr1;attr2>text/>``. Visible text nodes that have no
	        highlighted ancestor are emitted as plain lines.

	        Args:
	            include_attributes: Attribute names whose values are shown for
	                highlighted elements. ``None`` or empty shows no attributes.

	        Returns:
	            The generated lines joined with newlines.
	        """
	        formatted_text: List[str] = []

	        def process_node(node: DOMBaseNode) -> None:
	            if isinstance(node, DOMElementNode):
	                # Add element with highlight_index
	                if node.highlight_index is not None:
	                    attributes_str = ''
	                    text = node.get_all_text_till_next_clickable_element()
	                    if include_attributes:
	                        # dict.fromkeys removes duplicate values while keeping
	                        # the attribute order, so the output is the same on
	                        # every run (a set's order depends on string hashing).
	                        attributes = list(
	                            dict.fromkeys(
	                                str(value)
	                                for key, value in node.attributes.items()
	                                if key in include_attributes and value != node.tag_name
	                            )
	                        )
	                        # Do not repeat a value that equals the element's text.
	                        if text in attributes:
	                            attributes.remove(text)
	                        attributes_str = ';'.join(attributes)
	                    line = f'[{node.highlight_index}]<{node.tag_name} '
	                    if attributes_str:
	                        line += f'{attributes_str}'
	                    if text:
	                        # The '>' separator is only needed after attributes.
	                        if attributes_str:
	                            line += f'>{text}'
	                        else:
	                            line += f'{text}'
	                    line += '/>'
	                    formatted_text.append(line)

	                # Process children regardless
	                for child in node.children:
	                    process_node(child)

	            elif isinstance(node, DOMTextNode):
	                # Add text only if it doesn't have a highlighted parent.
	                # (The original also carried a disabled parent-top-element check.)
	                if not node.has_parent_with_highlight_index() and node.is_visible:
	                    formatted_text.append(f'{node.text}')

	        process_node(self)
	        return '\n'.join(formatted_text)

	    def get_file_upload_element(self, check_siblings: bool = True) -> Optional['DOMElementNode']:
	        """Find an ``input`` element with ``type="file"`` at or near this element.

	        The search order is: this element, then its descendants, then (only
	        when ``check_siblings`` is true and a parent exists) each sibling and
	        its descendants.

	        Args:
	            check_siblings: Whether to search the parent's other children too.
	                Recursive calls pass ``False``.

	        Returns:
	            The first matching element, or ``None`` if there is none.
	        """
	        # Check if current element is a file input
	        if self.tag_name == 'input' and self.attributes.get('type') == 'file':
	            return self

	        # Check children
	        for child in self.children:
	            if isinstance(child, DOMElementNode):
	                result = child.get_file_upload_element(check_siblings=False)
	                if result is not None:
	                    return result

	        # Check siblings only for the initial call
	        if check_siblings and self.parent:
	            for sibling in self.parent.children:
	                if sibling is not self and isinstance(sibling, DOMElementNode):
	                    result = sibling.get_file_upload_element(check_siblings=False)
	                    if result is not None:
	                        return result

	        return None


	class DomTree(BaseModel):
	    # Root element of the tree.
	    element_tree: DOMElementNode
	    # Integer-keyed lookup of elements (presumably keyed by highlight_index --
	    # confirm against the code that builds the map).
	    element_map: Dict[int, DOMElementNode]