Duibonduil committed on
Commit
ed1558c
·
verified ·
1 Parent(s): f1e544e

Upload 3 files

Browse files
examples/tools/android/action/actions.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+
4
+ import json
5
+
6
+ from examples.tools.tool_action import AndroidAction
7
+ from aworld.core.tool.action_factory import ActionFactory
8
+ from aworld.core.common import ActionModel, ActionResult
9
+ from examples.tools.android.action.adb_controller import ADBController
10
+ from examples.tools.android.config.android_action_space import AndroidActionParamEnum
11
+ from aworld.core.tool.action import ExecutableAction
12
+
13
+
14
@ActionFactory.register(name=AndroidAction.TAP.value.name,
                        desc=AndroidAction.TAP.value.desc,
                        tool_name="android")
class Tap(ExecutableAction):
    """Action that taps the screen element selected by the ``TAP_INDEX`` parameter."""

    def act(self, action: ActionModel, **kwargs) -> ActionResult:
        """Forward the tap to the ADB controller.

        Args:
            action: Action model whose ``params`` carry the element index.
            **kwargs: Must contain ``controller`` (an ``ADBController``).

        Returns:
            An ``ActionResult`` with empty content and ``keep=True``.

        Raises:
            Exception: If the element index is missing or ``None``.
        """
        controller: ADBController = kwargs.get('controller')
        # ``.get`` so that a missing key reaches the validation below instead
        # of surfacing as a bare KeyError.
        tap_index = action.params.get(AndroidActionParamEnum.TAP_INDEX.value)
        if tap_index is None:
            raise Exception(f'Invalid action: {action}')
        controller.tap(tap_index)
        return ActionResult(content="", keep=True)
25
+
26
+
27
@ActionFactory.register(name=AndroidAction.INPUT_TEXT.value.name,
                        desc=AndroidAction.INPUT_TEXT.value.desc,
                        tool_name="android")
class InputText(ExecutableAction):
    """Action that types the ``INPUT_TEXT`` parameter through the ADB controller."""

    def act(self, action: ActionModel, **kwargs) -> ActionResult:
        """Send the requested text to the device.

        Args:
            action: Action model whose ``params`` carry the text to type.
            **kwargs: Must contain ``controller`` (an ``ADBController``).

        Returns:
            An ``ActionResult`` with empty content and ``keep=True``.

        Raises:
            Exception: If the text parameter is missing or ``None``.
        """
        controller: ADBController = kwargs.get('controller')
        # ``.get`` so that a missing key reaches the validation below instead
        # of surfacing as a bare KeyError.
        input_text = action.params.get(AndroidActionParamEnum.INPUT_TEXT.value)
        if input_text is None:
            raise Exception(f'Invalid action: {action}')
        controller.text(input_text)
        return ActionResult(content="", keep=True)
38
+
39
+
40
@ActionFactory.register(name=AndroidAction.LONG_PRESS.value.name,
                        desc=AndroidAction.LONG_PRESS.value.desc,
                        tool_name="android")
class LongPress(ExecutableAction):
    """Action that long-presses the element selected by ``LONG_PRESS_INDEX``."""

    def act(self, action: ActionModel, **kwargs) -> ActionResult:
        """Forward the long press to the ADB controller.

        Args:
            action: Action model whose ``params`` carry the element index.
            **kwargs: Must contain ``controller`` (an ``ADBController``).

        Returns:
            An ``ActionResult`` with empty content and ``keep=True``.

        Raises:
            Exception: If the element index is missing or ``None``.
        """
        controller: ADBController = kwargs.get('controller')
        # ``.get`` so that a missing key reaches the validation below instead
        # of surfacing as a bare KeyError.
        long_press_index = action.params.get(AndroidActionParamEnum.LONG_PRESS_INDEX.value)
        if long_press_index is None:
            raise Exception(f'Invalid action: {action}')
        controller.long_press(long_press_index)
        return ActionResult(content="", keep=True)
51
+
52
+
53
@ActionFactory.register(name=AndroidAction.SWIPE.value.name,
                        desc=AndroidAction.SWIPE.value.desc,
                        tool_name="android")
class Swipe(ExecutableAction):
    """Action that swipes from a screen element in a given direction."""

    def act(self, action: ActionModel, **kwargs) -> ActionResult:
        """Forward the swipe to the ADB controller.

        Args:
            action: Action model whose ``params`` carry the start element
                index, the direction and, optionally, the distance.
            **kwargs: Must contain ``controller`` (an ``ADBController``).

        Returns:
            An ``ActionResult`` with empty content and ``keep=True``.

        Raises:
            Exception: If the start index or the direction is missing or ``None``.
        """
        controller: ADBController = kwargs.get('controller')
        params = action.params
        # Required parameters are read with ``.get`` (like the optional
        # distance) so that a missing key reaches the validation below
        # instead of surfacing as a bare KeyError.
        swipe_start_index = params.get(AndroidActionParamEnum.SWIPE_START_INDEX.value)
        direction = params.get(AndroidActionParamEnum.DIRECTION.value)
        dist = params.get(AndroidActionParamEnum.DIST.value)
        if swipe_start_index is None or direction is None:
            raise Exception(f'Invalid action: {action}')
        if dist:
            controller.swipe(swipe_start_index, direction, dist)
        else:
            # No (or empty) distance: let the controller apply its own default.
            controller.swipe(swipe_start_index, direction)
        return ActionResult(content="", keep=True)
69
+
70
+
71
@ActionFactory.register(name=AndroidAction.DONE.value.name,
                        desc=AndroidAction.DONE.value.desc,
                        tool_name="android")
class Done(ExecutableAction):
    """Terminal action: reports the task as finished."""

    def act(self, action: ActionModel, **kwargs) -> ActionResult:
        """Return a successful, final result carrying the action as JSON.

        The action model is dumped without its ``success`` field and the
        JSON text of that dump becomes the result content.
        """
        payload = json.dumps(action.model_dump(exclude={'success'}))
        return ActionResult(is_done=True, success=True, content=payload)
examples/tools/android/action/adb_controller.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+
4
+ import subprocess
5
+ import time
6
+ import re
7
+ import traceback
8
+ from time import sleep
9
+ from typing import Optional, Tuple, List
10
+ import base64
11
+
12
+ import xml.etree.ElementTree as ET
13
+ import os
14
+
15
+ from aworld.logs.util import logger, color_log, Color
16
+ from aworld.utils import import_package
17
+
18
# Module-wide tunables. MIN_DIST is the centre-to-centre distance (in the
# units of the element bounds) at or below which two UI elements are treated
# as the same element when element lists are built.
configs = {"MIN_DIST": 30}
19
+
20
+
21
class AndroidElement:
    """Plain record describing one UI element taken from a UI hierarchy dump."""

    def __init__(self, uid, bbox, attrib):
        """Store the element's identifier, bounding box and matched attribute.

        Args:
            uid: Identifier string for the element.
            bbox: ``((x1, y1), (x2, y2))`` corner pair.
            attrib: Name of the attribute the element was selected by.
        """
        self.uid = uid
        self.bbox = bbox
        self.attrib = attrib
        # NOTE(review): the two calls below appear to belong to __init__, which
        # makes them run for every element created; presumably they only
        # ensure the packages are importable - confirm and consider hoisting.
        import_package('cv2', install_name='opencv-python')
        import_package('pyshine')
28
+
29
def get_id_from_element(elem):
    """Build an identifier string for a UI hierarchy node.

    The identifier is the node's ``resource-id`` (with ``:`` -> ``.`` and
    ``/`` -> ``_``) when it is non-empty, otherwise ``<class>_<width>_<height>``
    computed from the ``bounds`` attribute. A non-empty ``content-desc``
    shorter than 20 characters is appended after an underscore, with ``/`` and
    ``:`` replaced by ``_`` and spaces removed.

    Args:
        elem: Object exposing an ``attrib`` mapping that contains ``bounds``
            in the form ``[x1,y1][x2,y2]``.

    Returns:
        The identifier string.
    """
    attrs = elem.attrib
    corners = attrs["bounds"][1:-1].split("][")
    left, top = (int(value) for value in corners[0].split(","))
    right, bottom = (int(value) for value in corners[1].split(","))

    resource_id = attrs.get("resource-id")
    if resource_id:
        ident = resource_id.replace(":", ".").replace("/", "_")
    else:
        ident = f"{attrs['class']}_{right - left}_{bottom - top}"

    description = attrs.get("content-desc")
    if description and len(description) < 20:
        cleaned = description.replace("/", "_").replace(" ", "").replace(":", "_")
        ident += f"_{cleaned}"
    return ident
42
+
43
+
44
def traverse_tree(xml_path, elem_list, attrib, add_index=False):
    """Collect the nodes of a UI dump whose ``attrib`` attribute is ``"true"``.

    The XML file is streamed; for each matching node an ``AndroidElement`` is
    appended to ``elem_list`` unless its centre lies within
    ``configs["MIN_DIST"]`` of the centre of an element already in the list.

    Args:
        xml_path: Path of the XML file to parse.
        elem_list: List that receives the new elements (mutated in place).
        attrib: Attribute name to test, e.g. ``"clickable"``.
        add_index: When true, the node's ``index`` attribute is appended to
            the generated identifier.
    """
    ancestors = []
    for event, node in ET.iterparse(xml_path, ['start', 'end']):
        if event == 'end':
            ancestors.pop()
            continue

        # Only 'start' and 'end' events are requested, so this is a 'start'.
        ancestors.append(node)
        if node.attrib.get(attrib) != "true":
            continue

        # Prefix with the parent's identifier when the parent carries enough
        # attributes to build one.
        prefix = ""
        if len(ancestors) > 1:
            parent = ancestors[-2]
            identifiable = "resource-id" in parent.attrib or "class" in parent.attrib
            if "bounds" in parent.attrib and identifiable:
                prefix = get_id_from_element(parent)

        corners = node.attrib["bounds"][1:-1].split("][")
        x1, y1 = map(int, corners[0].split(","))
        x2, y2 = map(int, corners[1].split(","))
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        uid = get_id_from_element(node)
        if prefix:
            uid = prefix + "_" + uid
        if add_index:
            uid += f"_{node.attrib['index']}"

        # Skip the node when an already collected element is centred nearby.
        overlaps = any(
            (abs(cx - (known.bbox[0][0] + known.bbox[1][0]) // 2) ** 2
             + abs(cy - (known.bbox[0][1] + known.bbox[1][1]) // 2) ** 2) ** 0.5
            <= configs["MIN_DIST"]
            for known in elem_list
        )
        if not overlaps:
            elem_list.append(AndroidElement(uid, ((x1, y1), (x2, y2)), attrib))
80
+
81
+
82
def create_directory_for_file(file_path):
    """Make sure the directory that will contain ``file_path`` exists.

    Nothing happens when ``file_path`` has no directory component (the file
    lives in the current working directory) or when the directory path
    already exists. When the directory is created, its absolute path is
    logged.

    Args:
        file_path: Path of a file that is about to be written.
    """
    directory = os.path.dirname(file_path)

    # A bare file name yields "", and os.makedirs("") would raise.
    if not directory or os.path.exists(directory):
        return

    # exist_ok guards against another process creating it in the meantime.
    os.makedirs(directory, exist_ok=True)
    absolute_directory_path = os.path.abspath(directory)
    logger.info(f"Directory absolute path: {absolute_directory_path}")
93
+
94
+
95
def draw_bbox_multi(img_path, output_path, elem_list):
    """Draw a numbered bounding box for every element onto a screenshot.

    Elements are labelled 1, 2, 3, ... in list order. An element that cannot
    be drawn is logged and skipped, but still consumes its number so labels
    stay aligned with list positions.

    Args:
        img_path: Path of the image to annotate.
        output_path: Path the annotated image is written to.
        elem_list: Elements exposing ``bbox`` as ``((x1, y1), (x2, y2))``.

    Returns:
        The annotated image as returned by OpenCV.

    Raises:
        ValueError: If the image at ``img_path`` cannot be read.
    """
    import cv2
    import pyshine as ps

    imgcv = cv2.imread(img_path)
    if imgcv is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Cannot read image file: {img_path}")

    for count, elem in enumerate(elem_list, start=1):
        try:
            (left, top), (right, bottom) = elem.bbox

            # draw rectangle
            cv2.rectangle(imgcv,
                          (left, top),
                          (right, bottom),
                          (0, 0, 221),
                          3)

            label = str(count)
            imgcv = ps.putBText(imgcv, label, text_offset_x=(left + right) // 2 + 10,
                                text_offset_y=(top + bottom) // 2 + 10,
                                vspace=10, hspace=10, font_scale=1, thickness=2, background_RGB=(221, 0, 0),
                                text_RGB=(255, 255, 255), alpha=0.0)

        except Exception as e:
            color_log(f"ERROR: An exception occurs while labeling the image\n{e}", Color.red)
            # format_exc() returns the traceback text; print_exc() returns None.
            logger.info(traceback.format_exc())
    cv2.imwrite(output_path, imgcv)
    return imgcv
127
+
128
+
129
def draw_grid(img_path, output_path):
    """Overlay a numbered grid on an image and save the result.

    The cell size along each axis is the smallest divisor of that dimension
    between 120 and 180 inclusive, or 120 when there is none. Cells are
    numbered from 1, row by row.

    Args:
        img_path: Path of the image to read.
        output_path: Path the image with the grid is written to.

    Returns:
        ``(rows, cols)`` of the grid that was drawn.
    """
    import cv2

    def get_unit_len(n):
        # Smallest divisor of n within [120, 180]; -1 when none exists.
        return next((d for d in range(120, min(n, 180) + 1) if n % d == 0), -1)

    image = cv2.imread(img_path)
    height, width, _ = image.shape
    color = (255, 116, 113)

    unit_height = get_unit_len(height)
    unit_height = 120 if unit_height < 0 else unit_height
    unit_width = get_unit_len(width)
    unit_width = 120 if unit_width < 0 else unit_width

    thick = int(unit_width // 50)
    font_scale = int(0.01 * unit_width)
    rows, cols = height // unit_height, width // unit_width

    for label in range(1, rows * cols + 1):
        row, col = divmod(label - 1, cols)
        left, top = int(col * unit_width), int(row * unit_height)
        right, bottom = int((col + 1) * unit_width), int((row + 1) * unit_height)
        text_x = left + int(unit_width * 0.05)
        text_y = top + int(unit_height * 0.3)

        cv2.rectangle(image, (left, top), (right, bottom), color, thick // 2)
        # Black copy offset by 3 px first, then the coloured label on top.
        cv2.putText(image, str(label), (text_x + 3, text_y + 3), 0,
                    font_scale, (0, 0, 0), thick)
        cv2.putText(image, str(label), (text_x, text_y), 0,
                    font_scale, color, thick)

    cv2.imwrite(output_path, image)
    return rows, cols
164
+
165
+
166
def encode_image(image_path):
    """Return the content of the file at ``image_path`` as a Base64 string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
169
+
170
+
171
class ADBController:
    """Drive an Android device or emulator through the ``adb`` command line tool.

    The elements found by the latest :meth:`screenshot_and_annotate` call are
    kept in ``current_elem_list``; :meth:`tap`, :meth:`long_press` and
    :meth:`swipe` address elements by their 1-based position in that list.
    """

    def __init__(self, avd_name: str = None,
                 adb_path: str = os.path.expanduser('~') + "/Library/Android/sdk/platform-tools/adb",
                 emulator_path: str = os.path.expanduser('~') + "/Library/Android/sdk/emulator/emulator",
                 timeout: int = 30):
        """Store the tool paths and initialise the connection state.

        Args:
            avd_name: Name of the AVD used when an emulator has to be started.
            adb_path: Path of the ``adb`` executable.
            emulator_path: Path of the ``emulator`` executable.
            timeout: Timeout in seconds applied to adb invocations.
        """
        self.avd_name = avd_name
        self.adb_path = adb_path
        self.emulator_path = emulator_path
        self.timeout = timeout
        self.emulator_process = None
        self.device = None
        self.device_serial = "emulator-5554"  # default
        self.current_elem_list = []
        self.width, self.height = 0, 0

    def start_emulator(self, avd_name: str = None, headless: bool = False,
                       max_retry: int = 2) -> bool:
        """Start the emulator and wait until it is ready, retrying on failure.

        Args:
            avd_name: AVD to start; falls back to the instance's ``avd_name``.
            headless: When true the emulator is started without a window.
            max_retry: Number of additional attempts after the first one.

        Returns:
            True once the device is ready, False when every attempt failed.

        Raises:
            ValueError: If no AVD name is available.
        """
        avd = avd_name or self.avd_name
        if not avd:
            raise ValueError("AVD name must be specified")

        for attempt in range(max_retry + 1):
            if self._start_emulator_process(avd, headless):
                if self._wait_for_device():
                    logger.info(f"start success,attempt count:{attempt + 1}")
                    self._refresh_screen_size()
                    return True
            self.stop_emulator()
        return False

    def _start_emulator_process(self, avd: str, headless: bool) -> bool:
        """Spawn the emulator process; return False when it cannot be started."""
        try:
            cmd = [
                self.emulator_path,
                f"@{avd}",
                "-no-snapshot",
                "-no-audio",
                "-gpu", "swiftshader",
                "-wipe-data"
            ]
            if headless:
                cmd.append("-no-window")

            self.emulator_process = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.STDOUT
            )
            return True
        except Exception as e:
            logger.warning(f"adb start fail: {str(e)}")
            return False

    def stop_emulator(self) -> bool:
        """Ask the emulator to shut down and terminate the spawned process.

        Returns:
            True when adb answered with ``OK``; False otherwise, including
            when adb timed out or could not be executed.
        """
        try:
            result = subprocess.run(
                [self.adb_path, "-s", self.device_serial, "emu", "kill"],
                timeout=self.timeout,
                capture_output=True,
                text=True
            )
            return "OK" in result.stdout
        except (subprocess.TimeoutExpired, OSError):
            # OSError covers a missing or non-executable adb binary.
            return False
        finally:
            if self.emulator_process:
                self.emulator_process.terminate()
                self.emulator_process = None

    def execute_adb(self, command: list, device_serial: str = None) -> Tuple[bool, str]:
        """Run an adb command against a device.

        Args:
            command: adb arguments that follow ``-s <serial>``.
            device_serial: Target device; defaults to ``self.device_serial``.

        Returns:
            ``(True, stripped stdout)`` on success, ``(False, message)`` when
            the command failed or could not be run.
        """
        device = device_serial or self.device_serial
        full_cmd = [self.adb_path, "-s", device] + command

        try:
            result = subprocess.run(
                full_cmd,
                timeout=self.timeout,
                check=True,
                capture_output=True,
                text=True
            )
            return True, result.stdout.strip()
        except subprocess.CalledProcessError as e:
            return False, f"Command failed: {e.stderr}"
        except Exception as e:
            return False, str(e)

    def execute_adb_with_stdout(self, command: List[str]) -> Tuple[bool, Optional[str]]:
        """Run an adb command and return its stdout, or ``None`` on any failure.

        Returns:
            ``(True, stripped stdout)`` when adb exits with status 0,
            ``(False, None)`` otherwise.
        """
        try:
            result = subprocess.run(
                # Use the configured binary, like every other adb call here.
                [self.adb_path, "-s", self.device_serial] + command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=10
            )
        except Exception:
            # Timeout, missing binary, ...: callers only need the failure flag.
            return False, None
        if result.returncode == 0:
            return True, result.stdout.strip()
        return False, None

    # ---------- device operate ----------

    def screenshot(self, save_path: str) -> bool:
        """Capture the screen on the device and pull the PNG to ``save_path``.

        Returns:
            True when both the capture and the pull succeeded.
        """
        timestamp = int(time.time())
        remote_path = f"/sdcard/screenshot_{timestamp}.png"

        success, _ = self.execute_adb(["shell", "screencap", "-p", remote_path])
        if not success:
            return False

        return self._pull_file(remote_path, save_path)

    def dump_ui_xml(self, save_path: str) -> Optional[str]:
        """Dump the UI hierarchy, pull it to ``save_path`` and return its text.

        Returns:
            The XML content, or ``None`` when the dump or the pull failed.
        """
        remote_path = "/sdcard/ui_dump.xml"
        success, _ = self.execute_adb(["shell", "uiautomator", "dump", remote_path])
        if not success:
            logger.info("dump ui xml fail")
            return None
        success = self._pull_file(remote_path, save_path)
        if not success:
            logger.info("pull ui xml fail")
            return None

        with open(save_path, 'r', encoding='utf-8') as f:
            xml_content = f.read()
        return xml_content

    def tap(self, element: int) -> bool:
        """Tap the centre of the element with the given 1-based index.

        Returns:
            True when the adb command succeeded.

        Raises:
            IndexError: If ``element`` is not a valid element index.
        """
        x, y = self.__get_element_center(element)
        return self.__tap_coordinate(x, y)

    def text(self, text: str):
        """
        Input text, automatically replacing spaces with %s for proper ADB text input.

        Parameters:
            text: The text to input

        Returns:
            True when the adb command succeeded.
        """
        # Replace spaces with %s for proper handling in ADB
        formatted_text = text.replace(" ", "%s")
        success, _ = self.execute_adb(["shell", "input", "text", formatted_text])
        return success

    def long_press(self, element: int) -> bool:
        """Long-press the centre of the element with the given 1-based index.

        Implemented as a 2000 ms swipe that starts and ends on the same point.

        Returns:
            True when the adb command succeeded.

        Raises:
            IndexError: If ``element`` is not a valid element index.
        """
        x, y = self.__get_element_center(element)
        return self.__swipe_coordinate(x, y, x, y, 2000)

    def swipe(self, element: int, direction: str, dist: str = "medium"):
        """
        Perform swipe operations based on screen element labels

        Parameters:
            element: digital label displayed on the interface (1-based)
            direction: swipe direction ["up", "down", "left", "right"]
            dist: swipe distance ["short", "medium", "long"]

        Returns:
            False for an unknown direction, otherwise whether the adb command
            succeeded.

        Raises:
            IndexError: If ``element`` is not a valid element index.
        """
        # Get the element coordinates
        x, y = self.__get_element_center(element)

        # One unit is a tenth of the screen width; any other ``dist`` value
        # (e.g. "short") keeps a single unit.
        unit_dist = int(self.width / 10)
        if dist == "long":
            unit_dist *= 3
        elif dist == "medium":
            unit_dist *= 2

        # Vertical swipes travel twice as far as horizontal ones.
        if direction == "up":
            offset = 0, -2 * unit_dist
        elif direction == "down":
            offset = 0, 2 * unit_dist
        elif direction == "left":
            offset = -1 * unit_dist, 0
        elif direction == "right":
            offset = unit_dist, 0
        else:
            return False

        return self.__swipe_coordinate(x, y, x + offset[0], y + offset[1])

    def screenshot_and_annotate(self, name_prefix=None, return_base64=True):
        """Capture the screen, label its interactive elements and return the data.

        A screenshot and a UI dump are stored under ``tmp_files`` next to this
        module, clickable and focusable elements are collected, and a labelled
        copy of the screenshot is written. The collected elements replace
        ``current_elem_list``.

        Args:
            name_prefix: Prefix of the generated file names; defaults to the
                current timestamp.
            return_base64: When true, the labelled image is also returned as
                a Base64-encoded JPEG.

        Returns:
            ``(xml_text, base64_jpeg_or_None)``, or ``(None, None)`` when the
            screenshot or the UI dump failed.
        """
        import cv2

        sleep(3)
        if name_prefix is None:
            name_prefix = str(time.time())
        tmp_files_dir = os.path.join(os.path.dirname(__file__), "tmp_files")
        os.makedirs(tmp_files_dir, exist_ok=True)
        screenshot_path = os.path.join(tmp_files_dir, f"{name_prefix}_origin.png")
        screenshot_res = self.screenshot(screenshot_path)
        xml_path = os.path.join(tmp_files_dir, f"{name_prefix}.xml")
        xml_res = self.dump_ui_xml(xml_path)
        # screenshot() returns a bool, so failure is a falsy result.
        if not screenshot_res or xml_res is None:
            logger.warning(f"Failed to take screenshot or read XML")
            return None, None

        # Parsing interactive elements
        clickable_list = []
        focusable_list = []
        traverse_tree(xml_path, clickable_list, "clickable", True)
        traverse_tree(xml_path, focusable_list, "focusable", True)

        # Merge: keep a focusable element only when no clickable element is
        # centred within MIN_DIST of it.
        elem_list = clickable_list.copy()
        for elem in focusable_list:
            bbox = elem.bbox
            center = (bbox[0][0] + bbox[1][0]) // 2, (bbox[0][1] + bbox[1][1]) // 2
            if not any(
                    ((center[0] - ((e.bbox[0][0] + e.bbox[1][0]) // 2)) ** 2 +
                     (center[1] - ((e.bbox[0][1] + e.bbox[1][1]) // 2)) ** 2) ** 0.5 <= configs["MIN_DIST"]
                    for e in clickable_list
            ):
                elem_list.append(elem)

        # Generate annotated images
        labeled_path = os.path.join(tmp_files_dir, f"{name_prefix}_labeled.png")
        labeled_img = draw_bbox_multi(screenshot_path, labeled_path, elem_list)

        # Base64 encoding
        base64_str = None
        if return_base64:
            # cv2.imencode expects BGR input, exactly like cv2.imwrite, so the
            # image is encoded as is; converting to RGB first would swap the
            # red and blue channels in the resulting JPEG.
            success, buffer = cv2.imencode(".jpg", labeled_img, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
            if success:
                base64_str = base64.b64encode(buffer).decode("utf-8")

        self.current_elem_list = elem_list.copy()
        logger.info(f"Current elem size{len(self.current_elem_list)}")
        return xml_res, base64_str

    def setup_connection(self) -> bool:
        """Connect to a physical device if possible, otherwise start the emulator.

        Returns:
            True once a device is available.

        Raises:
            ConnectionError: If neither a physical device nor the emulator
                could be used.
        """
        # Prioritize physical equipment testing
        if self.__connect_physical_device():
            return True

        # Try connecting to the simulator
        if self.avd_name and self.start_emulator():
            return True

        raise ConnectionError("No available device found, please connect your phone or configure the simulator")

    # ---------- Helper Methods ----------
    def _refresh_screen_size(self) -> None:
        """Update ``width``/``height``; keep the old values when the query fails."""
        size = self.get_screen_size()
        if size is None:
            # get_screen_size() returns None on failure; unpacking it would raise.
            logger.warning("Unable to read the screen size")
            return
        self.width, self.height = size

    def __connect_physical_device(self) -> bool:
        """Select the first authorized non-emulator device, if there is one."""
        devices = self.__get_authorized_devices()
        if not devices:
            return False

        self.device = devices[0]
        logger.info(f"Connected physical device: {self.device}")
        self.device_serial = self.device
        self._refresh_screen_size()
        return True

    def __get_authorized_devices(self) -> list:
        """Return the serials listed by ``adb devices`` in state ``device``, emulators excluded."""
        success, output = self.execute_adb(["devices"])
        if not success:
            return []

        return [
            line.split("\t")[0]
            for line in output.splitlines()
            if "\tdevice" in line and "emulator" not in line
        ]

    def __tap_coordinate(self, x: int, y: int) -> bool:
        """Tap the given screen coordinates; return whether adb succeeded."""
        success, _ = self.execute_adb(["shell", "input", "tap", str(x), str(y)])
        return success

    def __get_element_center(self, elem_idx: int) -> tuple:
        """Return the centre ``(x, y)`` of the element with the 1-based index ``elem_idx``.

        Raises:
            IndexError: If the index is outside ``1..len(current_elem_list)``.
        """
        position = int(elem_idx)
        count = len(self.current_elem_list)
        # Reject 0 and negatives explicitly: ``position - 1`` would otherwise
        # wrap around and silently pick an element from the end of the list.
        if not 1 <= position <= count:
            raise IndexError(f"Element index {elem_idx} is out of range (valid: 1..{count})")
        tl, br = self.current_elem_list[position - 1].bbox
        return (tl[0] + br[0]) // 2, (tl[1] + br[1]) // 2

    def __swipe_coordinate(self, x1: int, y1: int, x2: int, y2: int, duration: int = 300) -> bool:
        """Swipe between two points over ``duration`` ms; return whether adb succeeded."""
        success, _ = self.execute_adb([
            "shell", "input", "swipe",
            str(x1), str(y1), str(x2), str(y2),
            str(duration)
        ])
        return success

    def _wait_for_device(self, timeout: int = 300, poll_interval: float = 1.0) -> bool:
        """Wait until the device is connected, booted and its display service is up.

        Three conditions are checked in order: the serial shows up in
        ``adb devices``, ``sys.boot_completed`` is ``1``, and
        ``service check SurfaceFlinger`` reports the service as found.

        Args:
            timeout: Maximum time to wait, in seconds.
            poll_interval: Pause between polling rounds, in seconds.

        Returns:
            True when all three conditions were met in time, False otherwise.
        """
        start_time = time.time()
        stages = {
            "adb_connected": False,
            "boot_completed": False,
            "services_ready": False
        }

        while time.time() - start_time < timeout:
            # Step 1: Detect adb connection
            if not stages["adb_connected"]:
                _, devices = self.execute_adb(["devices"])
                if self.device_serial in devices:
                    stages["adb_connected"] = True

            # Step 2: Detection system boot completed
            if stages["adb_connected"] and not stages["boot_completed"]:
                _, output = self.execute_adb([
                    "shell", "getprop", "sys.boot_completed"
                ])
                if output.strip() == "1":
                    stages["boot_completed"] = True

            # Step 3: Detecting Graphics Service Readiness
            if stages["boot_completed"] and not stages["services_ready"]:
                _, output = self.execute_adb([
                    "shell", "service check SurfaceFlinger"
                ])
                status = output.lower()
                # "not found" contains "found", so exclude it explicitly.
                if "found" in status and "not found" not in status:
                    return True

            # Pause between rounds instead of spawning adb in a tight loop.
            time.sleep(poll_interval)

        return False

    def _pull_file(self, remote: str, local: str) -> bool:
        """Pull a device file to ``local`` and delete it on the device on success."""
        create_directory_for_file(local)
        success, _ = self.execute_adb(["pull", remote, local])
        if success:
            self.execute_adb(["shell", "rm", remote])  # clean up the temporary file
        return success

    def get_screen_size(self) -> Optional[Tuple[int, int]]:
        """Return the first ``<number>x<number>`` pair found in ``wm size`` output as ints, or ``None`` on failure."""
        success, output = self.execute_adb(["shell", "wm", "size"])
        if not success:
            return None

        match = re.search(r"(\d+)x(\d+)", output)
        if match:
            return int(match.group(1)), int(match.group(2))
        return None
519
+
520
+
521
if __name__ == "__main__":
    # Example session: connect to a device, annotate the screen, swipe, and
    # annotate again.
    controller = ADBController(avd_name="Medium_Phone_API_35")

    if controller.setup_connection():
        logger.info("Simulator started successfully")
        # get_screen_size() returns None on failure, so check before unpacking.
        screen_size = controller.get_screen_size()
        if screen_size is None:
            logger.warning("Unable to read the screen size")
        else:
            width, height = screen_size
            logger.info(f"Get the screen size{width},{height}")

        # Take screenshots and annotate them
        controller.screenshot_and_annotate()
        controller.swipe(6, "up")

        xml_txt, base64_txt = controller.screenshot_and_annotate()
        logger.info(xml_txt)

        # NOTE: controller.stop_emulator() is intentionally not called here.
        logger.info("Close the simulator")
examples/tools/android/action/executor.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ # Copyright (c) 2025 inclusionAI.
3
+
4
+ from typing import List
5
+ from aworld.core.tool.action_factory import ActionFactory
6
+ from aworld.core.common import ActionModel, ActionResult
7
+ from aworld.logs.util import logger
8
+ from examples.tools.android.action.adb_controller import ADBController
9
+ from aworld.core.tool.base import ToolActionExecutor
10
+
11
+
12
class AndroidToolActionExecutor(ToolActionExecutor):
    """Executes Android tool actions against a single ADB controller."""

    def __init__(self, controller: ADBController):
        self.controller = controller

    def execute_action(self, actions: List[ActionModel], **kwargs) -> list[ActionResult]:
        """Execute the specified android action sequence by agent policy.

        Args:
            actions: Tool action sequence.

        Returns:
            Android action result list, one entry per action, in order.
        """
        return [self._exec(item, **kwargs) for item in actions]

    def _exec(self, action_model: ActionModel, **kwargs):
        """Resolve one action in ``ActionFactory`` and run it.

        The bare action name is tried first, then the tool name concatenated
        with the action name.

        Raises:
            ValueError: If neither name is registered in ``ActionFactory``.
        """
        resolved_name = action_model.action_name
        if resolved_name not in ActionFactory:
            # Fall back to the tool-qualified name.
            resolved_name = action_model.tool_name + action_model.action_name
        if resolved_name not in ActionFactory:
            raise ValueError(f'Action {resolved_name} not found')

        handler = ActionFactory(resolved_name)
        outcome = handler.act(action_model, controller=self.controller, **kwargs)
        logger.info(f"{resolved_name} execute finished")
        return outcome