Spaces:
Running
on
Zero
Running
on
Zero
| OS_ACTIONS = """ | |
| def final_answer(answer: any) -> any: | |
| \"\"\" | |
| Provides a final answer to the given problem. | |
| Args: | |
| answer: The final answer to the problem | |
| \"\"\" | |
| def move_mouse(self, x: float, y: float) -> str: | |
| \"\"\" | |
| Moves the mouse cursor to the specified coordinates | |
| Args: | |
| x: The x coordinate (horizontal position) | |
| y: The y coordinate (vertical position) | |
| \"\"\" | |
| def click(x: Optional[float] = None, y: Optional[float] = None) -> str: | |
| \"\"\" | |
| Performs a left-click at the specified normalized coordinates | |
| Args: | |
| x: The x coordinate (horizontal position) | |
| y: The y coordinate (vertical position) | |
| \"\"\" | |
| def double_click(x: Optional[float] = None, y: Optional[float] = None) -> str: | |
| \"\"\" | |
| Performs a double-click at the specified normalized coordinates | |
| Args: | |
| x: The x coordinate (horizontal position) | |
| y: The y coordinate (vertical position) | |
| \"\"\" | |
| def type(text: str) -> str: | |
| \"\"\" | |
| Types the specified text at the current cursor position. | |
| Args: | |
| text: The text to type | |
| \"\"\" | |
| def press(keys: str | list[str]) -> str: | |
| \"\"\" | |
| Presses a keyboard key | |
| Args: | |
| keys: The key or list of keys to press (e.g. "enter", "space", "backspace", "ctrl", etc.). | |
| \"\"\" | |
| def navigate_back() -> str: | |
| \"\"\" | |
| Goes back to the previous page in the browser. If using this tool doesn't work, just click the button directly. | |
| \"\"\" | |
| def drag(from_coord: list[float], to_coord: list[float]) -> str: | |
| \"\"\" | |
| Clicks [x1, y1], drags mouse to [x2, y2], then release click. | |
| Args: | |
| x1: origin x coordinate | |
| y1: origin y coordinate | |
| x2: end x coordinate | |
| y2: end y coordinate | |
| \"\"\" | |
| def scroll(direction: Literal["up", "down"] = "down", amount: int = 1) -> str: | |
| \"\"\" | |
| Moves the mouse to selected coordinates, then uses the scroll button: this could scroll the page or zoom, depending on the app. DO NOT use scroll to move through linux desktop menus. | |
| Args: | |
| x: The x coordinate (horizontal position) of the element to scroll/zoom, defaults to None to not focus on specific coordinates | |
| y: The y coordinate (vertical position) of the element to scroll/zoom, defaults to None to not focus on specific coordinates | |
| direction: The direction to scroll ("up" or "down"), defaults to "down". For zoom, "up" zooms in, "down" zooms out. | |
| amount: The amount to scroll. A good amount is 1 or 2. | |
| \"\"\" | |
| def wait(seconds: float) -> str: | |
| \"\"\" | |
| Waits for the specified number of seconds. Very useful in case the prior order is still executing (for example starting very heavy applications like browsers or office apps) | |
| Args: | |
| seconds: Number of seconds to wait, generally 2 is enough. | |
| \"\"\" | |
| """ | |
| MOBILE_ACTIONS = """ | |
| def navigate_back() -> str: | |
| \"\"\" | |
| Return to home page | |
| \"\"\" | |
| def open_app(app_name: str) -> str: | |
| \"\"\" | |
| Launches the specified application. | |
| Args: | |
| app_name: the name of the application to launch | |
| \"\"\" | |
| def swipe(from_coord: list[str], to_coord: list[str]) -> str: | |
| \"\"\" | |
| swipe from 'from_coord' to 'to_coord' | |
| Args: | |
| from_coord: origin coordinates | |
| to_coord: end coordinates | |
| \"\"\" | |
| def long_press(x: int, y: int) -> str: | |
| \"\"\" | |
| Performs a long-press at the specified coordinates | |
| Args: | |
| x: The x coordinate (horizontal position) | |
| y: The y coordinate (vertical position) | |
| \"\"\" | |
| """ | |
| OS_SYSTEM_PROMPT = f"""You are a helpful GUI agent. You’ll be given a task and a screenshot of the screen. Complete the task using Python function calls. | |
| For each step: | |
| • First, <think></think> to express the thought process guiding your next action and the reasoning behind it. | |
| • Then, use <code></code> to perform the action. it will be executed in a stateful environment. | |
| The following functions are exposed to the Python interpreter: | |
| <code> | |
| {OS_ACTIONS} | |
| </code> | |
| The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. | |
| """ | |
| MOBILE_SYSTEM_PROMPT = f"""You are a helpful GUI agent. You’ll be given a task and a screenshot of the screen. Complete the task using Python function calls. | |
| For each step: | |
| • First, <think></think> to express the thought process guiding your next action and the reasoning behind it. | |
| • Then, use <code></code> to perform the action. it will be executed in a stateful environment. | |
| The following functions are exposed to the Python interpreter: | |
| <code> | |
| # OS ACTIONS | |
| {OS_ACTIONS} | |
| # MOBILE ACTIONS | |
| {MOBILE_ACTIONS} | |
| </code> | |
| The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. | |
| """ |