import json

# Controller/worker heartbeat timing.
# NOTE(review): units are not stated in this file -- presumably seconds;
# confirm against the code that consumes these values.
CONTROLLER_HEART_BEAT_EXPIRATION = 30
WORKER_HEART_BEAT_INTERVAL = 15

# Log directory; "." is a relative path (presumably resolved against the
# process's working directory -- confirm at the use site).
LOGDIR = "."

# Sentinel label value.
# NOTE(review): -100 equals the default ``ignore_index`` of PyTorch's
# CrossEntropyLoss, so this is presumably used to mask label positions out of
# the loss -- confirm at the use site.
IGNORE_INDEX = -100

# Placeholder string that stands for an image.
DEFAULT_IMAGE_TOKEN = "<image>"

# Pointer-related special tokens (start / end / pad).
DEFAULT_POINTER_START_TOKEN = "<|pointer_start|>"
DEFAULT_POINTER_END_TOKEN = "<|pointer_end|>"
DEFAULT_POINTER_PAD_TOKEN = "<|pointer_pad|>"

# System prompt for the GUI-grounding task. The pieces below are joined by
# implicit string concatenation; the resulting text is runtime data and must
# stay byte-identical (do not "fix" its wording here).
grounding_system_message = (
    "You are a GUI agent. "
    "Given a screenshot of the current GUI and a human instruction, "
    "your task is to locate the screen element that corresponds to the instruction. "
    "You should output a PyAutoGUI action that performs a click on the correct position. "
    "To indicate the click location, we will use some special tokens, "
    "which is used to refer to a visual patch later. "
    "For example, you can output: pyautogui.click(<your_special_token_here>)."
)

# Jinja chat template, split at tag boundaries for readability (the pieces are
# joined by implicit string concatenation, so the value is a single string).
#
# What it renders:
#   * every message is wrapped as "<|im_start|>{role}\n ... <|im_end|>\n";
#   * string content is emitted as-is;
#   * list content is walked item by item:
#       - image items ('type' == 'image', or an 'image' / 'image_url' key)
#         emit "<|vision_start|><|image_pad|><|vision_end|>", prefixed with
#         "Picture N: " when `add_vision_id` is set;
#       - video items ('type' == 'video', or a 'video' key) emit
#         "<|vision_start|><|video_pad|><|vision_end|>", prefixed with
#         "Video N: " when `add_vision_id` is set;
#       - items with a 'text' key emit that text;
#   * when `add_generation_prompt` is set, "<|im_start|>assistant\n" is appended.
chat_template = (
    "{% set image_count = namespace(value=0) %}"
    "{% set video_count = namespace(value=0) %}"
    "{% for message in messages %}"
    "<|im_start|>{{ message['role'] }}\n"
    "{% if message['content'] is string %}"
    "{{ message['content'] }}<|im_end|>\n"
    "{% else %}"
    "{% for content in message['content'] %}"
    "{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}"
    "{% set image_count.value = image_count.value + 1 %}"
    "{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}"
    "<|vision_start|><|image_pad|><|vision_end|>"
    "{% elif content['type'] == 'video' or 'video' in content %}"
    "{% set video_count.value = video_count.value + 1 %}"
    "{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}"
    "<|vision_start|><|video_pad|><|vision_end|>"
    "{% elif 'text' in content %}{{ content['text'] }}"
    "{% endif %}"
    "{% endfor %}"
    "<|im_end|>\n"
    "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
)

# Jinja template for recipient-aware turns, split at tag boundaries for
# readability (implicit string concatenation; the value is a single string).
#
# What it renders, per message:
#   * "<|im_start|>" + role;
#   * "<|recipient|>" + recipient, only when the message has a 'recipient' key;
#   * a newline followed by message['content'][0]['text'] -- note that only
#     the FIRST content item is rendered and it must carry a 'text' key;
#   * "<|diff_marker|>\n" when 'end_turn' is present and truthy, otherwise
#     "<|im_end|>\n".
# When `add_generation_prompt` is set, "<|im_start|>assistant<|recipient|>"
# is appended.
assistant_template = (
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role']}}"
    "{% if 'recipient' in message %}<|recipient|>{{ message['recipient'] }}{% endif %}"
    "{{'\n' + message['content'][0]['text']}}"
    "{% if 'end_turn' in message and message['end_turn'] %}"
    "{{'<|diff_marker|>\n'}}"
    "{% else %}"
    "{{'<|im_end|>\n'}}"
    "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant<|recipient|>' }}{% endif %}"
)

# Extra special-token strings: the recipient and diff markers that appear in
# assistant_template, followed by the three pointer tokens defined above.
# NOTE(review): presumably registered with the tokenizer -- confirm at the
# use site. Order is kept exactly as in the original list.
ADDITIONAL_SPECIAL_TOKENS = [
    "<|recipient|>",
    "<|diff_marker|>",
    DEFAULT_POINTER_START_TOKEN,
    DEFAULT_POINTER_END_TOKEN,
    DEFAULT_POINTER_PAD_TOKEN,
]

# Regular expressions that pull numeric coordinates out of action strings.
#   [0] "x=<num>, y=<num>"                          -> 2 groups: x, y
#   [1] "from_coord=[<num>, <num>], to_coord=[...]" -> 4 groups: x1, y1, x2, y2
# <num> is one or more digits/dots ([0-9.]+); signs and exponents are not
# matched, and the ", " separators are matched literally (exactly one space).
# The misspelled name ("PATTENS") is the existing public name and is kept so
# current importers continue to work.
ACTION_PATTENS_XY = [
    r"x=([0-9.]+), y=([0-9.]+)",
    r"from_coord=\[([0-9.]+), ([0-9.]+)\], to_coord=\[([0-9.]+), ([0-9.]+)\]",
]

# Correctly spelled alias for new code; refers to the very same list object.
ACTION_PATTERNS_XY = ACTION_PATTENS_XY

# NOTE(review): presumably the stop sequences for generation -- confirm at the
# use site. The single entry is the same "<|diff_marker|>" string that
# assistant_template emits when a message has a truthy 'end_turn'.
until = ["<|diff_marker|>"]