"""Proxy-Lite agent: system prompt, config and agent registered as "proxy_lite"."""

from functools import cached_property
from typing import Literal

from pydantic import Field

from proxy_lite.history import MessageHistory, MessageLabel, SystemMessage, Text
from proxy_lite.tools import Tool

from .agent_base import Agents, BaseAgent, BaseAgentConfig

# System prompt sent as the first message of every history view.
# Written as adjacent string literals so each source line stays short; the
# resulting runtime string is unchanged (a single line of text followed by a
# trailing space and a newline).
# NOTE(review): steps 1 and 2 say "putting them in tags" without naming any
# tag -- the tag names may have been lost at some point; confirm against the
# intended prompt before editing the text.
MODEL_SYSTEM_PROMPT = (
    "You are Proxy-Lite, an AI assistant that can perform actions on a computer screen. "
    "You were developed by Convergence AI. "
    "The user will instruct you to perform a task. "
    "You will be shown a screen as well as relevant interactable elements highlighted by mark_ids "
    "and you will be given a set of tools to use to perform the task. "
    "CRITICAL WORKFLOW INSTRUCTIONS: "
    "1. Make observations about the screen, putting them in tags. "
    "2. Reason about what needs to be done to complete the task, putting your thoughts in tags. "
    "3. Use the tools to perform actions - DO NOT just describe what you plan to do, "
    "EXECUTE the actions immediately. "
    '4. When you receive "[ACTION COMPLETED]" feedback, '
    "analyze the new screen state to determine your next action. "
    "5. Continue executing actions step by step until the entire task is complete. "
    "6. Use the return_value tool only when the ENTIRE task is finished. "
    "IMPORTANT: Do NOT stop after one action. "
    "Multi-step tasks require multiple tool calls. "
    "When you receive action completion feedback, immediately analyze the screen "
    "and continue with the next required action. \n"
)

# Default per-label limits handed to ``history.history_view(limits=...)``.
# Presumably the number of most recent messages of each label that are kept
# in the view -- confirm against ``MessageHistory.history_view``.
MAX_MESSAGES_FOR_CONTEXT_WINDOW = {
    MessageLabel.SCREENSHOT: 1,
}


@Agents.register_agent_config("proxy_lite")
class ProxyLiteAgentConfig(BaseAgentConfig):
    """Configuration for :class:`ProxyLiteAgent`, registered as ``"proxy_lite"``."""

    # Discriminator value identifying this config type.
    name: Literal["proxy_lite"] = "proxy_lite"
    # Per-label limits used when building the history view. The factory
    # returns a fresh copy so that mutating one config's limits cannot alter
    # the module-level default or any other config instance.
    history_messages_limit: dict[MessageLabel, int] = Field(
        default_factory=lambda: dict(MAX_MESSAGES_FOR_CONTEXT_WINDOW),
    )


@Agents.register_agent("proxy_lite")
class ProxyLiteAgent(BaseAgent):
    """Agent registered as ``"proxy_lite"`` that prompts with ``MODEL_SYSTEM_PROMPT``."""

    config: ProxyLiteAgentConfig
    # Label for this agent's messages (the model-response label).
    message_label: MessageLabel = MessageLabel.AGENT_MODEL_RESPONSE

    def __init__(self, **data):
        """Forward all keyword arguments unchanged to ``BaseAgent.__init__``."""
        super().__init__(**data)

    @property
    def system_prompt(self) -> str:
        """Return the module-level system prompt text."""
        return MODEL_SYSTEM_PROMPT

    @cached_property
    def tools(self) -> list[Tool]:
        """Return ``self.env_tools``; the result is cached after first access."""
        return self.env_tools

    async def get_history_view(self) -> MessageHistory:
        """Return the system prompt followed by the limited view of the history.

        A ``MessageHistory`` holding only the system message is concatenated
        (``+``) with ``self.history.history_view(...)``, which is given the
        per-label limits from ``self.config.history_messages_limit``.
        """
        system_history = MessageHistory(
            messages=[SystemMessage(content=[Text(text=self.system_prompt)])],
        )
        limited_view = self.history.history_view(
            limits=self.config.history_messages_limit,
        )
        return system_history + limited_view