from functools import cached_property
from typing import Literal
from pydantic import Field
from proxy_lite.history import MessageHistory, MessageLabel, SystemMessage, Text
from proxy_lite.tools import Tool
from .agent_base import Agents, BaseAgent, BaseAgentConfig
# System prompt for the Proxy-Lite agent. It describes the agent's role and the
# observe -> reason -> act loop the model is asked to follow, ending with a call
# to the return_value tool once the whole task is finished.
# NOTE(review): steps 1 and 2 say "putting them in tags" / "putting your
# thoughts in tags" without naming any tag — confirm whether explicit tag names
# were intended in this text.
MODEL_SYSTEM_PROMPT = """You are Proxy-Lite, an AI assistant that can perform actions on a computer screen.
You were developed by Convergence AI.
The user will instruct you to perform a task.
You will be shown a screen as well as relevant interactable elements highlighted by mark_ids and you will be given a set of tools to use to perform the task.
CRITICAL WORKFLOW INSTRUCTIONS:
1. Make observations about the screen, putting them in tags.
2. Reason about what needs to be done to complete the task, putting your thoughts in tags.
3. Use the tools to perform actions - DO NOT just describe what you plan to do, EXECUTE the actions immediately.
4. When you receive "[ACTION COMPLETED]" feedback, analyze the new screen state to determine your next action.
5. Continue executing actions step by step until the entire task is complete.
6. Use the return_value tool only when the ENTIRE task is finished.
IMPORTANT: Do NOT stop after one action. Multi-step tasks require multiple tool calls. When you receive action completion feedback, immediately analyze the screen and continue with the next required action.
""" # noqa: E501
# Default per-label message limits, used as the default value of
# ``ProxyLiteAgentConfig.history_messages_limit``.
# Presumably each value caps how many messages with that label are kept in the
# model's context (here: only 1 screenshot) — confirm against ``history_view``.
MAX_MESSAGES_FOR_CONTEXT_WINDOW: dict[MessageLabel, int] = {
    MessageLabel.SCREENSHOT: 1,
}
@Agents.register_agent_config("proxy_lite")
class ProxyLiteAgentConfig(BaseAgentConfig):
    """Configuration for the ``proxy_lite`` agent.

    Registered with ``Agents`` under the key ``"proxy_lite"``.
    """

    # Fixed to "proxy_lite"; presumably used to select this config type when
    # loading agent configs — confirm against ``Agents``.
    name: Literal["proxy_lite"] = "proxy_lite"

    # Per-label limits handed to ``history_view`` when the agent builds its
    # history view. The factory returns a *copy* of the module-level defaults
    # so that mutating one config's limits cannot alter the shared constant
    # (and thereby every other config that relies on the default).
    history_messages_limit: dict[MessageLabel, int] = Field(
        default_factory=lambda: dict(MAX_MESSAGES_FOR_CONTEXT_WINDOW),
    )
@Agents.register_agent("proxy_lite")
class ProxyLiteAgent(BaseAgent):
    """Agent registered with ``Agents`` under the key ``"proxy_lite"``.

    Uses ``MODEL_SYSTEM_PROMPT`` as its system prompt and exposes the
    environment-provided tools unchanged.
    """

    config: ProxyLiteAgentConfig
    # Label associated with this agent's messages; presumably consumed by the
    # base class when recording model responses — confirm in ``BaseAgent``.
    message_label: MessageLabel = MessageLabel.AGENT_MODEL_RESPONSE

    def __init__(self, **data):
        """Forward all keyword arguments to the base-class initializer."""
        super().__init__(**data)

    @property
    def system_prompt(self) -> str:
        """Return the module-level system prompt text."""
        return MODEL_SYSTEM_PROMPT

    @cached_property
    def tools(self) -> list[Tool]:
        """Return ``self.env_tools``; the result is cached on first access."""
        return self.env_tools

    async def get_history_view(self) -> MessageHistory:
        """Build the message history to present to the model.

        Returns:
            A ``MessageHistory`` consisting of a single system message holding
            the system prompt, followed by the view of ``self.history``
            produced with the limits from ``self.config.history_messages_limit``.
        """
        # The system message is built first, then the limited history view,
        # so the prompt always leads the combined history.
        prompt_text = Text(text=self.system_prompt)
        system_history = MessageHistory(
            messages=[SystemMessage(content=[prompt_text])],
        )
        limited_history = self.history.history_view(
            limits=self.config.history_messages_limit,
        )
        return system_history + limited_history