# Repository-viewer header: last commit c73909d ("fixes + cleanup flow") by Trisha Tomy
import base64
from functools import cached_property
from typing import Any, Literal, Optional, Self, List # Added List import
from proxy_lite.browser.browser import BrowserSession
from proxy_lite.environments.environment_base import (
Action,
BaseEnvironment,
BaseEnvironmentConfig,
Environments,
Observation,
State,
)
from proxy_lite.tools import BrowserTool, Tool, ToolExecutionResponse
from proxy_lite.logger import logger
@Environments.register_environment_config("webbrowser")
class WebBrowserEnvironmentConfig(BaseEnvironmentConfig):
    """Settings for the environment registered under the name ``"webbrowser"``.

    The fields fall into three groups: what goes into each observation,
    how the browser session is launched, and an optional automatic
    Salesforce login performed before the first observation.
    """

    # Discriminator matching the name this config is registered under.
    name: Literal["webbrowser"] = "webbrowser"

    # Page opened by ``initialise`` when no automatic login is performed.
    homepage: str = "https://google.com"

    # --- Observation contents -------------------------------------------
    # NOTE(review): not read anywhere in this file; presumably consumed by
    # the browser layer - confirm.
    annotate_image: bool = True
    # Passed as ``delay`` to ``BrowserSession.screenshot`` (seconds).
    screenshot_delay: float = 1.0
    # Attach the current page's HTML to the observation state.
    include_html: bool = True
    # Append the browser's POI text to the observation's ``text``.
    include_poi_text: bool = True
    # Store the browser's POIs under ``info["pois"]``.
    record_pois: bool = True

    # --- Browser session launch ------------------------------------------
    viewport_width: int = 1280
    viewport_height: int = 720
    # NOTE(review): not read anywhere in this file; unit is presumably
    # seconds - confirm against the Browserbase integration.
    browserbase_timeout: int = 7200
    headless: bool = True

    # --- Screenshot variants ----------------------------------------------
    # Also store the un-annotated screenshot under ``info["original_image"]``.
    keep_original_image: bool = False
    # Prefer the un-annotated screenshot as the observation image.
    no_pois_in_image: bool = False

    # --- Automatic login ---------------------------------------------------
    # The login is attempted only when ``perform_login`` is true AND the
    # login URL, username and password are all set; otherwise the
    # environment just opens ``homepage``.
    perform_login: bool = False
    salesforce_login_url: str | None = None
    salesforce_username: str | None = None
    salesforce_password: str | None = None
    # NOTE(review): not read in this file; presumably used by the caller or
    # agent to navigate after login - confirm.
    target_url: str | None = None
@Environments.register_environment("webbrowser")
class WebBrowserEnvironment(BaseEnvironment):
    """Environment that exposes a ``BrowserSession`` to an agent.

    Intended to be used as an async context manager: ``__aenter__`` starts
    the browser session and ``__aexit__`` closes it. ``initialise``
    produces the first observation (optionally after an automatic
    Salesforce login) and ``execute_action`` runs the agent's tool calls
    and returns the follow-up observation.
    """

    config: WebBrowserEnvironmentConfig
    browser: Optional[BrowserSession] = None
    # Set when the previous action was cancelled because the page changed;
    # the next action is then executed without re-checking the POIs.
    cancelled_last_action: bool = False

    class Config:
        arbitrary_types_allowed = True

    async def __aenter__(self) -> Self:
        """Create and enter the browser session, then return ``self``."""
        config = self.config
        self.browser = self.browser_session(
            viewport_width=config.viewport_width,
            viewport_height=config.viewport_height,
            headless=config.headless,
        )
        await self.browser.__aenter__()
        if self.logger:
            self.logger.info("🌐 [bold blue]Browser session started.[/]")
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Exit the browser session, if one was started."""
        if self.browser:
            await self.browser.__aexit__(exc_type, exc_value, traceback)

    @property
    def info_for_user(self) -> str:
        """Short human-readable description of this environment."""
        return "This is a web browser environment. You can navigate the web, search the web, and perform actions on the web."  # noqa: E501

    @cached_property
    def tools(self) -> list[Tool]:
        """Tools offered to the agent: a single ``BrowserTool`` on the session.

        Raises:
            RuntimeError: if the browser session has not been started.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        return [BrowserTool(session=self.browser)]  # type: ignore

    @cached_property
    def browser_session(self) -> type[BrowserSession]:
        """Session class instantiated by ``__aenter__``."""
        return BrowserSession

    @property
    def cookies(self) -> list[dict]:
        """Cookies for the session; this environment supplies none."""
        return []

    async def _wait_for_first_selector(
        self,
        page: Any,
        selectors: tuple[str, ...],
        timeout: int = 10000,
    ) -> str:
        """Return the first selector in ``selectors`` that appears on ``page``.

        Each selector is awaited in order with ``timeout`` (milliseconds).
        A failure on any selector but the last moves on to the next one; a
        failure on the last selector propagates to the caller.
        """
        *preferred, last_resort = selectors
        for selector in preferred:
            try:
                await page.wait_for_selector(selector, timeout=timeout)
            except Exception:
                # Deliberately not a bare ``except``: task cancellation and
                # KeyboardInterrupt must not be mistaken for "selector missing".
                continue
            return selector
        await page.wait_for_selector(last_resort, timeout=timeout)
        return last_resort

    async def _perform_login(self) -> None:
        """Fill in and submit the Salesforce login form.

        Raises:
            RuntimeError: if there is no active page after navigating to the
                login URL.
            Exception: whatever the browser raises when a selector, fill,
                click or load-state wait fails.
        """
        config = self.config
        await self.browser.goto(config.salesforce_login_url)
        if self.logger:
            self.logger.debug(f"🔑 Navigated to login page: {self.browser.current_url}")

        page = self.browser.current_page
        if not page:
            # Without a page nothing below can run; failing loudly avoids
            # reporting a successful login that never happened.
            raise RuntimeError("No active page available after navigating to the login URL")

        # Primary selectors first, then broader fallbacks.
        username_selector = await self._wait_for_first_selector(
            page, ('#username', 'input[name="username"], input[type="email"]')
        )
        password_selector = await self._wait_for_first_selector(
            page, ('#password', 'input[name="password"], input[type="password"]')
        )

        await page.fill(username_selector, config.salesforce_username)
        await page.fill(password_selector, config.salesforce_password)
        if self.logger:
            self.logger.debug("🔑 Credentials filled, submitting login form")

        try:
            await page.click('#Login')
        except Exception:
            # Fallback selectors for the login button.
            await page.click('input[type="submit"], button[type="submit"], .btn-primary')

        # Re-read the current page in case submitting changed it, then wait
        # for the post-login navigation to settle.
        active_page = self.browser.current_page or page
        await active_page.wait_for_load_state('networkidle', timeout=30000)

    async def _capture_observation(
        self,
        tool_responses: Optional[list] = None,
        extra_info: Optional[dict[str, Any]] = None,
    ) -> Observation:
        """Screenshot the current page and package it as an ``Observation``.

        Args:
            tool_responses: when not ``None``, stored on the state as
                ``tool_responses``; when ``None`` the state is built without
                that argument.
            extra_info: entries placed in ``info`` right after ``"url"``.
        """
        config = self.config
        original_img, annotated_img = await self.browser.screenshot(
            delay=config.screenshot_delay,
        )
        # Honour ``no_pois_in_image`` for every observation, not only the
        # first one.
        shown_img = original_img if config.no_pois_in_image else annotated_img
        base64_image = base64.b64encode(shown_img).decode("utf-8")

        info: dict[str, Any] = {"url": self.browser.current_url, **(extra_info or {})}
        if config.record_pois:
            info["pois"] = self.browser.pois
        if config.keep_original_image:
            info["original_image"] = base64.b64encode(original_img).decode("utf-8")

        html_content = await self.browser.current_page.content() if config.include_html else None

        state_fields: dict[str, Any] = {
            "text": f"URL: {self.browser.current_url}"
            + (f"\n{self.browser.poi_text}" if config.include_poi_text else ""),
            "image": base64_image,
            "html": html_content,
        }
        if tool_responses is not None:
            state_fields["tool_responses"] = tool_responses

        return Observation(
            state=State(**state_fields),
            terminated=False,
            reward=None,
            info=info,
        )

    async def initialise(self) -> Observation:
        """Open the starting page and return the first observation.

        When ``perform_login`` is set and the login URL, username and
        password are all configured, an automatic Salesforce login is
        performed; otherwise the configured homepage is opened.

        Raises:
            RuntimeError: if the browser session has not been started.
            Exception: any login or navigation failure, after logging it.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        config = self.config
        if self.logger:
            self.logger.debug(f"DEBUG: Initialising WebBrowserEnvironment. Homepage: {config.homepage}")

        login_configured = bool(
            config.perform_login
            and config.salesforce_login_url
            and config.salesforce_username
            and config.salesforce_password
        )

        if login_configured:
            if self.logger:
                self.logger.info(f"🔑 Performing automatic Salesforce login to {config.salesforce_login_url}")
            try:
                await self._perform_login()
                if self.logger:
                    self.logger.info(f"🔑 Login completed successfully. Current URL: {self.browser.current_url}")
                    self.logger.info("🔑 Login process complete - agent will handle navigation using open_new_tab_and_go_to")
            except Exception as e:
                if self.logger:
                    self.logger.error(f"ERROR: Automatic login failed: {e}")
                raise  # Re-raise to propagate the error
        else:
            if config.perform_login and self.logger:
                # Login was requested but cannot be attempted; say so rather
                # than silently opening the homepage.
                self.logger.warning(
                    "perform_login is enabled but the Salesforce login URL, username or password is missing; "
                    "skipping automatic login and opening the homepage instead."
                )
            try:
                await self.browser.goto(config.homepage)
                if self.logger:
                    self.logger.debug(f"DEBUG: Browser navigated to homepage. Current URL: {self.browser.current_url}")
            except Exception as e:
                if self.logger:
                    self.logger.error(f"ERROR: Failed to navigate to homepage {config.homepage}: {e}")
                raise  # Re-raise to propagate the error

        observation = await self._capture_observation()
        if self.logger:
            self.logger.debug(f"DEBUG: Initial observation captured. URL: {self.browser.current_url}")
        return observation

    async def should_perform_action(self) -> bool:
        """Decide whether the pending action may run on the current page.

        Returns ``True`` right after a cancelled action (without updating
        the POIs), ``False`` when there is no browser or when the POI
        centroids changed since they were last computed, and ``True``
        otherwise. A detected change sets ``cancelled_last_action``.
        """
        # If the last action was cancelled, run this one without updating POIs.
        if self.cancelled_last_action:
            self.cancelled_last_action = False
            return True
        if self.browser is None:
            return False

        # Compare POI centroids before and after a refresh to detect changes.
        centroids_before = [tuple(point) for point in self.browser.poi_centroids]
        await self.browser.update_poi()
        centroids_after = [tuple(point) for point in self.browser.poi_centroids]

        if centroids_before != centroids_after:
            # Record that this action is being cancelled.
            self.cancelled_last_action = True
            return False
        return True

    async def execute_action(self, action: Action) -> Observation:
        """Run the action's tool calls and return the resulting observation.

        If the page changed since the last observation, no tool is run;
        every tool call instead receives a cancellation response and
        ``info["cancelled_tools"]`` is ``True``. A tool that raises is
        reported through a response carrying the error text.

        Raises:
            RuntimeError: if the browser session has not been started.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")

        responses = []
        cancelled_tools_flag = False
        may_run = await self.should_perform_action()
        tool_calls = action.tool_calls or []

        if may_run:
            for tool_call in tool_calls:
                try:
                    tool_response = await self.execute_tool(tool_call)
                    if tool_response is None:
                        tool_response = ToolExecutionResponse(content="Tool execution returned None", id=tool_call.id)
                    else:
                        tool_response.id = tool_call.id
                    responses.append(tool_response)
                except Exception as e:  # noqa: PERF203
                    if self.logger:
                        self.logger.warning("🌐 An error occurred taking action: %s", str(e), exc_info=False)
                    responses.append(ToolExecutionResponse(content=str(e), id=tool_call.id))
        else:
            if self.logger:
                self.logger.warning("🌐 Page changed since last observation, cancelling action.")
            self.cancelled_last_action = True
            for tool_call in tool_calls:
                responses.append(
                    ToolExecutionResponse(
                        content="The page changed before the action could be executed, instead of being ran it was cancelled.",  # noqa: E501
                        id=tool_call.id,
                    )
                )
            cancelled_tools_flag = True

        return await self._capture_observation(
            tool_responses=responses,
            extra_info={"cancelled_tools": cancelled_tools_flag},
        )

    async def observe(self) -> Observation:
        """Not implemented for this environment.

        Raises:
            RuntimeError: if the browser session has not been started.
            NotImplementedError: always, once a browser session exists.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        # BrowserSession may not provide an ``observe`` method, so nothing is
        # delegated here.
        raise NotImplementedError("Observe method not implemented")

    async def evaluate(self, **kwargs: dict[str, Any]) -> dict[str, Any]:
        """Return an empty evaluation result; ``kwargs`` are ignored."""
        return {}

    async def get_info(self) -> dict[str, Any]:
        """Return an empty info mapping."""
        return {}