import base64
from functools import cached_property
from typing import Any, Literal, Optional, Self, List  # Added List

from proxy_lite.browser.browser import BrowserSession
from proxy_lite.environments.environment_base import (
    Action,
    BaseEnvironment,
    BaseEnvironmentConfig,
    Environments,
    Observation,
    State,
)
from proxy_lite.tools import BrowserTool, Tool, ToolExecutionResponse
from proxy_lite.logger import logger

# Timeouts (milliseconds) used while driving the login page.
_LOGIN_SELECTOR_TIMEOUT_MS = 10000
_LOGIN_NETWORK_IDLE_TIMEOUT_MS = 30000


async def _wait_for_first_available(page: Any, primary: str, fallback: str) -> str:
    """Wait for ``primary`` on ``page``; if that fails, wait for ``fallback``.

    Returns the selector that became available. If the fallback wait also
    fails, its exception propagates to the caller.
    """
    try:
        await page.wait_for_selector(primary, timeout=_LOGIN_SELECTOR_TIMEOUT_MS)
    except Exception:  # noqa: BLE001
        # Deliberately not a bare ``except``: task cancellation and
        # KeyboardInterrupt must propagate instead of triggering the fallback.
        await page.wait_for_selector(fallback, timeout=_LOGIN_SELECTOR_TIMEOUT_MS)
        return fallback
    return primary


@Environments.register_environment_config("webbrowser")
class WebBrowserEnvironmentConfig(BaseEnvironmentConfig):
    """Configuration for the ``webbrowser`` environment."""

    name: Literal["webbrowser"] = "webbrowser"
    # Page opened by ``initialise`` when no automatic login is performed.
    homepage: str = "https://google.com"
    annotate_image: bool = True
    screenshot_delay: float = 1.0  # seconds
    # Attach the current page's HTML to every observation.
    include_html: bool = True
    # Append the browser's POI text to the observation text.
    include_poi_text: bool = True
    # Store the browser's POIs under ``info["pois"]``.
    record_pois: bool = True
    viewport_width: int = 1280
    viewport_height: int = 720
    browserbase_timeout: int = 7200
    headless: bool = True
    # Store the un-annotated screenshot under ``info["original_image"]``.
    keep_original_image: bool = False
    # Use the un-annotated screenshot as the observation image.
    no_pois_in_image: bool = False

    # Automatic login. The login only runs when ``perform_login`` is true AND
    # the login URL, username and password are all set.
    perform_login: bool = False
    salesforce_login_url: Optional[str] = None
    salesforce_username: Optional[str] = None
    salesforce_password: Optional[str] = None
    target_url: Optional[str] = None


@Environments.register_environment("webbrowser")
class WebBrowserEnvironment(BaseEnvironment):
    """Environment that exposes a ``BrowserSession`` to an agent.

    Use as an async context manager: ``__aenter__`` starts the browser
    session and ``__aexit__`` shuts it down.
    """

    config: WebBrowserEnvironmentConfig
    browser: Optional[BrowserSession] = None
    # Set when the previous action was cancelled because the page changed.
    cancelled_last_action: bool = False

    class Config:
        arbitrary_types_allowed = True

    async def __aenter__(self) -> Self:
        """Create and enter the browser session using the configured viewport."""
        config = self.config
        self.browser = self.browser_session(
            viewport_width=config.viewport_width,
            viewport_height=config.viewport_height,
            headless=config.headless,
        )
        await self.browser.__aenter__()
        if self.logger:
            self.logger.info("🌐 [bold blue]Browser session started.[/]")
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Exit the browser session, if one was started."""
        if self.browser:
            await self.browser.__aexit__(exc_type, exc_value, traceback)

    @property
    def info_for_user(self) -> str:
        """Return a short description of this environment."""
        return "This is a web browser environment. You can navigate the web, search the web, and perform actions on the web."  # noqa: E501

    @cached_property
    def tools(self) -> list[Tool]:
        """Return the tools bound to the active browser session.

        Raises:
            RuntimeError: if the browser session has not been started.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        return [BrowserTool(session=self.browser)]  # type: ignore

    @cached_property
    def browser_session(self) -> type[BrowserSession]:
        """Return the class used to create the browser session."""
        return BrowserSession

    @property
    def cookies(self) -> list[dict]:
        """Return an empty cookie list."""
        return []

    async def initialise(self) -> Observation:
        """Open the first page and return the initial observation.

        Performs the automatic Salesforce login when it is enabled and fully
        configured; otherwise navigates to ``config.homepage``.

        Raises:
            RuntimeError: if the browser session has not been started.
            Exception: any login or navigation error is logged and re-raised.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        config = self.config

        if self.logger:
            self.logger.debug(f"DEBUG: Initialising WebBrowserEnvironment. Homepage: {config.homepage}")

        login_configured = bool(
            config.salesforce_login_url and config.salesforce_username and config.salesforce_password
        )
        if config.perform_login and login_configured:
            if self.logger:
                self.logger.info(f"🔑 Performing automatic Salesforce login to {config.salesforce_login_url}")
            try:
                await self._perform_salesforce_login()
            except Exception as e:
                if self.logger:
                    self.logger.error(f"ERROR: Automatic login failed: {e}")
                raise  # Re-raise to propagate the error
        else:
            if config.perform_login and self.logger:
                # Previously this case fell back to the homepage silently.
                self.logger.warning(
                    "🔑 perform_login is enabled but the login URL or credentials are missing; "
                    "skipping automatic login."
                )
            try:
                await self.browser.goto(config.homepage)
                if self.logger:
                    self.logger.debug(f"DEBUG: Browser navigated to homepage. Current URL: {self.browser.current_url}")
            except Exception as e:
                if self.logger:
                    self.logger.error(f"ERROR: Failed to navigate to homepage {config.homepage}: {e}")
                raise  # Re-raise to propagate the error

        observation = await self._capture_observation()
        if self.logger:
            self.logger.debug(f"DEBUG: Initial observation captured. URL: {self.browser.current_url}")
        return observation

    async def _perform_salesforce_login(self) -> None:
        """Go to the login page, fill in the credentials and submit the form.

        Each element is looked up with a primary selector first and a broader
        fallback selector second. Errors propagate to the caller.
        """
        browser = self.browser
        if browser is None:
            raise RuntimeError("Browser session not initialized")
        config = self.config

        await browser.goto(config.salesforce_login_url)
        if self.logger:
            self.logger.debug(f"🔑 Navigated to login page: {browser.current_url}")

        page = browser.current_page
        if not page:
            # The form cannot be driven without a page. As before, the login
            # is skipped rather than failed, but it is now reported.
            if self.logger:
                self.logger.warning("🔑 No active page after navigating to the login URL; login skipped.")
            return

        username_selector = await _wait_for_first_available(
            page, '#username', 'input[name="username"], input[type="email"]'
        )
        password_selector = await _wait_for_first_available(
            page, '#password', 'input[name="password"], input[type="password"]'
        )

        await page.fill(username_selector, config.salesforce_username)
        await page.fill(password_selector, config.salesforce_password)
        if self.logger:
            self.logger.debug("🔑 Credentials filled, submitting login form")

        try:
            await page.click('#Login')
        except Exception:  # noqa: BLE001
            # Not a bare ``except`` so that cancellation propagates.
            await page.click('input[type="submit"], button[type="submit"], .btn-primary')

        # Wait for the post-login page to settle.
        await page.wait_for_load_state('networkidle', timeout=_LOGIN_NETWORK_IDLE_TIMEOUT_MS)

        if self.logger:
            self.logger.info(f"🔑 Login completed successfully. Current URL: {browser.current_url}")
            self.logger.info("🔑 Login process complete - agent will handle navigation using open_new_tab_and_go_to")

    async def _capture_observation(
        self,
        *,
        extra_info: Optional[dict[str, Any]] = None,
        tool_responses: Optional[list] = None,
    ) -> Observation:
        """Take a screenshot and build an ``Observation`` of the current page.

        Args:
            extra_info: entries added to ``info`` right after ``"url"``.
            tool_responses: when given, passed to ``State`` as
                ``tool_responses``; when ``None`` the argument is omitted.

        Raises:
            RuntimeError: if the browser session has not been started.
        """
        browser = self.browser
        if browser is None:
            raise RuntimeError("Browser session not initialized")
        config = self.config

        original_img, annotated_img = await browser.screenshot(delay=config.screenshot_delay)
        # Honour ``no_pois_in_image`` for every observation, not only the
        # initial one.
        shown_img = original_img if config.no_pois_in_image else annotated_img
        base64_image = base64.b64encode(shown_img).decode("utf-8")

        html_content = await browser.current_page.content() if config.include_html else None  # type: ignore

        info: dict[str, Any] = {"url": browser.current_url}
        if extra_info:
            info.update(extra_info)
        if config.record_pois:
            info["pois"] = browser.pois
        if config.keep_original_image:
            info["original_image"] = base64.b64encode(original_img).decode("utf-8")

        text = f"URL: {browser.current_url}"
        if config.include_poi_text:
            text += f"\n{browser.poi_text}"

        state_kwargs: dict[str, Any] = {"text": text, "image": base64_image, "html": html_content}
        if tool_responses is not None:
            state_kwargs["tool_responses"] = tool_responses

        return Observation(
            state=State(**state_kwargs),
            terminated=False,
            reward=None,
            info=info,
        )

    async def should_perform_action(self) -> bool:
        """Decide whether the pending action should run.

        Returns ``True`` immediately (without refreshing POIs) if the last
        action was cancelled, and clears that flag. Otherwise refreshes the
        POIs and returns ``False`` if their centroids changed, recording the
        cancellation. Returns ``False`` when there is no browser session.
        """
        if self.cancelled_last_action:
            self.cancelled_last_action = False
            return True

        if self.browser is None:
            return False

        centroids_before = [tuple(point) for point in self.browser.poi_centroids]
        await self.browser.update_poi()
        centroids_after = [tuple(point) for point in self.browser.poi_centroids]

        if centroids_before != centroids_after:
            # The page changed since the last observation.
            self.cancelled_last_action = True
            return False
        return True

    async def execute_action(self, action: Action) -> Observation:
        """Run the action's tool calls and return the resulting observation.

        If the page changed since the last observation, no tool is run and
        every tool call gets a cancellation response instead. A tool call that
        raises is reported as a response carrying the error text.

        Raises:
            RuntimeError: if the browser session has not been started.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")

        responses = []
        cancelled_tools_flag = False
        should_run = await self.should_perform_action()
        tool_calls = action.tool_calls or []

        if should_run:
            for tool_call in tool_calls:
                try:
                    tool_response = await self.execute_tool(tool_call)
                    if tool_response is None:
                        tool_response = ToolExecutionResponse(
                            content="Tool execution returned None", id=tool_call.id
                        )
                    else:
                        tool_response.id = tool_call.id
                except Exception as e:  # noqa: PERF203
                    if self.logger:
                        self.logger.warning("🌐 An error occurred taking action: %s", str(e), exc_info=False)
                    tool_response = ToolExecutionResponse(content=str(e), id=tool_call.id)
                responses.append(tool_response)
        else:
            if self.logger:
                self.logger.warning("🌐 Page changed since last observation, cancelling action.")
            self.cancelled_last_action = True
            responses.extend(
                ToolExecutionResponse(
                    content="The page changed before the action could be executed, instead of being ran it was cancelled.",  # noqa: E501
                    id=tool_call.id,
                )
                for tool_call in tool_calls
            )
            cancelled_tools_flag = True

        return await self._capture_observation(
            extra_info={"cancelled_tools": cancelled_tools_flag},
            tool_responses=responses,
        )

    async def observe(self) -> Observation:
        """Not implemented.

        Raises:
            RuntimeError: if the browser session has not been started.
            NotImplementedError: always, otherwise.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        raise NotImplementedError("Observe method not implemented")

    async def evaluate(self, **kwargs: dict[str, Any]) -> dict[str, Any]:
        """Return an empty evaluation result; ``kwargs`` are ignored."""
        return {}

    async def get_info(self) -> dict[str, Any]:
        """Return an empty info dictionary."""
        return {}