File size: 13,107 Bytes
c9803a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60c7a7f
 
 
 
 
 
c9803a3
 
 
 
 
 
 
 
 
 
 
 
 
 
60c7a7f
 
c9803a3
60c7a7f
 
 
c9803a3
 
 
60c7a7f
 
c9803a3
 
 
 
60c7a7f
 
c9803a3
 
 
 
 
 
 
60c7a7f
 
 
c9803a3
 
 
 
 
 
 
c73909d
c9803a3
 
60c7a7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c73909d
60c7a7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9803a3
 
60c7a7f
c9803a3
60c7a7f
c9803a3
 
 
 
60c7a7f
c9803a3
60c7a7f
 
c9803a3
60c7a7f
c9803a3
 
60c7a7f
 
c9803a3
 
 
60c7a7f
c9803a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60c7a7f
 
 
c9803a3
 
 
 
 
 
 
 
 
 
 
 
60c7a7f
 
 
 
c9803a3
 
60c7a7f
c9803a3
60c7a7f
 
c9803a3
 
60c7a7f
 
 
 
 
c9803a3
 
60c7a7f
 
c9803a3
 
 
60c7a7f
 
c9803a3
60c7a7f
 
c9803a3
 
 
 
 
 
60c7a7f
c9803a3
60c7a7f
c9803a3
 
 
 
60c7a7f
 
c9803a3
60c7a7f
c9803a3
 
60c7a7f
c9803a3
 
 
60c7a7f
c9803a3
 
 
 
 
 
 
 
 
 
60c7a7f
 
 
 
 
c9803a3
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import base64
from functools import cached_property
from typing import Any, Literal, Optional, Self, List # Added List import

from proxy_lite.browser.browser import BrowserSession
from proxy_lite.environments.environment_base import (
    Action,
    BaseEnvironment,
    BaseEnvironmentConfig,
    Environments,
    Observation,
    State,
)
from proxy_lite.tools import BrowserTool, Tool, ToolExecutionResponse
from proxy_lite.logger import logger

@Environments.register_environment_config("webbrowser")
class WebBrowserEnvironmentConfig(BaseEnvironmentConfig):
    """Settings for the ``webbrowser`` environment.

    Registered under the key ``"webbrowser"``; ``name`` is pinned to the same
    literal value.
    """

    name: Literal["webbrowser"] = "webbrowser"

    # Page opened by the environment's ``initialise`` when no automatic login runs.
    homepage: str = "https://google.com"
    # Not read in this file; presumably consumed by the browser layer -- TODO confirm.
    annotate_image: bool = True
    # Passed as ``delay`` to ``BrowserSession.screenshot`` (seconds).
    screenshot_delay: float = 1.0
    # Attach the current page's HTML to each observation state.
    include_html: bool = True
    # Append the browser's POI text to the observation text.
    include_poi_text: bool = True
    # Store the browser's POIs under ``info["pois"]``.
    record_pois: bool = True

    # Browser window geometry and mode, forwarded to the browser session.
    viewport_width: int = 1280
    viewport_height: int = 720
    # Not read in this file; presumably a Browserbase session timeout -- TODO confirm units.
    browserbase_timeout: int = 7200
    headless: bool = True

    # Also store the un-annotated screenshot under ``info["original_image"]``.
    keep_original_image: bool = False
    # Use the un-annotated screenshot as the observation image instead of the
    # annotated one (see the environment for where this is honored).
    no_pois_in_image: bool = False

    # Automatic Salesforce login. The login runs only when ``perform_login`` is
    # true AND the URL, username and password are all set; otherwise the
    # environment opens ``homepage``.
    perform_login: bool = False
    salesforce_login_url: str | None = None
    salesforce_username: str | None = None
    salesforce_password: str | None = None
    # Not read in this file; presumably where the agent navigates after login -- TODO confirm.
    target_url: str | None = None


@Environments.register_environment("webbrowser")
class WebBrowserEnvironment(BaseEnvironment):
    """Environment that drives a ``BrowserSession`` on behalf of an agent.

    Use it as an async context manager: ``__aenter__`` starts the browser
    session and ``__aexit__`` closes it. ``initialise`` produces the first
    observation (optionally after an automatic Salesforce login) and
    ``execute_action`` runs the agent's tool calls and returns the next one.
    """

    config: WebBrowserEnvironmentConfig
    browser: Optional[BrowserSession] = None
    # True when the previous action was cancelled because the page changed
    # underneath it; the next action is then run without re-checking the POIs.
    cancelled_last_action: bool = False

    class Config:
        arbitrary_types_allowed = True

    async def __aenter__(self) -> Self:
        """Create the browser session from the config and enter its context."""
        cfg = self.config
        self.browser = self.browser_session(
            viewport_width=cfg.viewport_width,
            viewport_height=cfg.viewport_height,
            headless=cfg.headless,
        )
        await self.browser.__aenter__()
        if self.logger:
            self.logger.info("🌐 [bold blue]Browser session started.[/]")
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Exit the browser session's context, if one was started."""
        if self.browser:
            await self.browser.__aexit__(exc_type, exc_value, traceback)

    @property
    def info_for_user(self) -> str:
        """Short human-readable description of this environment."""
        return "This is a web browser environment. You can navigate the web, search the web, and perform actions on the web."  # noqa: E501

    @cached_property
    def tools(self) -> list[Tool]:
        """Tools exposed to the agent: a single ``BrowserTool`` bound to the session.

        Raises:
            RuntimeError: if accessed before the browser session exists.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        return [BrowserTool(session=self.browser)]  # type: ignore

    @cached_property
    def browser_session(self) -> type[BrowserSession]:
        """Class used to create the browser session (overridable by subclasses)."""
        return BrowserSession

    @property
    def cookies(self) -> list[dict]:
        """Cookies for the session; this environment provides none."""
        return []

    async def initialise(self) -> Observation:
        """Open the starting page and return the first observation.

        When ``perform_login`` is set and the Salesforce login URL, username
        and password are all configured, the login form is filled and
        submitted. Otherwise the configured homepage is opened.

        Raises:
            RuntimeError: if the browser session has not been started.
            Exception: any navigation/login error is logged and re-raised.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")

        cfg = self.config
        if self.logger:
            self.logger.debug(f"DEBUG: Initialising WebBrowserEnvironment. Homepage: {cfg.homepage}")

        has_credentials = bool(
            cfg.salesforce_login_url and cfg.salesforce_username and cfg.salesforce_password
        )
        if cfg.perform_login and has_credentials:
            await self._perform_login(self.browser)
        else:
            if cfg.perform_login and self.logger:
                # Previously this fallback happened silently, which made a
                # misconfigured login hard to diagnose.
                self.logger.warning(
                    "perform_login is set but the Salesforce login URL, username or password "
                    "is missing; opening the homepage instead."
                )
            await self._open_homepage(self.browser)

        observation = await self._capture_observation(self.browser)
        if self.logger:
            self.logger.debug(f"DEBUG: Initial observation captured. URL: {self.browser.current_url}")
        return observation

    async def _open_homepage(self, browser: BrowserSession) -> None:
        """Navigate to the configured homepage; log and re-raise on failure."""
        homepage = self.config.homepage
        try:
            await browser.goto(homepage)
            if self.logger:
                self.logger.debug(f"DEBUG: Browser navigated to homepage. Current URL: {browser.current_url}")
        except Exception as e:
            if self.logger:
                self.logger.error(f"ERROR: Failed to navigate to homepage {homepage}: {e}")
            raise  # propagate so the caller sees the navigation failure

    @staticmethod
    async def _wait_for_first_selector(page: Any, primary: str, fallback: str, timeout_ms: int = 10000) -> str:
        """Wait for ``primary`` and, if that fails, for ``fallback``; return the one found.

        A failure while waiting for ``fallback`` propagates to the caller.
        """
        try:
            await page.wait_for_selector(primary, timeout=timeout_ms)
            return primary
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so that task
            # cancellation and KeyboardInterrupt are not swallowed here.
            await page.wait_for_selector(fallback, timeout=timeout_ms)
            return fallback

    async def _perform_login(self, browser: BrowserSession) -> None:
        """Fill in and submit the Salesforce login form using the configured credentials.

        Any error is logged and re-raised. If there is no active page after
        navigating to the login URL, the form is skipped and a warning is logged.
        """
        cfg = self.config
        if self.logger:
            self.logger.info(f"🔑 Performing automatic Salesforce login to {cfg.salesforce_login_url}")

        try:
            await browser.goto(cfg.salesforce_login_url)
            if self.logger:
                self.logger.debug(f"🔑 Navigated to login page: {browser.current_url}")

            page = browser.current_page
            if not page:
                if self.logger:
                    self.logger.warning(
                        "🔑 No active page after navigating to the login URL; login form was not submitted."
                    )
                return

            # Prefer the element ids; fall back to generic attribute selectors.
            username_selector = await self._wait_for_first_selector(
                page, "#username", 'input[name="username"], input[type="email"]'
            )
            password_selector = await self._wait_for_first_selector(
                page, "#password", 'input[name="password"], input[type="password"]'
            )

            await page.fill(username_selector, cfg.salesforce_username)
            await page.fill(password_selector, cfg.salesforce_password)
            if self.logger:
                self.logger.debug("🔑 Credentials filled, submitting login form")

            try:
                await page.click("#Login")
            except Exception:
                # Fallback selectors for the login button.
                await page.click('input[type="submit"], button[type="submit"], .btn-primary')

            # Re-read the current page: the session decides which page is
            # "current" once the submit has been processed.
            await browser.current_page.wait_for_load_state("networkidle", timeout=30000)

            if self.logger:
                self.logger.info(f"🔑 Login completed successfully. Current URL: {browser.current_url}")
                self.logger.info("🔑 Login process complete - agent will handle navigation using open_new_tab_and_go_to")
        except Exception as e:
            if self.logger:
                self.logger.error(f"ERROR: Automatic login failed: {e}")
            raise  # propagate so the caller sees the login failure

    async def _capture_observation(
        self,
        browser: BrowserSession,
        *,
        extra_info: Optional[dict[str, Any]] = None,
        tool_responses: Optional[list] = None,
    ) -> Observation:
        """Screenshot the browser and build an ``Observation`` from its current state.

        Shared by ``initialise`` and ``execute_action`` so both honor the same
        config switches. In particular ``no_pois_in_image`` now applies to every
        observation, not only the initial one.

        Args:
            browser: the active browser session.
            extra_info: entries added to ``info`` right after ``"url"``.
            tool_responses: passed to ``State`` only when not ``None``.
        """
        cfg = self.config
        original_img, annotated_img = await browser.screenshot(delay=cfg.screenshot_delay)
        shown_img = original_img if cfg.no_pois_in_image else annotated_img
        base64_image = base64.b64encode(shown_img).decode("utf-8")

        html_content = await browser.current_page.content() if cfg.include_html else None

        info: dict[str, Any] = {"url": browser.current_url}
        if extra_info:
            info.update(extra_info)
        if cfg.record_pois:
            info["pois"] = browser.pois
        if cfg.keep_original_image:
            info["original_image"] = base64.b64encode(original_img).decode("utf-8")

        text = f"URL: {browser.current_url}"
        if cfg.include_poi_text:
            text += f"\n{browser.poi_text}"

        state_kwargs: dict[str, Any] = {"text": text, "image": base64_image, "html": html_content}
        if tool_responses is not None:
            state_kwargs["tool_responses"] = tool_responses

        return Observation(
            state=State(**state_kwargs),
            terminated=False,
            reward=None,
            info=info,
        )

    async def should_perform_action(self) -> bool:
        """Decide whether the pending action may run against the current page.

        Returns ``True`` right after a cancelled action (clearing the flag
        without refreshing the POIs). Otherwise refreshes the POIs and returns
        ``False`` if their centroids changed, recording the cancellation, or if
        there is no browser.
        """
        if self.cancelled_last_action:
            self.cancelled_last_action = False
            return True

        if self.browser is None:
            return False

        # Compare POI centroids before and after a refresh to detect a page
        # change since the last observation.
        old_points = [tuple(point) for point in self.browser.poi_centroids]
        await self.browser.update_poi()
        new_points = [tuple(point) for point in self.browser.poi_centroids]

        if old_points != new_points:
            self.cancelled_last_action = True
            return False
        return True

    async def execute_action(self, action: Action) -> Observation:
        """Run the action's tool calls (or cancel them) and return the new observation.

        Each tool call yields exactly one ``ToolExecutionResponse`` carrying the
        call's id: the tool's result, the error text if it raised, or a
        cancellation notice when the page changed since the last observation.
        ``info["cancelled_tools"]`` is true when at least one call was cancelled.

        Raises:
            RuntimeError: if the browser session has not been started.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")

        responses: list = []
        cancelled_tools_flag = False

        if await self.should_perform_action():
            for tool_call in action.tool_calls or []:
                try:
                    tool_response = await self.execute_tool(tool_call)
                    if tool_response is None:
                        tool_response = ToolExecutionResponse(content="Tool execution returned None", id=tool_call.id)
                    else:
                        tool_response.id = tool_call.id
                    responses.append(tool_response)
                except Exception as e:  # noqa: PERF203
                    # A failing tool is reported back to the agent, not raised.
                    if self.logger:
                        self.logger.warning("🌐 An error occurred taking action: %s", str(e), exc_info=False)
                    responses.append(ToolExecutionResponse(content=str(e), id=tool_call.id))
        else:
            if self.logger:
                self.logger.warning("🌐 Page changed since last observation, cancelling action.")
            self.cancelled_last_action = True
            for tool_call in action.tool_calls or []:
                responses.append(
                    ToolExecutionResponse(
                        content="The page changed before the action could be executed, instead of being ran it was cancelled.",  # noqa: E501
                        id=tool_call.id,
                    )
                )
                cancelled_tools_flag = True

        return await self._capture_observation(
            self.browser,
            extra_info={"cancelled_tools": cancelled_tools_flag},
            tool_responses=responses,
        )

    async def observe(self) -> Observation:
        """Not implemented for this environment.

        Raises:
            RuntimeError: if the browser session has not been started.
            NotImplementedError: always, otherwise.
        """
        if self.browser is None:
            raise RuntimeError("Browser session not initialized")
        # NOTE(review): BrowserSession may not provide an ``observe`` method, so
        # nothing is delegated here.
        raise NotImplementedError("Observe method not implemented")

    async def evaluate(self, **kwargs: Any) -> dict[str, Any]:
        """Return evaluation results; this environment has none, so always ``{}``."""
        return {}

    async def get_info(self) -> dict[str, Any]:
        """Return extra environment info; this environment has none, so always ``{}``."""
        return {}