Commit · 405ca38
Parent(s): a4ca470

Add Git LFS support and migrate binary files
- .dockerignore +5 -0
- .env.example +68 -0
- .github/workflows/build.yml +124 -0
- .gitignore +192 -0
- .vscode/settings.json +11 -0
- Dockerfile +99 -0
- LICENSE +21 -0
- README_DEPLOYMENT.md +70 -0
- SECURITY.md +19 -0
- app.py +17 -0
- assets/examples/test.png +3 -0
- assets/web-ui.png +3 -0
- docker-compose.yml +80 -0
- requirements.txt +10 -0
- src/__init__.py +0 -0
- src/agent/__init__.py +0 -0
- src/agent/browser_use/browser_use_agent.py +169 -0
- src/agent/deep_research/deep_research_agent.py +1261 -0
- src/browser/__init__.py +0 -0
- src/browser/custom_browser.py +109 -0
- src/browser/custom_context.py +22 -0
- src/controller/__init__.py +0 -0
- src/controller/custom_controller.py +182 -0
- src/utils/__init__.py +0 -0
- src/utils/config.py +100 -0
- src/utils/llm_provider.py +354 -0
- src/utils/mcp_client.py +254 -0
- src/utils/utils.py +39 -0
- src/webui/__init__.py +0 -0
- src/webui/components/__init__.py +0 -0
- src/webui/components/agent_settings_tab.py +269 -0
- src/webui/components/browser_settings_tab.py +161 -0
- src/webui/components/browser_use_agent_tab.py +1083 -0
- src/webui/components/deep_research_agent_tab.py +457 -0
- src/webui/components/load_save_config_tab.py +50 -0
- src/webui/interface.py +95 -0
- src/webui/webui_manager.py +122 -0
- supervisord.conf +80 -0
- tests/test_agents.py +400 -0
- tests/test_controller.py +131 -0
- tests/test_llm_api.py +159 -0
- tests/test_playwright.py +31 -0
- webui.py +19 -0
.dockerignore
ADDED
@@ -0,0 +1,5 @@
+data
+tmp
+results
+
+.env

.env.example
ADDED
@@ -0,0 +1,68 @@
+OPENAI_ENDPOINT=https://api.openai.com/v1
+OPENAI_API_KEY=
+
+ANTHROPIC_API_KEY=
+ANTHROPIC_ENDPOINT=https://api.anthropic.com
+
+GOOGLE_API_KEY=
+
+AZURE_OPENAI_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+AZURE_OPENAI_API_VERSION=2025-01-01-preview
+
+DEEPSEEK_ENDPOINT=https://api.deepseek.com
+DEEPSEEK_API_KEY=
+
+MISTRAL_API_KEY=
+MISTRAL_ENDPOINT=https://api.mistral.ai/v1
+
+OLLAMA_ENDPOINT=http://localhost:11434
+
+ALIBABA_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1
+ALIBABA_API_KEY=
+
+MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1
+MOONSHOT_API_KEY=
+
+UNBOUND_ENDPOINT=https://api.getunbound.ai
+UNBOUND_API_KEY=
+
+SiliconFLOW_ENDPOINT=https://api.siliconflow.cn/v1/
+SiliconFLOW_API_KEY=
+
+IBM_ENDPOINT=https://us-south.ml.cloud.ibm.com
+IBM_API_KEY=
+IBM_PROJECT_ID=
+
+GROK_ENDPOINT="https://api.x.ai/v1"
+GROK_API_KEY=
+
+#set default LLM
+DEFAULT_LLM=openai
+
+
+# Set to false to disable anonymized telemetry
+ANONYMIZED_TELEMETRY=false
+
+# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
+BROWSER_USE_LOGGING_LEVEL=info
+
+# Browser settings
+BROWSER_PATH=
+BROWSER_USER_DATA=
+BROWSER_DEBUGGING_PORT=9222
+BROWSER_DEBUGGING_HOST=localhost
+# Set to true to keep browser open between AI tasks
+KEEP_BROWSER_OPEN=true
+USE_OWN_BROWSER=false
+BROWSER_CDP=
+# Display settings
+# Format: WIDTHxHEIGHTxDEPTH
+RESOLUTION=1920x1080x24
+# Width in pixels
+RESOLUTION_WIDTH=1920
+# Height in pixels
+RESOLUTION_HEIGHT=1080
+
+# VNC settings
+VNC_PASSWORD=youvncpassword

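Note: the app loads these settings at startup with python-dotenv (see `app.py` later in this commit). A minimal sketch of the consumption side, assuming the defaults shown in `.env.example` above; the validation at the end is illustrative, not part of the commit:

```python
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory

# Variable names come straight from .env.example
openai_key = os.getenv("OPENAI_API_KEY", "")
openai_endpoint = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
default_llm = os.getenv("DEFAULT_LLM", "openai")

if default_llm == "openai" and not openai_key:
    raise RuntimeError("OPENAI_API_KEY must be set when DEFAULT_LLM=openai")
```
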
.github/workflows/build.yml
ADDED
@@ -0,0 +1,124 @@
+name: Build Docker Image
+
+on:
+  release:
+    types: [published]
+  push:
+    branches: [main]
+
+env:
+  GITHUB_CR_REPO: ghcr.io/${{ github.repository }}
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+    steps:
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.GITHUB_CR_REPO }}
+
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          tags: |
+            ${{ env.GITHUB_CR_REPO }}
+          build-args: |
+            TARGETPLATFORM=${{ matrix.platform }}
+          outputs: type=image,push-by-digest=true,name-canonical=true,push=true
+
+      - name: Export digest
+        run: |
+          mkdir -p ${{ runner.temp }}/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "${{ runner.temp }}/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-${{ env.PLATFORM_PAIR }}
+          path: ${{ runner.temp }}/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  merge:
+    runs-on: ubuntu-latest
+    needs:
+      - build
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ runner.temp }}/digests
+          pattern: digests-*
+          merge-multiple: true
+
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.GITHUB_CR_REPO }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}
+
+      - name: Docker tags
+        run: |
+          tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")
+          if [ -z "$tags" ]; then
+            echo "DOCKER_METADATA_OUTPUT_VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
+            tags="-t ${{ env.GITHUB_CR_REPO }}:${{ github.ref_name }}"
+          fi
+          echo "DOCKER_METADATA_TAGS=$tags" >> $GITHUB_ENV
+
+      - name: Create manifest list and push
+        working-directory: ${{ runner.temp }}/digests
+        run: |
+          docker buildx imagetools create ${{ env.DOCKER_METADATA_TAGS }} \
+            $(printf '${{ env.GITHUB_CR_REPO }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.GITHUB_CR_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION }}

.gitignore
ADDED
@@ -0,0 +1,192 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+test_env/
+myenv
+
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+temp
+tmp
+
+
+.DS_Store
+
+private_example.py
+private_example
+
+browser_cookies.json
+cookies.json
+AgentHistory.json
+cv_04_24.pdf
+AgentHistoryList.json
+*.gif
+
+# For Sharing (.pem files)
+.gradio/
+
+# For Docker
+data/
+
+# For Config Files (Current Settings)
+.config.pkl
+*.pdf
+
+workflow

.vscode/settings.json
ADDED
@@ -0,0 +1,11 @@
+{
+    "python.analysis.typeCheckingMode": "basic",
+    "[python]": {
+        "editor.defaultFormatter": "charliermarsh.ruff",
+        "editor.formatOnSave": true,
+        "editor.codeActionsOnSave": {
+            "source.fixAll.ruff": "explicit",
+            "source.organizeImports.ruff": "explicit"
+        }
+    }
+}

Dockerfile
ADDED
@@ -0,0 +1,99 @@
+FROM python:3.11-slim
+
+# Set platform for multi-arch builds (Docker Buildx will set this)
+ARG TARGETPLATFORM
+ARG NODE_MAJOR=20
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    netcat-traditional \
+    gnupg \
+    curl \
+    unzip \
+    xvfb \
+    libgconf-2-4 \
+    libxss1 \
+    libnss3 \
+    libnspr4 \
+    libasound2 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    xdg-utils \
+    fonts-liberation \
+    dbus \
+    xauth \
+    x11vnc \
+    tigervnc-tools \
+    supervisor \
+    net-tools \
+    procps \
+    git \
+    python3-numpy \
+    fontconfig \
+    fonts-dejavu \
+    fonts-dejavu-core \
+    fonts-dejavu-extra \
+    vim \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install noVNC
+RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
+    && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
+    && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
+
+# Install Node.js using NodeSource PPA
+RUN mkdir -p /etc/apt/keyrings \
+    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
+    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
+    && apt-get update \
+    && apt-get install nodejs -y \
+    && rm -rf /var/lib/apt/lists/*
+
+# Verify Node.js and npm installation (optional, but good for debugging)
+RUN node -v && npm -v && npx -v
+
+# Set up working directory
+WORKDIR /app
+
+# Copy requirements and install Python dependencies
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install playwright browsers and dependencies
+# playwright documentation suggests PLAYWRIGHT_BROWSERS_PATH is still relevant
+# or that playwright installs to a similar default location that Playwright would.
+# Let's assume playwright respects PLAYWRIGHT_BROWSERS_PATH or its default install location is findable.
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-browsers
+RUN mkdir -p $PLAYWRIGHT_BROWSERS_PATH
+
+# Install recommended: Google Chrome (instead of just Chromium for better undetectability)
+# The 'playwright install chrome' command might download and place it.
+# The '--with-deps' equivalent for playwright install is to run 'playwright install-deps chrome' after.
+# RUN playwright install chrome --with-deps
+
+# Alternative: Install Chromium if Google Chrome is problematic in certain environments
+RUN playwright install chromium --with-deps
+
+
+# Copy the application code
+COPY . .
+
+# Set up supervisor configuration
+RUN mkdir -p /var/log/supervisor
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+EXPOSE 7788 6080 5901 9222
+
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
+#CMD ["/bin/bash"]

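The PLAYWRIGHT_BROWSERS_PATH handling above can be sanity-checked at container runtime. A short sketch, assuming the `playwright install chromium --with-deps` step succeeded; this check is illustrative and not part of the commit:

```python
# Launch the Chromium that playwright installed under
# PLAYWRIGHT_BROWSERS_PATH (/ms-browsers in this image), load a blank
# page, then exit. Fails loudly if the browser binary is missing.
import asyncio

from playwright.async_api import async_playwright


async def check() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        await page.goto("about:blank")
        await browser.close()


asyncio.run(check())
```
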
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Browser Use Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README_DEPLOYMENT.md
ADDED
@@ -0,0 +1,70 @@
+# Browser Automation WebUI - Deployment Guide
+
+## Deploying to Hugging Face Spaces
+
+### Prerequisites
+- A Hugging Face account
+- Your code pushed to a Git repository
+
+### Steps to Deploy
+
+1. **Create a new Space on Hugging Face**
+   - Go to https://huggingface.co/spaces
+   - Click "Create new Space"
+   - Choose "Gradio" as the SDK
+   - Select your repository or create a new one
+
+2. **File Structure for Deployment**
+   ```
+   web-ui/
+   ├── app.py              # Main entry point (created)
+   ├── requirements.txt    # Dependencies
+   ├── src/                # Source code
+   └── README.md           # Documentation
+   ```
+
+3. **Key Files for Deployment**
+   - `app.py`: Main entry point that Gradio will use
+   - `requirements.txt`: All necessary dependencies
+   - `src/`: Your source code directory
+
+### Troubleshooting the "Failed to canonicalize script path" Error
+
+This error typically occurs when:
+- Gradio can't find the main entry point
+- Import paths are not properly configured
+- File structure doesn't match deployment expectations
+
+**Solution**: The `app.py` file has been created to serve as the proper entry point for Gradio deployment.
+
+### Environment Variables
+
+If your app requires environment variables, you can set them in the Hugging Face Space settings:
+- Go to your Space settings
+- Navigate to "Repository secrets"
+- Add any required environment variables
+
+### Local Testing
+
+To test the deployment locally before pushing:
+
+```bash
+cd web-ui
+python app.py
+```
+
+This should start the Gradio interface without the canonicalization error.
+
+### Common Issues and Solutions
+
+1. **Import Errors**: Make sure all imports use relative paths from the project root
+2. **Missing Dependencies**: Ensure all packages are listed in `requirements.txt`
+3. **Path Issues**: The `app.py` file includes proper path configuration
+
+### Deployment Checklist
+
+- [ ] `app.py` exists and is properly configured
+- [ ] All dependencies are in `requirements.txt`
+- [ ] All import paths are correct
+- [ ] Environment variables are configured (if needed)
+- [ ] Local testing works without errors

SECURITY.md
ADDED
@@ -0,0 +1,19 @@
+## Reporting Security Issues
+
+If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure.
+
+**Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.**
+
+Instead, please open a new [Github security advisory](https://github.com/browser-use/web-ui/security/advisories/new).
+
+Please include as much of the information listed below as you can to help me better understand and resolve the issue:
+
+* The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
+* Full paths of source file(s) related to the manifestation of the issue
+* The location of the affected source code (tag/branch/commit or direct URL)
+* Any special configuration required to reproduce the issue
+* Step-by-step instructions to reproduce the issue
+* Proof-of-concept or exploit code (if possible)
+* Impact of the issue, including how an attacker might exploit the issue
+
+This information will help me triage your report more quickly.

app.py
ADDED
@@ -0,0 +1,17 @@
+import os
+import sys
+
+# Add the current directory to Python path to ensure imports work
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+from src.webui.interface import create_ui
+
+# Create the Gradio app
+demo = create_ui(theme_name="Ocean")
+
+# For deployment, we need to expose the app directly
+if __name__ == "__main__":
+    demo.launch()

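`demo.launch()` uses Gradio's defaults (binding to 127.0.0.1 on port 7860). For the Docker image above, which exposes port 7788, the entry point would typically bind explicitly; a hypothetical variant, with the port choice assumed from `EXPOSE 7788` rather than stated anywhere in this commit:

```python
# Hypothetical container entry point: server_name/server_port are
# standard gradio launch() parameters; 7788 matches the Dockerfile's
# EXPOSE line and the docker-compose port mapping.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7788)
```
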
assets/examples/test.png
ADDED
Binary file stored with Git LFS

assets/web-ui.png
ADDED
Binary file stored with Git LFS

docker-compose.yml
ADDED
@@ -0,0 +1,80 @@
+services:
+  # debug: docker compose run --rm -it browser-use-webui bash
+  browser-use-webui:
+    # image: ghcr.io/browser-use/web-ui  # Using precompiled image
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
+    ports:
+      - "7788:7788"
+      - "6080:6080"
+      - "5901:5901"
+      - "9222:9222"
+    environment:
+      # LLM API Keys & Endpoints
+      - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - ANTHROPIC_ENDPOINT=${ANTHROPIC_ENDPOINT:-https://api.anthropic.com}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
+      - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
+      - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
+      - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION:-2025-01-01-preview}
+      - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
+      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+      - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://localhost:11434}
+      - MISTRAL_ENDPOINT=${MISTRAL_ENDPOINT:-https://api.mistral.ai/v1}
+      - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
+      - ALIBABA_ENDPOINT=${ALIBABA_ENDPOINT:-https://dashscope.aliyuncs.com/compatible-mode/v1}
+      - ALIBABA_API_KEY=${ALIBABA_API_KEY:-}
+      - MOONSHOT_ENDPOINT=${MOONSHOT_ENDPOINT:-https://api.moonshot.cn/v1}
+      - MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
+      - UNBOUND_ENDPOINT=${UNBOUND_ENDPOINT:-https://api.getunbound.ai}
+      - UNBOUND_API_KEY=${UNBOUND_API_KEY:-}
+      - SiliconFLOW_ENDPOINT=${SiliconFLOW_ENDPOINT:-https://api.siliconflow.cn/v1/}
+      - SiliconFLOW_API_KEY=${SiliconFLOW_API_KEY:-}
+      - IBM_ENDPOINT=${IBM_ENDPOINT:-https://us-south.ml.cloud.ibm.com}
+      - IBM_API_KEY=${IBM_API_KEY:-}
+      - IBM_PROJECT_ID=${IBM_PROJECT_ID:-}
+
+      # Application Settings
+      - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
+      - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
+
+      # Browser Settings
+      - BROWSER_PATH=
+      - BROWSER_USER_DATA=
+      - BROWSER_DEBUGGING_PORT=${BROWSER_DEBUGGING_PORT:-9222}
+      - BROWSER_DEBUGGING_HOST=localhost
+      - USE_OWN_BROWSER=false
+      - KEEP_BROWSER_OPEN=true
+      - BROWSER_CDP=${BROWSER_CDP:-}  # e.g., http://localhost:9222
+
+      # Display Settings
+      - DISPLAY=:99
+      # This ENV is used by the Dockerfile during build time if playwright respects it.
+      # It's not strictly needed at runtime by docker-compose unless your app or scripts also read it.
+      - PLAYWRIGHT_BROWSERS_PATH=/ms-browsers  # Matches Dockerfile ENV
+      - RESOLUTION=${RESOLUTION:-1920x1080x24}
+      - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
+      - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
+
+      # VNC Settings
+      - VNC_PASSWORD=${VNC_PASSWORD:-youvncpassword}
+
+    volumes:
+      - /tmp/.X11-unix:/tmp/.X11-unix
+      # - ./my_chrome_data:/app/data/chrome_data  # Optional: persist browser data
+    restart: unless-stopped
+    shm_size: '2gb'
+    cap_add:
+      - SYS_ADMIN
+    tmpfs:
+      - /tmp
+    healthcheck:
+      test: ["CMD", "nc", "-z", "localhost", "5901"]  # VNC port
+      interval: 10s
+      timeout: 5s
+      retries: 3

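The compose file publishes the CDP debugging port (9222), and the deep-research agent later in this commit reads a `cdp_url` key from its `browser_config` dict. A sketch of pointing the agents at the containerized browser, assuming the default port mapping above; the exact key values are illustrative:

```python
# browser_config keys mirror what run_single_browser_task() reads in
# src/agent/deep_research/deep_research_agent.py. The URL assumes the
# default "9222:9222" mapping from this compose file.
browser_config = {
    "headless": True,
    "use_own_browser": True,
    "cdp_url": "http://localhost:9222",
}
```
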
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+browser-use==0.1.48
+pyperclip==1.9.0
+gradio==5.27.0
+json-repair
+langchain-mistralai==0.2.4
+MainContentExtractor==0.0.4
+langchain-ibm==0.3.10
+langchain_mcp_adapters==0.0.9
+langgraph==0.3.34
+langchain-community

src/__init__.py
ADDED
File without changes

src/agent/__init__.py
ADDED
File without changes

src/agent/browser_use/browser_use_agent.py
ADDED
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+
+# from lmnr.sdk.decorators import observe
+from browser_use.agent.gif import create_history_gif
+from browser_use.agent.service import Agent, AgentHookFunc
+from browser_use.agent.views import (
+    ActionResult,
+    AgentHistory,
+    AgentHistoryList,
+    AgentStepInfo,
+    ToolCallingMethod,
+)
+from browser_use.browser.views import BrowserStateHistory
+from browser_use.utils import time_execution_async
+from dotenv import load_dotenv
+from browser_use.agent.message_manager.utils import is_model_without_tool_support
+
+load_dotenv()
+logger = logging.getLogger(__name__)
+
+SKIP_LLM_API_KEY_VERIFICATION = (
+    os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
+)
+
+
+class BrowserUseAgent(Agent):
+    def _set_tool_calling_method(self) -> ToolCallingMethod | None:
+        tool_calling_method = self.settings.tool_calling_method
+        if tool_calling_method == 'auto':
+            if is_model_without_tool_support(self.model_name):
+                return 'raw'
+            elif self.chat_model_library == 'ChatGoogleGenerativeAI':
+                return None
+            elif self.chat_model_library == 'ChatOpenAI':
+                return 'function_calling'
+            elif self.chat_model_library == 'AzureChatOpenAI':
+                return 'function_calling'
+            else:
+                return None
+        else:
+            return tool_calling_method
+
+    @time_execution_async("--run (agent)")
+    async def run(
+            self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
+            on_step_end: AgentHookFunc | None = None
+    ) -> AgentHistoryList:
+        """Execute the task with maximum number of steps"""
+
+        loop = asyncio.get_event_loop()
+
+        # Set up the Ctrl+C signal handler with callbacks specific to this agent
+        from browser_use.utils import SignalHandler
+
+        signal_handler = SignalHandler(
+            loop=loop,
+            pause_callback=self.pause,
+            resume_callback=self.resume,
+            custom_exit_callback=None,  # No special cleanup needed on forced exit
+            exit_on_second_int=True,
+        )
+        signal_handler.register()
+
+        try:
+            self._log_agent_run()
+
+            # Execute initial actions if provided
+            if self.initial_actions:
+                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
+                self.state.last_result = result
+
+            for step in range(max_steps):
+                # Check if waiting for user input after Ctrl+C
+                if self.state.paused:
+                    signal_handler.wait_for_resume()
+                    signal_handler.reset()
+
+                # Check if we should stop due to too many failures
+                if self.state.consecutive_failures >= self.settings.max_failures:
+                    logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
+                    break
+
+                # Check control flags before each step
+                if self.state.stopped:
+                    logger.info('Agent stopped')
+                    break
+
+                while self.state.paused:
+                    await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning
+                    if self.state.stopped:  # Allow stopping while paused
+                        break
+
+                if on_step_start is not None:
+                    await on_step_start(self)
+
+                step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
+                await self.step(step_info)
+
+                if on_step_end is not None:
+                    await on_step_end(self)
+
+                if self.state.history.is_done():
+                    if self.settings.validate_output and step < max_steps - 1:
+                        if not await self._validate_output():
+                            continue
+
+                    await self.log_completion()
+                    break
+            else:
+                error_message = 'Failed to complete task in maximum steps'
+
+                self.state.history.history.append(
+                    AgentHistory(
+                        model_output=None,
+                        result=[ActionResult(error=error_message, include_in_memory=True)],
+                        state=BrowserStateHistory(
+                            url='',
+                            title='',
+                            tabs=[],
+                            interacted_element=[],
+                            screenshot=None,
+                        ),
+                        metadata=None,
+                    )
+                )
+
+                logger.info(f'❌ {error_message}')
+
+            return self.state.history
+
+        except KeyboardInterrupt:
+            # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
+            logger.info('Got KeyboardInterrupt during execution, returning current history')
+            return self.state.history
+
+        finally:
+            # Unregister signal handlers before cleanup
+            signal_handler.unregister()
+
+            if self.settings.save_playwright_script_path:
+                logger.info(
+                    f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
+                )
+                try:
+                    # Extract sensitive data keys if sensitive_data is provided
+                    keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
+                    # Pass browser and context config to the saving method
+                    self.state.history.save_as_playwright_script(
+                        self.settings.save_playwright_script_path,
+                        sensitive_data_keys=keys,
+                        browser_config=self.browser.config,
+                        context_config=self.browser_context.config,
+                    )
+                except Exception as script_gen_err:
+                    # Log any error during script generation/saving
+                    logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
+
+            await self.close()
+
+            if self.settings.generate_gif:
+                output_path: str = 'agent_history.gif'
+                if isinstance(self.settings.generate_gif, str):
+                    output_path = self.settings.generate_gif
+
+                create_history_gif(task=self.task, history=self.state.history, output_path=output_path)

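A minimal usage sketch for `BrowserUseAgent`, mirroring how `deep_research_agent.py` (the next file in this commit) constructs it; the `ChatOpenAI` model is an illustrative assumption, since any LangChain chat model can be passed as `llm`, and `langchain_openai` is not pinned in this commit's requirements.txt:

```python
import asyncio

from browser_use.browser.browser import BrowserConfig
from langchain_openai import ChatOpenAI  # assumption: any LangChain chat model works here

from src.agent.browser_use.browser_use_agent import BrowserUseAgent
from src.browser.custom_browser import CustomBrowser


async def main() -> None:
    browser = CustomBrowser(config=BrowserConfig(headless=True))
    context = await browser.new_context()
    agent = BrowserUseAgent(
        task="Find the latest browser-use release notes",
        llm=ChatOpenAI(model="gpt-4o"),
        browser=browser,
        browser_context=context,
    )
    history = await agent.run(max_steps=25)  # capped by the for/else loop above
    print(history.final_result())
    await context.close()
    await browser.close()


asyncio.run(main())
```
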
src/agent/deep_research/deep_research_agent.py
ADDED
@@ -0,0 +1,1261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import json
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
import threading
|
6 |
+
import uuid
|
7 |
+
from pathlib import Path
|
8 |
+
from typing import Any, Dict, List, Optional, TypedDict
|
9 |
+
|
10 |
+
from browser_use.browser.browser import BrowserConfig
|
11 |
+
from langchain_community.tools.file_management import (
|
12 |
+
ListDirectoryTool,
|
13 |
+
ReadFileTool,
|
14 |
+
WriteFileTool,
|
15 |
+
)
|
16 |
+
|
17 |
+
# Langchain imports
|
18 |
+
from langchain_core.messages import (
|
19 |
+
AIMessage,
|
20 |
+
BaseMessage,
|
21 |
+
HumanMessage,
|
22 |
+
SystemMessage,
|
23 |
+
ToolMessage,
|
24 |
+
)
|
25 |
+
from langchain_core.prompts import ChatPromptTemplate
|
26 |
+
from langchain_core.tools import StructuredTool, Tool
|
27 |
+
|
28 |
+
# Langgraph imports
|
29 |
+
from langgraph.graph import StateGraph
|
30 |
+
from pydantic import BaseModel, Field
|
31 |
+
|
32 |
+
from browser_use.browser.context import BrowserContextConfig
|
33 |
+
|
34 |
+
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
35 |
+
from src.browser.custom_browser import CustomBrowser
|
36 |
+
from src.controller.custom_controller import CustomController
|
37 |
+
from src.utils.mcp_client import setup_mcp_client_and_tools
|
38 |
+
|
39 |
+
logger = logging.getLogger(__name__)
|
40 |
+
|
41 |
+
# Constants
|
42 |
+
REPORT_FILENAME = "report.md"
|
43 |
+
PLAN_FILENAME = "research_plan.md"
|
44 |
+
SEARCH_INFO_FILENAME = "search_info.json"
|
45 |
+
|
46 |
+
_AGENT_STOP_FLAGS = {}
|
47 |
+
_BROWSER_AGENT_INSTANCES = {}
|
48 |
+
|
49 |
+
|
50 |
+
async def run_single_browser_task(
|
51 |
+
task_query: str,
|
52 |
+
task_id: str,
|
53 |
+
llm: Any, # Pass the main LLM
|
54 |
+
browser_config: Dict[str, Any],
|
55 |
+
stop_event: threading.Event,
|
56 |
+
use_vision: bool = False,
|
57 |
+
) -> Dict[str, Any]:
|
58 |
+
"""
|
59 |
+
Runs a single BrowserUseAgent task.
|
60 |
+
Manages browser creation and closing for this specific task.
|
61 |
+
"""
|
62 |
+
if not BrowserUseAgent:
|
63 |
+
return {
|
64 |
+
"query": task_query,
|
65 |
+
"error": "BrowserUseAgent components not available.",
|
66 |
+
}
|
67 |
+
|
68 |
+
# --- Browser Setup ---
|
69 |
+
# These should ideally come from the main agent's config
|
70 |
+
headless = browser_config.get("headless", False)
|
71 |
+
window_w = browser_config.get("window_width", 1280)
|
72 |
+
window_h = browser_config.get("window_height", 1100)
|
73 |
+
browser_user_data_dir = browser_config.get("user_data_dir", None)
|
74 |
+
use_own_browser = browser_config.get("use_own_browser", False)
|
75 |
+
browser_binary_path = browser_config.get("browser_binary_path", None)
|
76 |
+
wss_url = browser_config.get("wss_url", None)
|
77 |
+
cdp_url = browser_config.get("cdp_url", None)
|
78 |
+
disable_security = browser_config.get("disable_security", False)
|
79 |
+
|
80 |
+
bu_browser = None
|
81 |
+
bu_browser_context = None
|
82 |
+
try:
|
83 |
+
logger.info(f"Starting browser task for query: {task_query}")
|
84 |
+
extra_args = []
|
85 |
+
if use_own_browser:
|
86 |
+
browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
|
87 |
+
if browser_binary_path == "":
|
88 |
+
browser_binary_path = None
|
89 |
+
browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
|
90 |
+
if browser_user_data:
|
91 |
+
extra_args += [f"--user-data-dir={browser_user_data}"]
|
92 |
+
else:
|
93 |
+
browser_binary_path = None
|
94 |
+
|
95 |
+
bu_browser = CustomBrowser(
|
96 |
+
config=BrowserConfig(
|
97 |
+
headless=headless,
|
98 |
+
browser_binary_path=browser_binary_path,
|
99 |
+
extra_browser_args=extra_args,
|
100 |
+
wss_url=wss_url,
|
101 |
+
cdp_url=cdp_url,
|
102 |
+
new_context_config=BrowserContextConfig(
|
103 |
+
window_width=window_w,
|
104 |
+
window_height=window_h,
|
105 |
+
)
|
106 |
+
)
|
107 |
+
)
|
108 |
+
|
109 |
+
context_config = BrowserContextConfig(
|
110 |
+
save_downloads_path="./tmp/downloads",
|
111 |
+
window_height=window_h,
|
112 |
+
window_width=window_w,
|
113 |
+
force_new_context=True,
|
114 |
+
)
|
115 |
+
bu_browser_context = await bu_browser.new_context(config=context_config)
|
116 |
+
|
117 |
+
# Simple controller example, replace with your actual implementation if needed
|
118 |
+
bu_controller = CustomController()
|
119 |
+
|
120 |
+
# Construct the task prompt for BrowserUseAgent
|
121 |
+
# Instruct it to find specific info and return title/URL
|
122 |
+
bu_task_prompt = f"""
|
123 |
+
Research Task: {task_query}
|
124 |
+
Objective: Find relevant information answering the query.
|
125 |
+
Output Requirements: For each relevant piece of information found, please provide:
|
126 |
+
1. A concise summary of the information.
|
127 |
+
2. The title of the source page or document.
|
128 |
+
3. The URL of the source.
|
129 |
+
Focus on accuracy and relevance. Avoid irrelevant details.
|
130 |
+
PDF cannot directly extract _content, please try to download first, then using read_file, if you can't save or read, please try other methods.
|
131 |
+
"""
|
132 |
+
|
133 |
+
bu_agent_instance = BrowserUseAgent(
|
134 |
+
task=bu_task_prompt,
|
135 |
+
llm=llm, # Use the passed LLM
|
136 |
+
browser=bu_browser,
|
137 |
+
browser_context=bu_browser_context,
|
138 |
+
controller=bu_controller,
|
139 |
+
use_vision=use_vision,
|
140 |
+
source="webui",
|
141 |
+
)
|
142 |
+
|
143 |
+
# Store instance for potential stop() call
|
144 |
+
task_key = f"{task_id}_{uuid.uuid4()}"
|
145 |
+
_BROWSER_AGENT_INSTANCES[task_key] = bu_agent_instance
|
146 |
+
|
147 |
+
# --- Run with Stop Check ---
|
148 |
+
# BrowserUseAgent needs to internally check a stop signal or have a stop method.
|
149 |
+
# We simulate checking before starting and assume `run` might be interruptible
|
150 |
+
# or have its own stop mechanism we can trigger via bu_agent_instance.stop().
|
151 |
+
if stop_event.is_set():
|
152 |
+
logger.info(f"Browser task for '{task_query}' cancelled before start.")
|
153 |
+
return {"query": task_query, "result": None, "status": "cancelled"}
|
154 |
+
|
155 |
+
# The run needs to be awaitable and ideally accept a stop signal or have a .stop() method
|
156 |
+
# result = await bu_agent_instance.run(max_steps=max_steps) # Add max_steps if applicable
|
157 |
+
# Let's assume a simplified run for now
|
158 |
+
logger.info(f"Running BrowserUseAgent for: {task_query}")
|
159 |
+
result = await bu_agent_instance.run() # Assuming run is the main method
|
160 |
+
logger.info(f"BrowserUseAgent finished for: {task_query}")
|
161 |
+
|
162 |
+
final_data = result.final_result()
|
163 |
+
|
164 |
+
if stop_event.is_set():
|
165 |
+
logger.info(f"Browser task for '{task_query}' stopped during execution.")
|
166 |
+
return {"query": task_query, "result": final_data, "status": "stopped"}
|
167 |
+
else:
|
168 |
+
logger.info(f"Browser result for '{task_query}': {final_data}")
|
169 |
+
return {"query": task_query, "result": final_data, "status": "completed"}
|
170 |
+
|
171 |
+
except Exception as e:
|
172 |
+
logger.error(
|
173 |
+
f"Error during browser task for query '{task_query}': {e}", exc_info=True
|
174 |
+
)
|
175 |
+
return {"query": task_query, "error": str(e), "status": "failed"}
|
176 |
+
finally:
|
177 |
+
if bu_browser_context:
|
178 |
+
try:
|
179 |
+
await bu_browser_context.close()
|
180 |
+
bu_browser_context = None
|
181 |
+
logger.info("Closed browser context.")
|
182 |
+
except Exception as e:
|
183 |
+
logger.error(f"Error closing browser context: {e}")
|
184 |
+
if bu_browser:
|
185 |
+
try:
|
186 |
+
await bu_browser.close()
|
187 |
+
bu_browser = None
|
188 |
+
logger.info("Closed browser.")
|
189 |
+
except Exception as e:
|
190 |
+
logger.error(f"Error closing browser: {e}")
|
191 |
+
|
192 |
+
if task_key in _BROWSER_AGENT_INSTANCES:
|
193 |
+
del _BROWSER_AGENT_INSTANCES[task_key]
|
194 |
+
|
195 |
+
|
196 |
+
class BrowserSearchInput(BaseModel):
|
197 |
+
queries: List[str] = Field(
|
198 |
+
description="List of distinct search queries to find information relevant to the research task."
|
199 |
+
)
|
200 |
+
|
201 |
+
|
202 |
+
async def _run_browser_search_tool(
|
203 |
+
queries: List[str],
|
204 |
+
task_id: str, # Injected dependency
|
205 |
+
llm: Any, # Injected dependency
|
206 |
+
browser_config: Dict[str, Any],
|
207 |
+
stop_event: threading.Event,
|
208 |
+
max_parallel_browsers: int = 1,
|
209 |
+
) -> List[Dict[str, Any]]:
|
210 |
+
"""
|
211 |
+
Internal function to execute parallel browser searches based on LLM-provided queries.
|
212 |
+
Handles concurrency and stop signals.
|
213 |
+
"""
|
214 |
+
|
215 |
+
# Limit queries just in case LLM ignores the description
|
216 |
+
queries = queries[:max_parallel_browsers]
|
217 |
+
logger.info(
|
218 |
+
f"[Browser Tool {task_id}] Running search for {len(queries)} queries: {queries}"
|
219 |
+
)
|
220 |
+
|
221 |
+
results = []
|
222 |
+
semaphore = asyncio.Semaphore(max_parallel_browsers)
|
223 |
+
|
224 |
+
async def task_wrapper(query):
|
225 |
+
async with semaphore:
|
226 |
+
if stop_event.is_set():
|
227 |
+
logger.info(
|
228 |
+
f"[Browser Tool {task_id}] Skipping task due to stop signal: {query}"
|
229 |
+
)
|
230 |
+
return {"query": query, "result": None, "status": "cancelled"}
|
231 |
+
# Pass necessary injected configs and the stop event
|
232 |
+
return await run_single_browser_task(
|
233 |
+
query,
|
234 |
+
task_id,
|
235 |
+
llm, # Pass the main LLM (or a dedicated one if needed)
|
236 |
+
browser_config,
|
237 |
+
stop_event,
|
238 |
+
# use_vision could be added here if needed
|
239 |
+
)
|
240 |
+
|
241 |
+
tasks = [task_wrapper(query) for query in queries]
|
242 |
+
search_results = await asyncio.gather(*tasks, return_exceptions=True)
|
243 |
+
|
244 |
+
processed_results = []
|
245 |
+
for i, res in enumerate(search_results):
|
246 |
+
query = queries[i] # Get corresponding query
|
247 |
+
if isinstance(res, Exception):
|
248 |
+
logger.error(
|
249 |
+
f"[Browser Tool {task_id}] Gather caught exception for query '{query}': {res}",
|
250 |
+
exc_info=True,
|
251 |
+
)
|
252 |
+
processed_results.append(
|
253 |
+
{"query": query, "error": str(res), "status": "failed"}
|
254 |
+
)
|
255 |
+
elif isinstance(res, dict):
|
256 |
+
processed_results.append(res)
|
257 |
+
else:
|
258 |
+
logger.error(
|
259 |
+
f"[Browser Tool {task_id}] Unexpected result type for query '{query}': {type(res)}"
|
260 |
+
)
|
261 |
+
processed_results.append(
|
262 |
+
{"query": query, "error": "Unexpected result type", "status": "failed"}
|
263 |
+
)
|
264 |
+
|
265 |
+
logger.info(
|
266 |
+
f"[Browser Tool {task_id}] Finished search. Results count: {len(processed_results)}"
|
267 |
+
)
|
268 |
+
return processed_results
|
269 |
+
|
270 |
+
|
271 |
+
def create_browser_search_tool(
|
272 |
+
llm: Any,
|
273 |
+
browser_config: Dict[str, Any],
|
274 |
+
task_id: str,
|
275 |
+
stop_event: threading.Event,
|
276 |
+
max_parallel_browsers: int = 1,
|
277 |
+
) -> StructuredTool:
|
278 |
+
"""Factory function to create the browser search tool with necessary dependencies."""
|
279 |
+
# Use partial to bind the dependencies that aren't part of the LLM call arguments
|
280 |
+
from functools import partial
|
281 |
+
|
282 |
+
bound_tool_func = partial(
|
283 |
+
_run_browser_search_tool,
|
284 |
+
task_id=task_id,
|
285 |
+
llm=llm,
|
286 |
+
browser_config=browser_config,
|
287 |
+
stop_event=stop_event,
|
288 |
+
max_parallel_browsers=max_parallel_browsers,
|
289 |
+
)
|
290 |
+
|
291 |
+
return StructuredTool.from_function(
|
292 |
+
coroutine=bound_tool_func,
|
293 |
+
name="parallel_browser_search",
|
294 |
+
description=f"""Use this tool to actively search the web for information related to a specific research task or question.
|
295 |
+
It runs up to {max_parallel_browsers} searches in parallel using a browser agent for better results than simple scraping.
|
296 |
+
Provide a list of distinct search queries(up to {max_parallel_browsers}) that are likely to yield relevant information.""",
|
297 |
+
args_schema=BrowserSearchInput,
|
298 |
+
)
|
299 |
+
|
300 |
+
|
301 |
+
# --- Langgraph State Definition ---
|
302 |
+
|
303 |
+
|
304 |
+
class ResearchTaskItem(TypedDict):
|
305 |
+
# step: int # Maybe step within category, or just implicit by order
|
306 |
+
task_description: str
|
307 |
+
status: str # "pending", "completed", "failed"
|
308 |
+
queries: Optional[List[str]]
|
309 |
+
result_summary: Optional[str]
|
310 |
+
|
311 |
+
|
312 |
+
class ResearchCategoryItem(TypedDict):
|
313 |
+
category_name: str
|
314 |
+
tasks: List[ResearchTaskItem]
|
315 |
+
# Optional: category_status: str # Could be "pending", "in_progress", "completed"
|
316 |
+
|
317 |
+
|
318 |
+
class DeepResearchState(TypedDict):
|
319 |
+
task_id: str
|
320 |
+
topic: str
|
321 |
+
research_plan: List[ResearchCategoryItem] # CHANGED
|
322 |
+
search_results: List[Dict[str, Any]]
|
323 |
+
llm: Any
|
324 |
+
tools: List[Tool]
|
325 |
+
output_dir: Path
|
326 |
+
browser_config: Dict[str, Any]
|
327 |
+
final_report: Optional[str]
|
328 |
+
current_category_index: int
|
329 |
+
current_task_index_in_category: int
|
330 |
+
stop_requested: bool
|
331 |
+
error_message: Optional[str]
|
332 |
+
messages: List[BaseMessage]
|
333 |
+
|
334 |
+
|
335 |
+
# --- Langgraph Nodes ---
|
336 |
+
|
337 |
+
|

# --- Langgraph Nodes ---


def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
    """Loads a previously saved plan and search results so an interrupted task can resume."""
    state_updates = {}
    plan_file = os.path.join(output_dir, PLAN_FILENAME)
    search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)

    loaded_plan: List[ResearchCategoryItem] = []
    next_cat_idx, next_task_idx = 0, 0
    found_pending = False

    if os.path.exists(plan_file):
        try:
            with open(plan_file, "r", encoding="utf-8") as f:
                current_category: Optional[ResearchCategoryItem] = None
                lines = f.readlines()
                cat_counter = 0
                task_counter_in_cat = 0

                for line_num, line_content in enumerate(lines):
                    line = line_content.strip()
                    if line.startswith("## "):  # Category heading
                        if current_category:  # Save the previous category
                            loaded_plan.append(current_category)
                            if not found_pending:  # Previous category was all done; advance the counter
                                cat_counter += 1
                                task_counter_in_cat = 0
                        category_name = line[line.find(" "):].strip()  # Text after "## X. "
                        current_category = ResearchCategoryItem(category_name=category_name, tasks=[])
                    elif (line.startswith("- [ ]") or line.startswith("- [x]") or line.startswith(
                            "- [-]")) and current_category:  # Task item
                        status = "pending"
                        if line.startswith("- [x]"):
                            status = "completed"
                        elif line.startswith("- [-]"):
                            status = "failed"

                        task_desc = line[5:].strip()
                        current_category["tasks"].append(
                            ResearchTaskItem(task_description=task_desc, status=status, queries=None,
                                             result_summary=None)
                        )
                        if status == "pending" and not found_pending:
                            next_cat_idx = cat_counter
                            next_task_idx = task_counter_in_cat
                            found_pending = True
                        if not found_pending:  # Only increment while previous tasks were completed/failed
                            task_counter_in_cat += 1

            if current_category:  # Append the last category
                loaded_plan.append(current_category)

            if loaded_plan:
                state_updates["research_plan"] = loaded_plan
                if not found_pending:  # All tasks were completed or failed
                    next_cat_idx = len(loaded_plan)  # Points beyond the last category
                    next_task_idx = 0
                state_updates["current_category_index"] = next_cat_idx
                state_updates["current_task_index_in_category"] = next_task_idx
                logger.info(
                    f"Loaded hierarchical research plan from {plan_file}. "
                    f"Next task: Category {next_cat_idx}, Task {next_task_idx} in category."
                )
            else:
                logger.warning(f"Plan file {plan_file} was empty or malformed.")

        except Exception as e:
            logger.error(f"Failed to load or parse research plan {plan_file}: {e}", exc_info=True)
            state_updates["error_message"] = f"Failed to load research plan: {e}"
    else:
        logger.info(f"Plan file {plan_file} not found. Will start fresh.")

    if os.path.exists(search_file):
        try:
            with open(search_file, "r", encoding="utf-8") as f:
                state_updates["search_results"] = json.load(f)
            logger.info(f"Loaded search results from {search_file}")
        except Exception as e:
            logger.error(f"Failed to load search results {search_file}: {e}")
            state_updates["error_message"] = (
                    state_updates.get("error_message", "") + f" Failed to load search results: {e}").strip()

    return state_updates

def _save_plan_to_md(plan: List[ResearchCategoryItem], output_dir: str):
    plan_file = os.path.join(output_dir, PLAN_FILENAME)
    try:
        with open(plan_file, "w", encoding="utf-8") as f:
            f.write("# Research Plan\n\n")
            for cat_idx, category in enumerate(plan):
                f.write(f"## {cat_idx + 1}. {category['category_name']}\n\n")
                for task_idx, task in enumerate(category['tasks']):
                    marker = "- [x]" if task["status"] == "completed" else "- [ ]" if task[
                        "status"] == "pending" else "- [-]"  # [-] marks a failed task
                    f.write(f"  {marker} {task['task_description']}\n")
                f.write("\n")
        logger.info(f"Hierarchical research plan saved to {plan_file}")
    except Exception as e:
        logger.error(f"Failed to save research plan to {plan_file}: {e}")

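# For reference, _save_plan_to_md emits (and _load_previous_state re-parses) a markdown
# checklist of this shape, shown here for a hypothetical one-category plan:
#
#     # Research Plan
#
#     ## 1. Understanding Core Concepts
#
#       - [x] Define the primary terminology.
#       - [ ] Identify the fundamental principles.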
def _save_search_results_to_json(results: List[Dict[str, Any]], output_dir: str):
    """Saves (overwrites) the collected search results as a JSON file."""
    search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
    try:
        # Simple overwrite for now; appending could be supported later
        with open(search_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        logger.info(f"Search results saved to {search_file}")
    except Exception as e:
        logger.error(f"Failed to save search results to {search_file}: {e}")

def _save_report_to_md(report: str, output_dir: Path):
    """Saves the final report to a markdown file."""
    report_file = os.path.join(output_dir, REPORT_FILENAME)
    try:
        with open(report_file, "w", encoding="utf-8") as f:
            f.write(report)
        logger.info(f"Final report saved to {report_file}")
    except Exception as e:
        logger.error(f"Failed to save final report to {report_file}: {e}")

async def planning_node(state: DeepResearchState) -> Dict[str, Any]:
    logger.info("--- Entering Planning Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping planning.")
        return {"stop_requested": True}

    llm = state["llm"]
    topic = state["topic"]
    existing_plan = state.get("research_plan")
    output_dir = state["output_dir"]

    if existing_plan and (
            state.get("current_category_index", 0) > 0 or state.get("current_task_index_in_category", 0) > 0):
        logger.info("Resuming with existing plan.")
        _save_plan_to_md(existing_plan, output_dir)  # Ensure it's saved initially
        # current_category_index and current_task_index_in_category are set by _load_previous_state
        return {"research_plan": existing_plan}

    logger.info(f"Generating new research plan for topic: {topic}")

    prompt_text = f"""You are a meticulous research assistant. Your goal is to create a hierarchical research plan to thoroughly investigate the topic: "{topic}".
The plan should be structured into several main research categories. Each category should contain a list of specific, actionable research tasks or questions.
Format the output as a JSON list of objects. Each object represents a research category and should have:
1. "category_name": A string for the name of the research category.
2. "tasks": A list of strings, where each string is a specific research task for that category.

Example JSON Output:
[
  {{
    "category_name": "Understanding Core Concepts and Definitions",
    "tasks": [
      "Define the primary terminology associated with '{topic}'.",
      "Identify the fundamental principles and theories underpinning '{topic}'."
    ]
  }},
  {{
    "category_name": "Historical Development and Key Milestones",
    "tasks": [
      "Trace the historical evolution of '{topic}'.",
      "Identify key figures, events, or breakthroughs in the development of '{topic}'."
    ]
  }},
  {{
    "category_name": "Current State-of-the-Art and Applications",
    "tasks": [
      "Analyze the current advancements and prominent applications of '{topic}'.",
      "Investigate ongoing research and active areas of development related to '{topic}'."
    ]
  }},
  {{
    "category_name": "Challenges, Limitations, and Future Outlook",
    "tasks": [
      "Identify the major challenges and limitations currently facing '{topic}'.",
      "Explore potential future trends, ethical considerations, and societal impacts of '{topic}'."
    ]
  }}
]

Generate a plan with 3-10 categories and 2-6 tasks per category for the topic "{topic}", according to the complexity of the topic.
Ensure the output is a valid JSON array.
"""
    messages = [
        SystemMessage(content="You are a research planning assistant outputting JSON."),
        HumanMessage(content=prompt_text),
    ]

    try:
        response = await llm.ainvoke(messages)
        raw_content = response.content
        # The LLM might wrap the JSON in backticks
        if raw_content.strip().startswith("```json"):
            raw_content = raw_content.strip()[7:-3].strip()
        elif raw_content.strip().startswith("```"):
            raw_content = raw_content.strip()[3:-3].strip()

        logger.debug(f"LLM response for plan: {raw_content}")
        parsed_plan_from_llm = json.loads(raw_content)

        new_plan: List[ResearchCategoryItem] = []
        for cat_idx, category_data in enumerate(parsed_plan_from_llm):
            if not isinstance(category_data,
                              dict) or "category_name" not in category_data or "tasks" not in category_data:
                logger.warning(f"Skipping invalid category data: {category_data}")
                continue

            tasks: List[ResearchTaskItem] = []
            for task_idx, task_desc in enumerate(category_data["tasks"]):
                if isinstance(task_desc, str):
                    tasks.append(
                        ResearchTaskItem(
                            task_description=task_desc,
                            status="pending",
                            queries=None,
                            result_summary=None,
                        )
                    )
                elif isinstance(task_desc, dict) and "task_description" in task_desc:
                    # Sometimes the LLM emits tasks as {{"task_description": "..."}} objects
                    tasks.append(
                        ResearchTaskItem(
                            task_description=task_desc["task_description"],
                            status="pending",
                            queries=None,
                            result_summary=None,
                        )
                    )
                elif isinstance(task_desc, dict) and "task" in task_desc:  # Common LLM mistake
                    tasks.append(
                        ResearchTaskItem(
                            task_description=task_desc["task"],
                            status="pending",
                            queries=None,
                            result_summary=None,
                        )
                    )
                else:
                    logger.warning(
                        f"Skipping invalid task data: {task_desc} in category {category_data['category_name']}")

            new_plan.append(
                ResearchCategoryItem(
                    category_name=category_data["category_name"],
                    tasks=tasks,
                )
            )

        if not new_plan:
            logger.error("LLM failed to generate a valid plan structure from JSON.")
            return {"error_message": "Failed to generate research plan structure."}

        logger.info(f"Generated research plan with {len(new_plan)} categories.")
        _save_plan_to_md(new_plan, output_dir)  # Save the hierarchical plan

        return {
            "research_plan": new_plan,
            "current_category_index": 0,
            "current_task_index_in_category": 0,
            "search_results": [],
        }

    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON from LLM for plan: {e}. Response was: {raw_content}", exc_info=True)
        return {"error_message": f"LLM generated invalid JSON for research plan: {e}"}
    except Exception as e:
        logger.error(f"Error during planning: {e}", exc_info=True)
        return {"error_message": f"LLM Error during planning: {e}"}

async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
    logger.info("--- Entering Research Execution Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping research execution.")
        return {
            "stop_requested": True,
            "current_category_index": state["current_category_index"],
            "current_task_index_in_category": state["current_task_index_in_category"],
        }

    plan = state["research_plan"]
    cat_idx = state["current_category_index"]
    task_idx = state["current_task_index_in_category"]
    llm = state["llm"]
    tools = state["tools"]
    output_dir = str(state["output_dir"])
    task_id = state["task_id"]  # For _AGENT_STOP_FLAGS

    # This check should ideally be handled by `should_continue`
    if not plan or cat_idx >= len(plan):
        logger.info("Research plan complete or categories exhausted.")
        return {}  # Should route to synthesis

    current_category = plan[cat_idx]
    if task_idx >= len(current_category["tasks"]):
        logger.info(f"All tasks in category '{current_category['category_name']}' completed. Moving to next category.")
        # The next iteration is caught by should_continue, or by this node with updated indices
        return {
            "current_category_index": cat_idx + 1,
            "current_task_index_in_category": 0,
            "messages": state["messages"],  # Pass messages along
        }

    current_task = current_category["tasks"][task_idx]

    if current_task["status"] == "completed":
        logger.info(
            f"Task '{current_task['task_description']}' in category '{current_category['category_name']}' already completed. Skipping.")
        # Advance to the next task (or the next category if this one is exhausted)
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0
        return {
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "messages": state["messages"],  # Pass messages along
        }

    logger.info(
        f"Executing research task: '{current_task['task_description']}' (Category: '{current_category['category_name']}')"
    )

    llm_with_tools = llm.bind_tools(tools)

    # Construct messages for the LLM invocation
    task_prompt_content = (
        f"Current Research Category: {current_category['category_name']}\n"
        f"Specific Task: {current_task['task_description']}\n\n"
        "Please use the available tools, especially 'parallel_browser_search', to gather information for this specific task. "
        "Provide focused search queries relevant ONLY to this task. "
        "If you believe you have sufficient information from previous steps for this specific task, you can indicate that you are ready to summarize or that no further search is needed."
    )
    current_task_message_history = [
        HumanMessage(content=task_prompt_content)
    ]
    if not state["messages"]:  # First actual execution message
        invocation_messages = [
            SystemMessage(
                content="You are a research assistant executing one task of a research plan. Focus on the current task only."),
        ] + current_task_message_history
    else:
        invocation_messages = state["messages"] + current_task_message_history

    try:
        logger.info(f"Invoking LLM with tools for task: {current_task['task_description']}")
        ai_response: BaseMessage = await llm_with_tools.ainvoke(invocation_messages)
        logger.info("LLM invocation complete.")

        tool_results = []
        executed_tool_names = []
        current_search_results = state.get("search_results", [])  # Get existing search results

        if not isinstance(ai_response, AIMessage) or not ai_response.tool_calls:
            logger.warning(
                f"LLM did not call any tool for task '{current_task['task_description']}'. Response: {ai_response.content[:100]}..."
            )
            current_task["status"] = "pending"  # Or "completed_no_tool" if the LLM explains it's done
            current_task["result_summary"] = f"LLM did not use a tool. Response: {ai_response.content}"
            # We still save the plan and return a proper state update; the indices belong
            # in the graph state, not on the task dict.
            _save_plan_to_md(plan, output_dir)
            return {
                "research_plan": plan,
                "current_category_index": cat_idx,
                "current_task_index_in_category": task_idx,
            }
        else:
            # Process tool calls
            for tool_call in ai_response.tool_calls:
                tool_name = tool_call.get("name")
                tool_args = tool_call.get("args", {})
                tool_call_id = tool_call.get("id")

                logger.info(f"LLM requested tool call: {tool_name} with args: {tool_args}")
                executed_tool_names.append(tool_name)
                selected_tool = next((t for t in tools if t.name == tool_name), None)

                if not selected_tool:
                    logger.error(f"LLM called tool '{tool_name}' which is not available.")
                    tool_results.append(
                        ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id=tool_call_id))
                    continue

                try:
                    stop_event = _AGENT_STOP_FLAGS.get(task_id)
                    if stop_event and stop_event.is_set():
                        logger.info(f"Stop requested before executing tool: {tool_name}")
                        current_task["status"] = "pending"  # Or a new "stopped" status
                        _save_plan_to_md(plan, output_dir)
                        return {"stop_requested": True, "research_plan": plan, "current_category_index": cat_idx,
                                "current_task_index_in_category": task_idx}

                    logger.info(f"Executing tool: {tool_name}")
                    tool_output = await selected_tool.ainvoke(tool_args)
                    logger.info(f"Tool '{tool_name}' executed successfully.")

                    if tool_name == "parallel_browser_search":
                        current_search_results.extend(tool_output)  # tool_output is List[Dict]
                    else:  # Other tools may need specific handling; for now, just log and store
                        logger.info(f"Result from tool '{tool_name}': {str(tool_output)[:200]}...")
                        # Storing non-browser results might need a different structure or key in search_results
                        current_search_results.append(
                            {"tool_name": tool_name, "args": tool_args, "output": str(tool_output),
                             "status": "completed"})

                    tool_results.append(ToolMessage(content=json.dumps(tool_output), tool_call_id=tool_call_id))

                except Exception as e:
                    logger.error(f"Error executing tool '{tool_name}': {e}", exc_info=True)
                    tool_results.append(
                        ToolMessage(content=f"Error executing tool {tool_name}: {e}", tool_call_id=tool_call_id))
                    current_search_results.append(
                        {"tool_name": tool_name, "args": tool_args, "status": "failed", "error": str(e)})

            # After processing all tool calls for this task
            step_failed_tool_execution = any("Error:" in str(tr.content) for tr in tool_results)
            # Consider a task successful if a browser search was attempted and did not immediately
            # error out during the call; the browser search itself returns a status per query.
            browser_tool_attempted_successfully = "parallel_browser_search" in executed_tool_names and not step_failed_tool_execution

            if step_failed_tool_execution:
                current_task["status"] = "failed"
                current_task[
                    "result_summary"] = f"Tool execution failed. Errors: {[tr.content for tr in tool_results if 'Error' in str(tr.content)]}"
            elif executed_tool_names:  # If any tool was called
                current_task["status"] = "completed"
                current_task["result_summary"] = f"Executed tool(s): {', '.join(executed_tool_names)}."
                # TODO: Could ask the LLM to summarize the tool_results for this task, rather than just listing tools.
            else:  # No tool calls, but the AI response had an (empty) .tool_calls structure
                current_task["status"] = "failed"  # Or a more specific status
                current_task["result_summary"] = "LLM prepared for a tool call but provided no tools."

        # Save progress
        _save_plan_to_md(plan, output_dir)
        _save_search_results_to_json(current_search_results, output_dir)

        # Determine the next indices
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0

        updated_messages = state["messages"] + current_task_message_history + [ai_response] + tool_results

        return {
            "research_plan": plan,
            "search_results": current_search_results,
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "messages": updated_messages,
        }

    except Exception as e:
        logger.error(f"Unhandled error during research execution for task '{current_task['task_description']}': {e}",
                     exc_info=True)
        current_task["status"] = "failed"
        _save_plan_to_md(plan, output_dir)
        # Determine the next indices even on error, to attempt to move on
        next_task_idx = task_idx + 1
        next_cat_idx = cat_idx
        if next_task_idx >= len(current_category["tasks"]):
            next_cat_idx += 1
            next_task_idx = 0
        return {
            "research_plan": plan,
            "current_category_index": next_cat_idx,
            "current_task_index_in_category": next_task_idx,
            "error_message": f"Core Execution Error on task '{current_task['task_description']}': {e}",
            "messages": state["messages"] + current_task_message_history,  # Preserve messages up to the error
        }

async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
    """Synthesizes the final report from the collected search results."""
    logger.info("--- Entering Synthesis Node ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, skipping synthesis.")
        return {"stop_requested": True}

    llm = state["llm"]
    topic = state["topic"]
    search_results = state.get("search_results", [])
    output_dir = state["output_dir"]
    plan = state["research_plan"]  # Include the plan for context

    if not search_results:
        logger.warning("No search results found to synthesize report.")
        report = f"# Research Report: {topic}\n\nNo information was gathered during the research process."
        _save_report_to_md(report, output_dir)
        return {"final_report": report}

    logger.info(
        f"Synthesizing report from {len(search_results)} collected search result entries."
    )

    # Prepare context for the LLM: format search results, grouped by query or tool
    formatted_results = ""
    references = {}  # Currently never populated: the browser agent returns plain summaries, not per-URL citations
    ref_count = 1
    for i, result_entry in enumerate(search_results):
        query = result_entry.get("query", "Unknown Query")  # From parallel_browser_search
        tool_name = result_entry.get("tool_name")  # From other tools
        status = result_entry.get("status", "unknown")
        result_data = result_entry.get("result")  # From BrowserUseAgent's final_result
        tool_output_str = result_entry.get("output")  # From other tools

        if tool_name == "parallel_browser_search" and status == "completed" and result_data:
            # result_data is already a summary string from BrowserUseAgent
            formatted_results += f'### Finding from Web Search Query: "{query}"\n'
            formatted_results += f"- **Summary:**\n{result_data}\n"
            # If result_data contained title/URL, they would be formatted here; the current
            # BrowserUseAgent returns a string summary directly as 'final_data' in run_single_browser_task.
            formatted_results += "---\n"
        elif tool_name != "parallel_browser_search" and status == "completed" and tool_output_str:
            formatted_results += f'### Finding from Tool: "{tool_name}" (Args: {result_entry.get("args")})\n'
            formatted_results += f"- **Output:**\n{tool_output_str}\n"
            formatted_results += "---\n"
        elif status == "failed":
            error = result_entry.get("error")
            q_or_t = f"Query: \"{query}\"" if query != "Unknown Query" else f"Tool: \"{tool_name}\""
            formatted_results += f'### Failed {q_or_t}\n'
            formatted_results += f"- **Error:** {error}\n"
            formatted_results += "---\n"

    # Prepare the research plan context
    plan_summary = "\nResearch Plan Followed:\n"
    for cat_idx, category in enumerate(plan):
        plan_summary += f"\n#### Category {cat_idx + 1}: {category['category_name']}\n"
        for task_idx, task in enumerate(category['tasks']):
            marker = "[x]" if task["status"] == "completed" else "[ ]" if task["status"] == "pending" else "[-]"
            plan_summary += f"  - {marker} {task['task_description']}\n"

    synthesis_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are a professional researcher tasked with writing a comprehensive and well-structured report based on collected findings.
The report should address the research topic thoroughly, synthesizing the information gathered from various sources.
Structure the report logically:
1. Briefly introduce the topic and the report's scope (mentioning the research plan followed, including categories and tasks, is good).
2. Discuss the key findings, organizing them thematically, possibly aligning with the research categories. Analyze, compare, and contrast information.
3. Summarize the main points and offer concluding thoughts.

Ensure the tone is objective and professional.
If findings are contradictory or incomplete, acknowledge this.
""",  # Citation handling removed for simplicity, since the browser agent returns summaries.
            ),
            (
                "human",
                f"""
**Research Topic:** {topic}

{plan_summary}

**Collected Findings:**
```
{formatted_results}
```

Please generate the final research report in Markdown format based **only** on the information above.
""",
            ),
        ]
    )

    try:
        response = await llm.ainvoke(
            synthesis_prompt.format_prompt(
                topic=topic,
                plan_summary=plan_summary,
                formatted_results=formatted_results,
            ).to_messages()
        )
        final_report_md = response.content

        # Append the reference list automatically to the end of the generated markdown
        if references:
            report_references_section = "\n\n## References\n\n"
            # Sort refs by ID for consistent output
            sorted_refs = sorted(references.values(), key=lambda x: x["id"])
            for ref in sorted_refs:
                report_references_section += (
                    f"[{ref['id']}] {ref['title']} - {ref['url']}\n"
                )
            final_report_md += report_references_section

        logger.info("Successfully synthesized the final report.")
        _save_report_to_md(final_report_md, output_dir)
        return {"final_report": final_report_md}

    except Exception as e:
        logger.error(f"Error during report synthesis: {e}", exc_info=True)
        return {"error_message": f"LLM Error during synthesis: {e}"}


# --- Langgraph Edges and Conditional Logic ---

def should_continue(state: DeepResearchState) -> str:
    logger.info("--- Evaluating Condition: Should Continue? ---")
    if state.get("stop_requested"):
        logger.info("Stop requested, routing to END.")
        return "end_run"
    if state.get("error_message") and "Core Execution Error" in state["error_message"]:  # Critical error in a node
        logger.warning(f"Critical error detected: {state['error_message']}. Routing to END.")
        return "end_run"

    plan = state.get("research_plan")
    cat_idx = state.get("current_category_index", 0)
    task_idx = state.get("current_task_index_in_category", 0)  # This is the *next* task to check

    if not plan:
        logger.warning("No research plan found. Routing to END.")
        return "end_run"

    # Check whether the current indices point to a valid pending task
    if cat_idx < len(plan):
        current_category = plan[cat_idx]
        if task_idx < len(current_category["tasks"]):
            # We are about to execute the task at plan[cat_idx]["tasks"][task_idx];
            # research_execution_node handles the case where it is already completed.
            logger.info(
                f"Plan has potential pending tasks (next up: Category {cat_idx}, Task {task_idx}). Routing to Research Execution."
            )
            return "execute_research"
        else:  # task_idx is out of bounds for the current category; check the next category
            if cat_idx + 1 < len(plan):  # There is a next category
                logger.info(
                    f"Finished tasks in category {cat_idx}. Moving to category {cat_idx + 1}. Routing to Research Execution."
                )
                # The previous execution node already advanced the indices to the start of the next category.
                return "execute_research"

    # We've gone through all categories and tasks (cat_idx >= len(plan))
    logger.info("All plan categories and tasks processed or current indices are out of bounds. Routing to Synthesis.")
    return "synthesize_report"


# --- DeepResearchAgent Class ---

class DeepResearchAgent:
    def __init__(
            self,
            llm: Any,
            browser_config: Dict[str, Any],
            mcp_server_config: Optional[Dict[str, Any]] = None,
    ):
        """
        Initializes the DeepResearchAgent.

        Args:
            llm: The LangChain-compatible language model instance.
            browser_config: Configuration dictionary for the BrowserUseAgent tool.
                Example: {"headless": True, "window_width": 1280, ...}
            mcp_server_config: Optional configuration for the MCP client.
        """
        self.llm = llm
        self.browser_config = browser_config
        self.mcp_server_config = mcp_server_config
        self.mcp_client = None
        self.stopped = False
        self.graph = self._compile_graph()
        self.current_task_id: Optional[str] = None
        self.stop_event: Optional[threading.Event] = None
        self.runner: Optional[asyncio.Task] = None  # Holds the asyncio task for run()

    async def _setup_tools(
            self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
    ) -> List[Tool]:
        """Sets up the basic file I/O tools, the browser search tool, and optional MCP tools."""
        tools = [
            WriteFileTool(),
            ReadFileTool(),
            ListDirectoryTool(),
        ]  # Basic file operations
        browser_use_tool = create_browser_search_tool(
            llm=self.llm,
            browser_config=self.browser_config,
            task_id=task_id,
            stop_event=stop_event,
            max_parallel_browsers=max_parallel_browsers,
        )
        tools += [browser_use_tool]
        # Add MCP tools if config is provided
        if self.mcp_server_config:
            try:
                logger.info("Setting up MCP client and tools...")
                if not self.mcp_client:
                    self.mcp_client = await setup_mcp_client_and_tools(
                        self.mcp_server_config
                    )
                mcp_tools = self.mcp_client.get_tools()
                logger.info(f"Loaded {len(mcp_tools)} MCP tools.")
                tools.extend(mcp_tools)
            except Exception as e:
                logger.error(f"Failed to set up MCP tools: {e}", exc_info=True)
        # Deduplicate tools by name
        tools_map = {tool.name: tool for tool in tools}
        return list(tools_map.values())

    async def close_mcp_client(self):
        if self.mcp_client:
            await self.mcp_client.__aexit__(None, None, None)
            self.mcp_client = None

    def _compile_graph(self) -> StateGraph:
        """Compiles the Langgraph state machine."""
        workflow = StateGraph(DeepResearchState)

        # Add nodes
        workflow.add_node("plan_research", planning_node)
        workflow.add_node("execute_research", research_execution_node)
        workflow.add_node("synthesize_report", synthesis_node)
        workflow.add_node(
            "end_run", lambda state: logger.info("--- Reached End Run Node ---") or {}
        )  # Simple end node

        # Define edges
        workflow.set_entry_point("plan_research")

        workflow.add_edge(
            "plan_research", "execute_research"
        )  # Always execute after planning

        # Conditional edge after execution
        workflow.add_conditional_edges(
            "execute_research",
            should_continue,
            {
                "execute_research": "execute_research",  # Loop back if more steps
                "synthesize_report": "synthesize_report",  # Move to synthesis if done
                "end_run": "end_run",  # End if stop requested or error
            },
        )

        workflow.add_edge("synthesize_report", "end_run")  # End after synthesis

        app = workflow.compile()
        return app

    async def run(
            self,
            topic: str,
            task_id: Optional[str] = None,
            save_dir: str = "./tmp/deep_research",
            max_parallel_browsers: int = 1,
    ) -> Dict[str, Any]:
        """
        Starts the deep research process.

        Args:
            topic: The research topic.
            task_id: Optional existing task ID to resume. If None, a new ID is generated.
            save_dir: Root directory for task output (restricted to ./tmp/deep_research).
            max_parallel_browsers: Maximum number of parallel browser searches.

        Returns:
            A result dictionary with the status, message, task ID, and final state.
        """
        if self.runner and not self.runner.done():
            logger.warning(
                "Agent is already running. Please stop the current task first."
            )
            return {
                "status": "error",
                "message": "Agent already running.",
                "task_id": self.current_task_id,
            }

        self.current_task_id = task_id if task_id else str(uuid.uuid4())
        safe_root_dir = "./tmp/deep_research"
        # Resolve to an absolute path before the prefix check, so relative save_dirs compare correctly
        normalized_save_dir = os.path.abspath(os.path.normpath(save_dir))
        if not normalized_save_dir.startswith(os.path.abspath(safe_root_dir)):
            logger.warning(f"Unsafe save_dir detected: {save_dir}. Using default directory.")
            normalized_save_dir = os.path.abspath(safe_root_dir)
        output_dir = os.path.join(normalized_save_dir, self.current_task_id)
        os.makedirs(output_dir, exist_ok=True)

        logger.info(
            f"Starting research task ID: {self.current_task_id} for topic: '{topic}'"
        )
        logger.info(f"Output directory: {output_dir}")

        self.stop_event = threading.Event()
        _AGENT_STOP_FLAGS[self.current_task_id] = self.stop_event
        agent_tools = await self._setup_tools(
            self.current_task_id, self.stop_event, max_parallel_browsers
        )
        initial_state: DeepResearchState = {
            "task_id": self.current_task_id,
            "topic": topic,
            "research_plan": [],
            "search_results": [],
            "messages": [],
            "llm": self.llm,
            "tools": agent_tools,
            "output_dir": Path(output_dir),
            "browser_config": self.browser_config,
            "final_report": None,
            "current_category_index": 0,
            "current_task_index_in_category": 0,
            "stop_requested": False,
            "error_message": None,
        }

        if task_id:
            logger.info(f"Attempting to resume task {task_id}...")
            loaded_state = _load_previous_state(task_id, output_dir)
            initial_state.update(loaded_state)
            if loaded_state.get("research_plan"):
                logger.info(
                    f"Resuming with {len(loaded_state['research_plan'])} plan categories "
                    f"and {len(loaded_state.get('search_results', []))} existing results. "
                    f"Next task: Cat {initial_state['current_category_index']}, Task {initial_state['current_task_index_in_category']}"
                )
                # Use the newly provided topic even when resuming (rather than a stored one)
                initial_state["topic"] = topic
            else:
                logger.warning(
                    f"Resume requested for {task_id}, but no previous plan found. Starting fresh."
                )

        # --- Execute the graph using ainvoke ---
        final_state = None
        status = "unknown"
        message = None
        try:
            logger.info(f"Invoking graph execution for task {self.current_task_id}...")
            self.runner = asyncio.create_task(self.graph.ainvoke(initial_state))
            final_state = await self.runner
            logger.info(f"Graph execution finished for task {self.current_task_id}.")

            # Determine status based on the final state
            if self.stop_event and self.stop_event.is_set():
                status = "stopped"
                message = "Research process was stopped by request."
                logger.info(message)
            elif final_state and final_state.get("error_message"):
                status = "error"
                message = final_state["error_message"]
                logger.error(f"Graph execution completed with error: {message}")
            elif final_state and final_state.get("final_report"):
                status = "completed"
                message = "Research process completed successfully."
                logger.info(message)
            else:
                # Ended without error or report (e.g. empty plan, or stopped before synthesis)
                status = "finished_incomplete"
                message = "Research process finished, but may be incomplete (no final report generated)."
                logger.warning(message)

        except asyncio.CancelledError:
            status = "cancelled"
            message = f"Agent run task cancelled for {self.current_task_id}."
            logger.info(message)
            # final_state remains None, or the state before cancellation if checkpointing was used
        except Exception as e:
            status = "error"
            message = f"Unhandled error during graph execution for {self.current_task_id}: {e}"
            logger.error(message, exc_info=True)
            # final_state remains None, or the state before the error
        finally:
            logger.info(f"Cleaning up resources for task {self.current_task_id}")
            task_id_to_clean = self.current_task_id

            self.stop_event = None
            self.current_task_id = None
            self.runner = None  # Mark runner as finished
            await self.close_mcp_client()

        # Return a result dictionary including the status and the final state if available
        return {
            "status": status,
            "message": message,
            "task_id": task_id_to_clean,  # Use the stored task_id
            "final_state": final_state if final_state else {},
        }

    async def _stop_lingering_browsers(self, task_id):
        """Attempts to stop any BrowserUseAgent instances associated with the task_id."""
        keys_to_stop = [
            key for key in _BROWSER_AGENT_INSTANCES if key.startswith(f"{task_id}_")
        ]
        if not keys_to_stop:
            return

        logger.warning(
            f"Found {len(keys_to_stop)} potentially lingering browser agents for task {task_id}. Attempting stop..."
        )
        for key in keys_to_stop:
            agent_instance = _BROWSER_AGENT_INSTANCES.get(key)
            try:
                if agent_instance:
                    # Assuming the BrowserUseAgent has an async stop method
                    await agent_instance.stop()
                    logger.info(f"Called stop() on browser agent instance {key}")
            except Exception as e:
                logger.error(
                    f"Error calling stop() on browser agent instance {key}: {e}"
                )

    async def stop(self):
        """Signals the currently running agent task to stop."""
        if not self.current_task_id or not self.stop_event:
            logger.info("No agent task is currently running.")
            return

        logger.info(f"Stop requested for task ID: {self.current_task_id}")
        self.stop_event.set()  # Signal the stop event
        self.stopped = True
        await self._stop_lingering_browsers(self.current_task_id)

    def close(self):
        self.stopped = False
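For orientation, a minimal driver for the agent above might look like the following sketch. It is illustrative only: `ChatOpenAI` stands in for any LangChain-compatible chat model, and in this repo the agent is normally wired up through the web UI rather than called directly.

import asyncio
from langchain_openai import ChatOpenAI  # assumption: any LangChain chat model works here

from src.agent.deep_research.deep_research_agent import DeepResearchAgent


async def main():
    agent = DeepResearchAgent(
        llm=ChatOpenAI(model="gpt-4o"),
        browser_config={"headless": True, "window_width": 1280, "window_height": 1080},
    )
    # plan_research -> execute_research (loop) -> synthesize_report;
    # artifacts land under ./tmp/deep_research/<task_id>/
    result = await agent.run("history of browser automation", max_parallel_browsers=2)
    print(result["status"], result["task_id"])


asyncio.run(main())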
src/browser/__init__.py
ADDED
File without changes
src/browser/custom_browser.py
ADDED
@@ -0,0 +1,109 @@
import asyncio
import logging
import socket

from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
from playwright.async_api import Playwright, async_playwright

from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.chrome import (
    CHROME_ARGS,
    CHROME_DETERMINISTIC_RENDERING_ARGS,
    CHROME_DISABLE_SECURITY_ARGS,
    CHROME_DOCKER_ARGS,
    CHROME_HEADLESS_ARGS,
)
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
from browser_use.utils import time_execution_async

from .custom_context import CustomBrowserContext

logger = logging.getLogger(__name__)


class CustomBrowser(Browser):

    async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
        """Create a browser context, merging the browser-level and context-level configs."""
        browser_config = self.config.model_dump() if self.config else {}
        context_config = config.model_dump() if config else {}
        merged_config = {**browser_config, **context_config}
        return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)

    async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """Sets up and returns a Playwright Browser instance with anti-detection measures."""
        assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'

        # Use the configured window size from new_context_config if available
        if (
                not self.config.headless
                and hasattr(self.config, 'new_context_config')
                and hasattr(self.config.new_context_config, 'window_width')
                and hasattr(self.config.new_context_config, 'window_height')
        ):
            screen_size = {
                'width': self.config.new_context_config.window_width,
                'height': self.config.new_context_config.window_height,
            }
            offset_x, offset_y = get_window_adjustments()
        elif self.config.headless:
            screen_size = {'width': 1920, 'height': 1080}
            offset_x, offset_y = 0, 0
        else:
            screen_size = get_screen_resolution()
            offset_x, offset_y = get_window_adjustments()

        chrome_args = {
            f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
            *CHROME_ARGS,
            *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
            *(CHROME_HEADLESS_ARGS if self.config.headless else []),
            *(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
            *(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
            f'--window-position={offset_x},{offset_y}',
            f'--window-size={screen_size["width"]},{screen_size["height"]}',
            *self.config.extra_browser_args,
        }

        # Check if the Chrome remote debugging port is already taken;
        # if so, remove the remote-debugging-port arg to prevent conflicts
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
                chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')

        browser_class = getattr(playwright, self.config.browser_class)
        args = {
            'chromium': list(chrome_args),
            'firefox': [
                *{
                    '-no-remote',
                    *self.config.extra_browser_args,
                }
            ],
            'webkit': [
                *{
                    '--no-startup-window',
                    *self.config.extra_browser_args,
                }
            ],
        }

        browser = await browser_class.launch(
            channel='chromium',  # https://github.com/microsoft/playwright/issues/33566
            headless=self.config.headless,
            args=args[self.config.browser_class],
            proxy=self.config.proxy.model_dump() if self.config.proxy else None,
            handle_sigterm=False,
            handle_sigint=False,
        )
        return browser
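A rough usage sketch for this subclass (assuming the stock browser-use `BrowserConfig`; `close()` comes from the `Browser` base class):

import asyncio
from browser_use.browser.browser import BrowserConfig  # assumption: config class from browser-use

from src.browser.custom_browser import CustomBrowser


async def main():
    browser = CustomBrowser(config=BrowserConfig(headless=True))
    context = await browser.new_context()  # CustomBrowserContext with merged browser/context config
    # ... hand `browser` and `context` to a BrowserUseAgent ...
    await browser.close()


asyncio.run(main())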
src/browser/custom_context.py
ADDED
@@ -0,0 +1,22 @@
import json
import logging
import os
from typing import Optional

from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.context import BrowserContext, BrowserContextConfig, BrowserContextState
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import BrowserContext as PlaywrightBrowserContext

logger = logging.getLogger(__name__)


class CustomBrowserContext(BrowserContext):
    def __init__(
            self,
            browser: 'Browser',
            config: BrowserContextConfig | None = None,
            state: Optional[BrowserContextState] = None,
    ):
        super(CustomBrowserContext, self).__init__(browser=browser, config=config, state=state)
src/controller/__init__.py
ADDED
File without changes
src/controller/custom_controller.py
ADDED
@@ -0,0 +1,182 @@
import asyncio
import inspect
import logging
import os
from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar

from pydantic import BaseModel
from browser_use.agent.views import ActionModel, ActionResult
from browser_use.browser.context import BrowserContext
from browser_use.controller.service import Controller, DoneAction
from browser_use.controller.registry.service import Registry, RegisteredAction
from main_content_extractor import MainContentExtractor
from browser_use.controller.views import (
    ClickElementAction,
    DoneAction,
    ExtractPageContentAction,
    GoToUrlAction,
    InputTextAction,
    OpenTabAction,
    ScrollAction,
    SearchGoogleAction,
    SendKeysAction,
    SwitchTabAction,
)
from langchain_core.language_models.chat_models import BaseChatModel

from src.utils.mcp_client import create_tool_param_model, setup_mcp_client_and_tools

from browser_use.utils import time_execution_sync

logger = logging.getLogger(__name__)

Context = TypeVar('Context')


class CustomController(Controller):
    def __init__(self, exclude_actions: list[str] = [],
                 output_model: Optional[Type[BaseModel]] = None,
                 ask_assistant_callback: Optional[Union[Callable[[str, BrowserContext], Dict[str, Any]], Callable[
                     [str, BrowserContext], Awaitable[Dict[str, Any]]]]] = None,
                 ):
        super().__init__(exclude_actions=exclude_actions, output_model=output_model)
        self._register_custom_actions()
        self.ask_assistant_callback = ask_assistant_callback
        self.mcp_client = None
        self.mcp_server_config = None

    def _register_custom_actions(self):
        """Register all custom browser actions."""

        @self.registry.action(
            "When executing tasks, prioritize autonomous completion. However, if you encounter a definitive blocker "
            "that prevents you from proceeding independently – such as needing credentials you don't possess, "
            "requiring subjective human judgment, needing a physical action performed, encountering complex CAPTCHAs, "
            "or facing limitations in your capabilities – you must request human assistance."
        )
        async def ask_for_assistant(query: str, browser: BrowserContext):
            if self.ask_assistant_callback:
                # The callback may be sync or async
                if inspect.iscoroutinefunction(self.ask_assistant_callback):
                    user_response = await self.ask_assistant_callback(query, browser)
                else:
                    user_response = self.ask_assistant_callback(query, browser)
                msg = f"AI ask: {query}. User response: {user_response['response']}"
                logger.info(msg)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            else:
                return ActionResult(extracted_content="Human cannot help you. Please try another way.",
                                    include_in_memory=True)

        @self.registry.action(
            'Upload file to interactive element with file path',
        )
        async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]):
            if path not in available_file_paths:
                return ActionResult(error=f'File path {path} is not available')

            if not os.path.exists(path):
                return ActionResult(error=f'File {path} does not exist')

            dom_el = await browser.get_dom_element_by_index(index)

            file_upload_dom_el = dom_el.get_file_upload_element()

            if file_upload_dom_el is None:
                msg = f'No file upload element found at index {index}'
                logger.info(msg)
                return ActionResult(error=msg)

            file_upload_el = await browser.get_locate_element(file_upload_dom_el)

            if file_upload_el is None:
                msg = f'No file upload element found at index {index}'
                logger.info(msg)
                return ActionResult(error=msg)

            try:
                await file_upload_el.set_input_files(path)
                msg = f'Successfully uploaded file to index {index}'
                logger.info(msg)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            except Exception as e:
                msg = f'Failed to upload file to index {index}: {str(e)}'
                logger.info(msg)
                return ActionResult(error=msg)

    @time_execution_sync('--act')
    async def act(
            self,
            action: ActionModel,
            browser_context: Optional[BrowserContext] = None,
            page_extraction_llm: Optional[BaseChatModel] = None,
            sensitive_data: Optional[Dict[str, str]] = None,
            available_file_paths: Optional[list[str]] = None,
            context: Context | None = None,
    ) -> ActionResult:
        """Execute an action, dispatching MCP tools separately from registry actions."""

        for action_name, params in action.model_dump(exclude_unset=True).items():
            if params is not None:
                if action_name.startswith("mcp"):
                    # This is an MCP tool
                    logger.debug(f"Invoke MCP tool: {action_name}")
                    mcp_tool = self.registry.registry.actions.get(action_name).function
                    result = await mcp_tool.ainvoke(params)
                else:
                    result = await self.registry.execute_action(
                        action_name,
                        params,
                        browser=browser_context,
                        page_extraction_llm=page_extraction_llm,
                        sensitive_data=sensitive_data,
                        available_file_paths=available_file_paths,
                        context=context,
                    )

                if isinstance(result, str):
                    return ActionResult(extracted_content=result)
                elif isinstance(result, ActionResult):
                    return result
                elif result is None:
                    return ActionResult()
                else:
                    raise ValueError(f'Invalid action result type: {type(result)} of {result}')
        return ActionResult()

    async def setup_mcp_client(self, mcp_server_config: Optional[Dict[str, Any]] = None):
        self.mcp_server_config = mcp_server_config
        if self.mcp_server_config:
            self.mcp_client = await setup_mcp_client_and_tools(self.mcp_server_config)
            self.register_mcp_tools()

    def register_mcp_tools(self):
        """
        Register the MCP tools used by this controller.
        """
        if self.mcp_client:
            for server_name in self.mcp_client.server_name_to_tools:
                for tool in self.mcp_client.server_name_to_tools[server_name]:
                    tool_name = f"mcp.{server_name}.{tool.name}"
                    self.registry.registry.actions[tool_name] = RegisteredAction(
                        name=tool_name,
                        description=tool.description,
                        function=tool,
                        param_model=create_tool_param_model(tool),
                    )
                    logger.info(f"Added MCP tool: {tool_name}")
                logger.debug(
                    f"Registered {len(self.mcp_client.server_name_to_tools[server_name])} MCP tools for {server_name}")
        else:
            logger.warning("MCP client not started.")

    async def close_mcp_client(self):
        if self.mcp_client:
            await self.mcp_client.__aexit__(None, None, None)
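A minimal sketch of wiring the controller with a human-in-the-loop callback; the `ask_human` helper is hypothetical, and the controller accepts sync or async callbacks (it checks with `inspect.iscoroutinefunction`):

from src.controller.custom_controller import CustomController


def ask_human(query: str, browser) -> dict:
    # Hypothetical blocking prompt; the callback must return a dict with a "response" key
    return {"response": input(f"Agent asks: {query}\n> ")}


controller = CustomController(ask_assistant_callback=ask_human)
# Optional: attach MCP tools before handing the controller to an agent, e.g.
#     await controller.setup_mcp_client({"mcpServers": {...}})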
src/utils/__init__.py
ADDED
File without changes
src/utils/config.py
ADDED
@@ -0,0 +1,100 @@
PROVIDER_DISPLAY_NAMES = {
    "openai": "OpenAI",
    "azure_openai": "Azure OpenAI",
    "anthropic": "Anthropic",
    "deepseek": "DeepSeek",
    "google": "Google",
    "alibaba": "Alibaba",
    "moonshot": "MoonShot",
    "unbound": "Unbound AI",
    "ibm": "IBM",
    "grok": "Grok",
}

# Predefined model names for common providers
model_names = {
    "anthropic": ["claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-opus-20240229"],
    "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"],
    "deepseek": ["deepseek-chat", "deepseek-reasoner"],
    "google": ["gemini-2.0-flash", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest",
               "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05",
               "gemini-2.5-pro-preview-03-25", "gemini-2.5-flash-preview-04-17"],
    "ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b",
               "deepseek-r1:14b", "deepseek-r1:32b"],
    "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
    "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
    "alibaba": ["qwen-plus", "qwen-max", "qwen-vl-max", "qwen-vl-plus", "qwen-turbo", "qwen-long"],
    "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
    "unbound": ["gemini-2.0-flash", "gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"],
    "grok": [
        "grok-3",
        "grok-3-fast",
        "grok-3-mini",
        "grok-3-mini-fast",
        "grok-2-vision",
        "grok-2-image",
        "grok-2",
    ],
    "siliconflow": [
        "deepseek-ai/DeepSeek-R1",
        "deepseek-ai/DeepSeek-V3",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "deepseek-ai/DeepSeek-V2.5",
        "deepseek-ai/deepseek-vl2",
        "Qwen/Qwen2.5-72B-Instruct-128K",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2-7B-Instruct",
        "Qwen/Qwen2-1.5B-Instruct",
        "Qwen/QwQ-32B-Preview",
        "Qwen/Qwen2-VL-72B-Instruct",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-72B-Instruct",
        "TeleAI/TeleChat2",
        "THUDM/glm-4-9b-chat",
        "Vendor-A/Qwen/Qwen2.5-72B-Instruct",
        "internlm/internlm2_5-7b-chat",
        "internlm/internlm2_5-20b-chat",
        "Pro/Qwen/Qwen2.5-7B-Instruct",
        "Pro/Qwen/Qwen2-7B-Instruct",
        "Pro/Qwen/Qwen2-1.5B-Instruct",
        "Pro/THUDM/chatglm3-6b",
        "Pro/THUDM/glm-4-9b-chat",
    ],
    "ibm": ["ibm/granite-vision-3.1-2b-preview", "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",
            "meta-llama/llama-3-2-90b-vision-instruct"],
    "modelscope": [
        "Qwen/Qwen2.5-Coder-32B-Instruct",
        "Qwen/Qwen2.5-Coder-14B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "Qwen/QwQ-32B-Preview",
        "Qwen/Qwen2.5-VL-3B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-72B-Instruct",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "deepseek-ai/DeepSeek-R1",
        "deepseek-ai/DeepSeek-V3",
        "Qwen/Qwen3-1.7B",
        "Qwen/Qwen3-4B",
        "Qwen/Qwen3-8B",
        "Qwen/Qwen3-14B",
        "Qwen/Qwen3-30B-A3B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen3-235B-A22B",
    ],
}
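Since both dictionaries are plain module-level constants, downstream code can look up display names and default models directly; a small sketch:

from src.utils import config

provider = "deepseek"
print(config.PROVIDER_DISPLAY_NAMES.get(provider, provider.upper()))  # -> "DeepSeek"
print(config.model_names[provider][0])                                # -> "deepseek-chat", the first (default) choice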
src/utils/llm_provider.py
ADDED
@@ -0,0 +1,354 @@
from openai import OpenAI
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from langchain_core.globals import get_llm_cache
from langchain_core.language_models.base import (
    BaseLanguageModel,
    LangSmithParams,
    LanguageModelInput,
)
import os
from langchain_core.load import dumpd, dumps
from langchain_core.messages import (
    AIMessage,
    SystemMessage,
    AnyMessage,
    BaseMessage,
    BaseMessageChunk,
    HumanMessage,
    convert_to_messages,
    message_chunk_to_message,
)
from langchain_core.outputs import (
    ChatGeneration,
    ChatGenerationChunk,
    ChatResult,
    LLMResult,
    RunInfo,
)
from langchain_ollama import ChatOllama
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_core.tools import BaseTool

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    Optional,
    Union,
    cast, List,
)
from langchain_anthropic import ChatAnthropic
from langchain_mistralai import ChatMistralAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ibm import ChatWatsonx
from langchain_aws import ChatBedrock
from pydantic import SecretStr

from src.utils import config


class DeepSeekR1ChatOpenAI(ChatOpenAI):

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self.client = OpenAI(
            base_url=kwargs.get("base_url"),
            api_key=kwargs.get("api_key")
        )

    async def ainvoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        message_history = []
        for input_ in input:
            if isinstance(input_, SystemMessage):
                message_history.append({"role": "system", "content": input_.content})
            elif isinstance(input_, AIMessage):
                message_history.append({"role": "assistant", "content": input_.content})
            else:
                message_history.append({"role": "user", "content": input_.content})

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=message_history
        )

        reasoning_content = response.choices[0].message.reasoning_content
        content = response.choices[0].message.content
        return AIMessage(content=content, reasoning_content=reasoning_content)

    def invoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        message_history = []
        for input_ in input:
            if isinstance(input_, SystemMessage):
                message_history.append({"role": "system", "content": input_.content})
            elif isinstance(input_, AIMessage):
                message_history.append({"role": "assistant", "content": input_.content})
            else:
                message_history.append({"role": "user", "content": input_.content})

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=message_history
        )

        reasoning_content = response.choices[0].message.reasoning_content
        content = response.choices[0].message.content
        return AIMessage(content=content, reasoning_content=reasoning_content)


class DeepSeekR1ChatOllama(ChatOllama):

    async def ainvoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        org_ai_message = await super().ainvoke(input=input)
        org_content = org_ai_message.content
        reasoning_content = org_content.split("</think>")[0].replace("<think>", "")
        content = org_content.split("</think>")[1]
        if "**JSON Response:**" in content:
            content = content.split("**JSON Response:**")[-1]
        return AIMessage(content=content, reasoning_content=reasoning_content)

    def invoke(
            self,
            input: LanguageModelInput,
            config: Optional[RunnableConfig] = None,
            *,
            stop: Optional[list[str]] = None,
            **kwargs: Any,
    ) -> AIMessage:
        org_ai_message = super().invoke(input=input)
        org_content = org_ai_message.content
        reasoning_content = org_content.split("</think>")[0].replace("<think>", "")
        content = org_content.split("</think>")[1]
        if "**JSON Response:**" in content:
            content = content.split("**JSON Response:**")[-1]
        return AIMessage(content=content, reasoning_content=reasoning_content)


def get_llm_model(provider: str, **kwargs):
    """
    Get LLM model
    :param provider: LLM provider
    :param kwargs:
    :return:
    """
    if provider not in ["ollama", "bedrock"]:
        env_var = f"{provider.upper()}_API_KEY"
        api_key = kwargs.get("api_key", "") or os.getenv(env_var, "")
        if not api_key:
            provider_display = config.PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
            error_msg = f"💥 {provider_display} API key not found! 🔑 Please set the `{env_var}` environment variable or provide it in the UI."
            raise ValueError(error_msg)
        kwargs["api_key"] = api_key

    if provider == "anthropic":
        if not kwargs.get("base_url", ""):
            base_url = "https://api.anthropic.com"
        else:
            base_url = kwargs.get("base_url")

        return ChatAnthropic(
            model=kwargs.get("model_name", "claude-3-5-sonnet-20241022"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == 'mistral':
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("MISTRAL_ENDPOINT", "https://api.mistral.ai/v1")
        else:
            base_url = kwargs.get("base_url")
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("MISTRAL_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")

        return ChatMistralAI(
            model=kwargs.get("model_name", "mistral-large-latest"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "openai":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "grok":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("GROK_ENDPOINT", "https://api.x.ai/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "grok-3"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "deepseek":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("DEEPSEEK_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")

        if kwargs.get("model_name", "deepseek-chat") == "deepseek-reasoner":
            return DeepSeekR1ChatOpenAI(
                model=kwargs.get("model_name", "deepseek-reasoner"),
                temperature=kwargs.get("temperature", 0.0),
                base_url=base_url,
                api_key=api_key,
            )
        else:
            return ChatOpenAI(
                model=kwargs.get("model_name", "deepseek-chat"),
                temperature=kwargs.get("temperature", 0.0),
                base_url=base_url,
                api_key=api_key,
            )
    elif provider == "google":
        return ChatGoogleGenerativeAI(
            model=kwargs.get("model_name", "gemini-2.0-flash-exp"),
            temperature=kwargs.get("temperature", 0.0),
            api_key=api_key,
        )
    elif provider == "ollama":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
        else:
            base_url = kwargs.get("base_url")

        if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"):
            return DeepSeekR1ChatOllama(
                model=kwargs.get("model_name", "deepseek-r1:14b"),
                temperature=kwargs.get("temperature", 0.0),
                num_ctx=kwargs.get("num_ctx", 32000),
                base_url=base_url,
            )
        else:
            return ChatOllama(
                model=kwargs.get("model_name", "qwen2.5:7b"),
                temperature=kwargs.get("temperature", 0.0),
                num_ctx=kwargs.get("num_ctx", 32000),
                num_predict=kwargs.get("num_predict", 1024),
                base_url=base_url,
            )
    elif provider == "azure_openai":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        api_version = kwargs.get("api_version", "") or os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")
        return AzureChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o"),
            temperature=kwargs.get("temperature", 0.0),
            api_version=api_version,
            azure_endpoint=base_url,
            api_key=api_key,
        )
    elif provider == "alibaba":
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("ALIBABA_ENDPOINT", "https://dashscope.aliyuncs.com/compatible-mode/v1")
        else:
            base_url = kwargs.get("base_url")

        return ChatOpenAI(
            model=kwargs.get("model_name", "qwen-plus"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=base_url,
            api_key=api_key,
        )
    elif provider == "ibm":
        parameters = {
            "temperature": kwargs.get("temperature", 0.0),
            "max_tokens": kwargs.get("num_ctx", 32000)
        }
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("IBM_ENDPOINT", "https://us-south.ml.cloud.ibm.com")
        else:
            base_url = kwargs.get("base_url")

        return ChatWatsonx(
            model_id=kwargs.get("model_name", "ibm/granite-vision-3.1-2b-preview"),
            url=base_url,
            project_id=os.getenv("IBM_PROJECT_ID"),
            apikey=os.getenv("IBM_API_KEY"),
            params=parameters
        )
    elif provider == "moonshot":
        return ChatOpenAI(
            model=kwargs.get("model_name", "moonshot-v1-32k-vision-preview"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=os.getenv("MOONSHOT_ENDPOINT"),
            api_key=os.getenv("MOONSHOT_API_KEY"),
        )
    elif provider == "unbound":
        return ChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o-mini"),
            temperature=kwargs.get("temperature", 0.0),
            base_url=os.getenv("UNBOUND_ENDPOINT", "https://api.getunbound.ai"),
            api_key=api_key,
        )
    elif provider == "siliconflow":
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("SiliconFLOW_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("SiliconFLOW_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        return ChatOpenAI(
            api_key=api_key,
            base_url=base_url,
            model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
            temperature=kwargs.get("temperature", 0.0),
        )
    elif provider == "modelscope":
        if not kwargs.get("api_key", ""):
            api_key = os.getenv("MODELSCOPE_API_KEY", "")
        else:
            api_key = kwargs.get("api_key")
        if not kwargs.get("base_url", ""):
            base_url = os.getenv("MODELSCOPE_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
        return ChatOpenAI(
            api_key=api_key,
            base_url=base_url,
            model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
            temperature=kwargs.get("temperature", 0.0),
        )
    else:
        raise ValueError(f"Unsupported provider: {provider}")
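For reference, a usage sketch for `get_llm_model`; the API key below is a placeholder and must be replaced, or omitted so the key-resolution logic above falls back to the `DEEPSEEK_API_KEY` environment variable:

from src.utils.llm_provider import get_llm_model

llm = get_llm_model(
    "deepseek",
    model_name="deepseek-chat",
    temperature=0.0,
    api_key="sk-...",  # placeholder; falls back to DEEPSEEK_API_KEY if omitted
)
print(llm.invoke("Reply with one word.").content)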
src/utils/mcp_client.py
ADDED
@@ -0,0 +1,254 @@
import inspect
import logging
import uuid
from datetime import date, datetime, time
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Type, Union, get_type_hints

from browser_use.controller.registry.views import ActionModel
from langchain.tools import BaseTool
from langchain_mcp_adapters.client import MultiServerMCPClient
from pydantic import BaseModel, Field, create_model

logger = logging.getLogger(__name__)


async def setup_mcp_client_and_tools(mcp_server_config: Dict[str, Any]) -> Optional[MultiServerMCPClient]:
    """
    Initializes the MultiServerMCPClient and connects it to the configured servers.

    Returns:
        MultiServerMCPClient | None: The initialized and started client instance, or None on failure.
    """

    logger.info("Initializing MultiServerMCPClient...")

    if not mcp_server_config:
        logger.error("No MCP server configuration provided.")
        return None

    try:
        if "mcpServers" in mcp_server_config:
            mcp_server_config = mcp_server_config["mcpServers"]
        client = MultiServerMCPClient(mcp_server_config)
        await client.__aenter__()
        return client

    except Exception as e:
        logger.error(f"Failed to setup MCP client or fetch tools: {e}", exc_info=True)
        return None


def create_tool_param_model(tool: BaseTool) -> Type[BaseModel]:
    """Creates a Pydantic model from a LangChain tool's schema"""

    # Get tool schema information
    json_schema = tool.args_schema
    tool_name = tool.name

    # If the tool already has a schema defined, convert it to a new param_model
    if json_schema is not None:

        # Create new parameter model
        params = {}

        # Process properties if they exist
        if 'properties' in json_schema:
            # Find required fields
            required_fields: Set[str] = set(json_schema.get('required', []))

            for prop_name, prop_details in json_schema['properties'].items():
                field_type = resolve_type(prop_details, f"{tool_name}_{prop_name}")

                # Check if parameter is required
                is_required = prop_name in required_fields

                # Get default value and description
                default_value = prop_details.get('default', ... if is_required else None)
                description = prop_details.get('description', '')

                # Add field constraints
                field_kwargs = {'default': default_value}
                if description:
                    field_kwargs['description'] = description

                # Add additional constraints if present
                if 'minimum' in prop_details:
                    field_kwargs['ge'] = prop_details['minimum']
                if 'maximum' in prop_details:
                    field_kwargs['le'] = prop_details['maximum']
                if 'minLength' in prop_details:
                    field_kwargs['min_length'] = prop_details['minLength']
                if 'maxLength' in prop_details:
                    field_kwargs['max_length'] = prop_details['maxLength']
                if 'pattern' in prop_details:
                    field_kwargs['pattern'] = prop_details['pattern']

                # Add to parameters dictionary
                params[prop_name] = (field_type, Field(**field_kwargs))

        return create_model(
            f'{tool_name}_parameters',
            __base__=ActionModel,
            **params,  # type: ignore
        )

    # If no schema is defined, extract parameters from the _run method
    run_method = tool._run
    sig = inspect.signature(run_method)

    # Get type hints for better type information
    try:
        type_hints = get_type_hints(run_method)
    except Exception:
        type_hints = {}

    params = {}
    for name, param in sig.parameters.items():
        # Skip 'self' parameter and any other parameters you want to exclude
        if name == 'self':
            continue

        # Get annotation from type hints if available, otherwise from signature
        annotation = type_hints.get(name, param.annotation)
        if annotation == inspect.Parameter.empty:
            annotation = Any

        # Use default value if available, otherwise make it required
        if param.default != param.empty:
            params[name] = (annotation, param.default)
        else:
            params[name] = (annotation, ...)

    return create_model(
        f'{tool_name}_parameters',
        __base__=ActionModel,
        **params,  # type: ignore
    )


def resolve_type(prop_details: Dict[str, Any], prefix: str = "") -> Any:
    """Recursively resolves a JSON schema type to a Python/Pydantic type"""

    # Handle reference types
    if '$ref' in prop_details:
        # In a real application, reference resolution would be needed
        return Any

    # Basic type mapping
    type_mapping = {
        'string': str,
        'integer': int,
        'number': float,
        'boolean': bool,
        'array': List,
        'object': Dict,
        'null': type(None),
    }

    # Handle formatted strings
    if prop_details.get('type') == 'string' and 'format' in prop_details:
        format_mapping = {
            'date-time': datetime,
            'date': date,
            'time': time,
            'email': str,
            'uri': str,
            'url': str,
            'uuid': uuid.UUID,
            'binary': bytes,
        }
        return format_mapping.get(prop_details['format'], str)

    # Handle enum types
    if 'enum' in prop_details:
        enum_values = prop_details['enum']
        # Create a dynamic enum class with safe names
        enum_dict = {}
        for i, v in enumerate(enum_values):
            # Ensure enum names are valid Python identifiers
            if isinstance(v, str):
                key = v.upper().replace(' ', '_').replace('-', '_')
                if not key.isidentifier():
                    key = f"VALUE_{i}"
            else:
                key = f"VALUE_{i}"
            enum_dict[key] = v

        # Only create the enum if we have values
        if enum_dict:
            return Enum(f"{prefix}_Enum", enum_dict)
        return str  # Fallback

    # Handle array types
    if prop_details.get('type') == 'array' and 'items' in prop_details:
        item_type = resolve_type(prop_details['items'], f"{prefix}_item")
        return List[item_type]  # type: ignore

    # Handle object types with properties
    if prop_details.get('type') == 'object' and 'properties' in prop_details:
        nested_params = {}
        for nested_name, nested_details in prop_details['properties'].items():
            nested_type = resolve_type(nested_details, f"{prefix}_{nested_name}")
            # Get required field info
            required_fields = prop_details.get('required', [])
            is_required = nested_name in required_fields
            default_value = nested_details.get('default', ... if is_required else None)
            description = nested_details.get('description', '')

            field_kwargs = {'default': default_value}
            if description:
                field_kwargs['description'] = description

            nested_params[nested_name] = (nested_type, Field(**field_kwargs))

        # Create nested model
        nested_model = create_model(f"{prefix}_Model", **nested_params)
        return nested_model

    # Handle union types (oneOf, anyOf)
    if 'oneOf' in prop_details or 'anyOf' in prop_details:
        union_schema = prop_details.get('oneOf') or prop_details.get('anyOf')
        union_types = []
        for i, t in enumerate(union_schema):
            union_types.append(resolve_type(t, f"{prefix}_{i}"))

        if union_types:
            return Union.__getitem__(tuple(union_types))  # type: ignore
        return Any

    # Handle allOf (intersection types)
    if 'allOf' in prop_details:
        nested_params = {}
        for i, schema_part in enumerate(prop_details['allOf']):
            if 'properties' in schema_part:
                for nested_name, nested_details in schema_part['properties'].items():
                    nested_type = resolve_type(nested_details, f"{prefix}_allOf_{i}_{nested_name}")
                    # Check if required
                    required_fields = schema_part.get('required', [])
                    is_required = nested_name in required_fields
                    nested_params[nested_name] = (nested_type, ... if is_required else None)

        # Create composite model
        if nested_params:
            composite_model = create_model(f"{prefix}_CompositeModel", **nested_params)
            return composite_model
        return Dict

    # Default to basic types
    schema_type = prop_details.get('type', 'string')
    if isinstance(schema_type, list):
        # Handle multiple types (e.g., ["string", "null"])
        non_null_types = [t for t in schema_type if t != 'null']
        if non_null_types:
            primary_type = type_mapping.get(non_null_types[0], Any)
            if 'null' in schema_type:
                return Optional[primary_type]  # type: ignore
            return primary_type
        return Any

    return type_mapping.get(schema_type, Any)
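For reference, a quick sketch of `resolve_type` on two representative schema fragments; the expected outputs in the comments follow the mappings above:

from src.utils.mcp_client import resolve_type

print(resolve_type({"type": "array", "items": {"type": "string", "format": "uri"}}))
# -> typing.List[str]

Speed = resolve_type({"enum": ["fast", "slow"]}, prefix="speed")
print([member.value for member in Speed])
# -> ['fast', 'slow']  (a dynamically created Enum named "speed_Enum")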
src/utils/utils.py
ADDED
@@ -0,0 +1,39 @@
import base64
import os
import time
from pathlib import Path
from typing import Dict, Optional


def encode_image(img_path):
    if not img_path:
        return None
    with open(img_path, "rb") as fin:
        image_data = base64.b64encode(fin.read()).decode("utf-8")
    return image_data


def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Dict[str, Optional[str]]:
    """Get the latest recording and trace files"""
    latest_files: Dict[str, Optional[str]] = {ext: None for ext in file_types}

    if not os.path.exists(directory):
        os.makedirs(directory, exist_ok=True)
        return latest_files

    for file_type in file_types:
        try:
            matches = list(Path(directory).rglob(f"*{file_type}"))
            if matches:
                latest = max(matches, key=lambda p: p.stat().st_mtime)
                # Only return files that are complete (not being written)
                if time.time() - latest.stat().st_mtime > 1.0:
                    latest_files[file_type] = str(latest)
        except Exception as e:
            print(f"Error getting latest {file_type} file: {e}")

    return latest_files
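For reference, a usage sketch, assuming recordings are written under the `./tmp/record_videos` path suggested elsewhere in the UI:

from src.utils.utils import get_latest_files

latest = get_latest_files("./tmp/record_videos", file_types=[".webm"])
print(latest[".webm"])  # newest finished recording, or None if nothing qualifies yet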
src/webui/__init__.py
ADDED
File without changes
src/webui/components/__init__.py
ADDED
File without changes
src/webui/components/agent_settings_tab.py
ADDED
@@ -0,0 +1,269 @@
import json
import os

import gradio as gr
from gradio.components import Component
from typing import Any, Dict, Optional
from src.webui.webui_manager import WebuiManager
from src.utils import config
import logging
from functools import partial

logger = logging.getLogger(__name__)


def update_model_dropdown(llm_provider):
    """
    Update the model name dropdown with predefined models for the selected provider.
    """
    # Use predefined models for the selected provider
    if llm_provider in config.model_names:
        return gr.Dropdown(choices=config.model_names[llm_provider], value=config.model_names[llm_provider][0],
                           interactive=True)
    else:
        return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)


async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
    """
    Update the MCP server.
    """
    if hasattr(webui_manager, "bu_controller") and webui_manager.bu_controller:
        logger.warning("⚠️ Closing controller because the MCP file has changed!")
        await webui_manager.bu_controller.close_mcp_client()
        webui_manager.bu_controller = None

    if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
        logger.warning(f"{mcp_file} is not a valid MCP file.")
        return None, gr.update(visible=False)

    with open(mcp_file, 'r') as f:
        mcp_server = json.load(f)

    return json.dumps(mcp_server, indent=2), gr.update(visible=True)


def create_agent_settings_tab(webui_manager: WebuiManager):
    """
    Creates an agent settings tab.
    """
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Column():
            override_system_prompt = gr.Textbox(label="Override system prompt", lines=4, interactive=True)
            extend_system_prompt = gr.Textbox(label="Extend system prompt", lines=4, interactive=True)

    with gr.Group():
        mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
        mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)

    with gr.Group():
        with gr.Row():
            llm_provider = gr.Dropdown(
                choices=[provider for provider, model in config.model_names.items()],
                label="LLM Provider",
                value=os.getenv("DEFAULT_LLM", "openai"),
                info="Select LLM provider for LLM",
                interactive=True
            )
            llm_model_name = gr.Dropdown(
                label="LLM Model Name",
                choices=config.model_names[os.getenv("DEFAULT_LLM", "openai")],
                value=config.model_names[os.getenv("DEFAULT_LLM", "openai")][0],
                interactive=True,
                allow_custom_value=True,
                info="Select a model in the dropdown options or directly type a custom model name"
            )
        with gr.Row():
            llm_temperature = gr.Slider(
                minimum=0.0,
                maximum=2.0,
                value=0.6,
                step=0.1,
                label="LLM Temperature",
                info="Controls randomness in model outputs",
                interactive=True
            )

            use_vision = gr.Checkbox(
                label="Use Vision",
                value=True,
                info="Enable Vision (input highlighted screenshot into LLM)",
                interactive=True
            )

            ollama_num_ctx = gr.Slider(
                minimum=2 ** 8,
                maximum=2 ** 16,
                value=16000,
                step=1,
                label="Ollama Context Length",
                info="Controls max context length model needs to handle (less = faster)",
                visible=False,
                interactive=True
            )

        with gr.Row():
            llm_base_url = gr.Textbox(
                label="Base URL",
                value="",
                info="API endpoint URL (if required)"
            )
            llm_api_key = gr.Textbox(
                label="API Key",
                type="password",
                value="",
                info="Your API key (leave blank to use .env)"
            )

    with gr.Group():
        with gr.Row():
            planner_llm_provider = gr.Dropdown(
                choices=[provider for provider, model in config.model_names.items()],
                label="Planner LLM Provider",
                info="Select LLM provider for LLM",
                value=None,
                interactive=True
            )
            planner_llm_model_name = gr.Dropdown(
                label="Planner LLM Model Name",
                interactive=True,
                allow_custom_value=True,
                info="Select a model in the dropdown options or directly type a custom model name"
            )
        with gr.Row():
            planner_llm_temperature = gr.Slider(
                minimum=0.0,
                maximum=2.0,
                value=0.6,
                step=0.1,
                label="Planner LLM Temperature",
                info="Controls randomness in model outputs",
                interactive=True
            )

            planner_use_vision = gr.Checkbox(
                label="Use Vision (Planner LLM)",
                value=False,
                info="Enable Vision (input highlighted screenshot into LLM)",
                interactive=True
            )

            planner_ollama_num_ctx = gr.Slider(
                minimum=2 ** 8,
                maximum=2 ** 16,
                value=16000,
                step=1,
                label="Ollama Context Length",
                info="Controls max context length model needs to handle (less = faster)",
                visible=False,
                interactive=True
            )

        with gr.Row():
            planner_llm_base_url = gr.Textbox(
                label="Base URL",
                value="",
                info="API endpoint URL (if required)"
            )
            planner_llm_api_key = gr.Textbox(
                label="API Key",
                type="password",
                value="",
                info="Your API key (leave blank to use .env)"
            )

    with gr.Row():
        max_steps = gr.Slider(
            minimum=1,
            maximum=1000,
            value=100,
            step=1,
            label="Max Run Steps",
            info="Maximum number of steps the agent will take",
            interactive=True
        )
        max_actions = gr.Slider(
            minimum=1,
            maximum=100,
            value=10,
            step=1,
            label="Max Number of Actions",
            info="Maximum number of actions the agent will take per step",
            interactive=True
        )

    with gr.Row():
        max_input_tokens = gr.Number(
            label="Max Input Tokens",
            value=128000,
            precision=0,
            interactive=True
        )
        tool_calling_method = gr.Dropdown(
            label="Tool Calling Method",
            value="auto",
            interactive=True,
            allow_custom_value=True,
            choices=['function_calling', 'json_mode', 'raw', 'auto', 'tools', "None"],
            visible=True
        )
    tab_components.update(dict(
        override_system_prompt=override_system_prompt,
        extend_system_prompt=extend_system_prompt,
        llm_provider=llm_provider,
        llm_model_name=llm_model_name,
        llm_temperature=llm_temperature,
        use_vision=use_vision,
        ollama_num_ctx=ollama_num_ctx,
        llm_base_url=llm_base_url,
        llm_api_key=llm_api_key,
        planner_llm_provider=planner_llm_provider,
        planner_llm_model_name=planner_llm_model_name,
        planner_llm_temperature=planner_llm_temperature,
        planner_use_vision=planner_use_vision,
        planner_ollama_num_ctx=planner_ollama_num_ctx,
        planner_llm_base_url=planner_llm_base_url,
        planner_llm_api_key=planner_llm_api_key,
        max_steps=max_steps,
        max_actions=max_actions,
        max_input_tokens=max_input_tokens,
        tool_calling_method=tool_calling_method,
        mcp_json_file=mcp_json_file,
        mcp_server_config=mcp_server_config,
    ))
    webui_manager.add_components("agent_settings", tab_components)

    llm_provider.change(
        fn=lambda x: gr.update(visible=x == "ollama"),
        inputs=llm_provider,
        outputs=ollama_num_ctx
    )
    llm_provider.change(
        lambda provider: update_model_dropdown(provider),
        inputs=[llm_provider],
        outputs=[llm_model_name]
    )
    planner_llm_provider.change(
        fn=lambda x: gr.update(visible=x == "ollama"),
        inputs=[planner_llm_provider],
        outputs=[planner_ollama_num_ctx]
    )
    planner_llm_provider.change(
        lambda provider: update_model_dropdown(provider),
        inputs=[planner_llm_provider],
        outputs=[planner_llm_model_name]
    )

    async def update_wrapper(mcp_file):
        """Wrapper for update_mcp_server."""
        update_dict = await update_mcp_server(mcp_file, webui_manager)
        yield update_dict

    mcp_json_file.change(
        update_wrapper,
        inputs=[mcp_json_file],
        outputs=[mcp_server_config, mcp_server_config]
    )
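For reference, `update_model_dropdown` can also be exercised outside the `change` handlers; a sketch of both branches (constructing Gradio components outside a Blocks context is assumed to be acceptable here, since they are only inspected, not rendered):

from src.webui.components.agent_settings_tab import update_model_dropdown

known = update_model_dropdown("ollama")   # dropdown preloaded with the predefined ollama models
unknown = update_model_dropdown("other")  # empty dropdown that allows a custom model name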
src/webui/components/browser_settings_tab.py
ADDED
@@ -0,0 +1,161 @@
import os
from distutils.util import strtobool
import gradio as gr
import logging
from gradio.components import Component

from src.webui.webui_manager import WebuiManager
from src.utils import config

logger = logging.getLogger(__name__)


async def close_browser(webui_manager: WebuiManager):
    """
    Close browser
    """
    if webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
        webui_manager.bu_current_task.cancel()
        webui_manager.bu_current_task = None

    if webui_manager.bu_browser_context:
        logger.info("⚠️ Closing browser context when changing browser config.")
        await webui_manager.bu_browser_context.close()
        webui_manager.bu_browser_context = None

    if webui_manager.bu_browser:
        logger.info("⚠️ Closing browser when changing browser config.")
        await webui_manager.bu_browser.close()
        webui_manager.bu_browser = None


def create_browser_settings_tab(webui_manager: WebuiManager):
    """
    Creates a browser settings tab.
    """
    input_components = set(webui_manager.get_components())
    tab_components = {}

    with gr.Group():
        with gr.Row():
            browser_binary_path = gr.Textbox(
                label="Browser Binary Path",
                lines=1,
                interactive=True,
                placeholder="e.g. '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome'"
            )
            browser_user_data_dir = gr.Textbox(
                label="Browser User Data Dir",
                lines=1,
                interactive=True,
                placeholder="Leave it empty if you use your default user data",
            )
    with gr.Group():
        with gr.Row():
            use_own_browser = gr.Checkbox(
                label="Use Own Browser",
                value=bool(strtobool(os.getenv("USE_OWN_BROWSER", "false"))),
                info="Use your existing browser instance",
                interactive=True
            )
            keep_browser_open = gr.Checkbox(
                label="Keep Browser Open",
                value=bool(strtobool(os.getenv("KEEP_BROWSER_OPEN", "true"))),
                info="Keep Browser Open between Tasks",
                interactive=True
            )
            headless = gr.Checkbox(
                label="Headless Mode",
                value=False,
                info="Run browser without GUI",
                interactive=True
            )
            disable_security = gr.Checkbox(
                label="Disable Security",
                value=False,
                info="Disable browser security",
                interactive=True
            )

    with gr.Group():
        with gr.Row():
            window_w = gr.Number(
                label="Window Width",
                value=1280,
                info="Browser window width",
                interactive=True
            )
            window_h = gr.Number(
                label="Window Height",
                value=1100,
                info="Browser window height",
                interactive=True
            )
    with gr.Group():
        with gr.Row():
            cdp_url = gr.Textbox(
                label="CDP URL",
                value=os.getenv("BROWSER_CDP", None),
                info="CDP URL for browser remote debugging",
                interactive=True,
            )
            wss_url = gr.Textbox(
                label="WSS URL",
                info="WSS URL for browser remote debugging",
                interactive=True,
            )
    with gr.Group():
        with gr.Row():
            save_recording_path = gr.Textbox(
                label="Recording Path",
                placeholder="e.g. ./tmp/record_videos",
                info="Path to save browser recordings",
                interactive=True,
            )

            save_trace_path = gr.Textbox(
                label="Trace Path",
                placeholder="e.g. ./tmp/traces",
                info="Path to save Agent traces",
                interactive=True,
            )

        with gr.Row():
            save_agent_history_path = gr.Textbox(
                label="Agent History Save Path",
                value="./tmp/agent_history",
                info="Specify the directory where agent history should be saved.",
                interactive=True,
            )
            save_download_path = gr.Textbox(
                label="Save Directory for browser downloads",
                value="./tmp/downloads",
                info="Specify the directory where downloaded files should be saved.",
                interactive=True,
            )
    tab_components.update(
        dict(
            browser_binary_path=browser_binary_path,
            browser_user_data_dir=browser_user_data_dir,
            use_own_browser=use_own_browser,
            keep_browser_open=keep_browser_open,
            headless=headless,
            disable_security=disable_security,
            save_recording_path=save_recording_path,
            save_trace_path=save_trace_path,
            save_agent_history_path=save_agent_history_path,
            save_download_path=save_download_path,
            cdp_url=cdp_url,
            wss_url=wss_url,
            window_h=window_h,
            window_w=window_w,
        )
    )
    webui_manager.add_components("browser_settings", tab_components)

    async def close_wrapper():
        """Wrapper for close_browser."""
        await close_browser(webui_manager)

    headless.change(close_wrapper)
    keep_browser_open.change(close_wrapper)
    disable_security.change(close_wrapper)
    use_own_browser.change(close_wrapper)
src/webui/components/browser_use_agent_tab.py
ADDED
@@ -0,0 +1,1083 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import json
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
import uuid
|
6 |
+
from typing import Any, AsyncGenerator, Dict, Optional
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
|
10 |
+
# from browser_use.agent.service import Agent
|
11 |
+
from browser_use.agent.views import (
|
12 |
+
AgentHistoryList,
|
13 |
+
AgentOutput,
|
14 |
+
)
|
15 |
+
from browser_use.browser.browser import BrowserConfig
|
16 |
+
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
17 |
+
from browser_use.browser.views import BrowserState
|
18 |
+
from gradio.components import Component
|
19 |
+
from langchain_core.language_models.chat_models import BaseChatModel
|
20 |
+
|
21 |
+
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
22 |
+
from src.browser.custom_browser import CustomBrowser
|
23 |
+
from src.controller.custom_controller import CustomController
|
24 |
+
from src.utils import llm_provider
|
25 |
+
from src.webui.webui_manager import WebuiManager
|
26 |
+
|
27 |
+
logger = logging.getLogger(__name__)
|
28 |
+
|
29 |
+
|
30 |
+
# --- Helper Functions --- (Defined at module level)
|
31 |
+
|
32 |
+
|
33 |
+
async def _initialize_llm(
|
34 |
+
provider: Optional[str],
|
35 |
+
model_name: Optional[str],
|
36 |
+
temperature: float,
|
37 |
+
base_url: Optional[str],
|
38 |
+
api_key: Optional[str],
|
39 |
+
num_ctx: Optional[int] = None,
|
40 |
+
) -> Optional[BaseChatModel]:
|
41 |
+
"""Initializes the LLM based on settings. Returns None if provider/model is missing."""
|
42 |
+
if not provider or not model_name:
|
43 |
+
logger.info("LLM Provider or Model Name not specified, LLM will be None.")
|
44 |
+
return None
|
45 |
+
try:
|
46 |
+
# Use your actual LLM provider logic here
|
47 |
+
logger.info(
|
48 |
+
f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}"
|
49 |
+
)
|
50 |
+
# Example using a placeholder function
|
51 |
+
llm = llm_provider.get_llm_model(
|
52 |
+
provider=provider,
|
53 |
+
model_name=model_name,
|
54 |
+
temperature=temperature,
|
55 |
+
base_url=base_url or None,
|
56 |
+
api_key=api_key or None,
|
57 |
+
# Add other relevant params like num_ctx for ollama
|
58 |
+
num_ctx=num_ctx if provider == "ollama" else None,
|
59 |
+
)
|
60 |
+
return llm
|
61 |
+
except Exception as e:
|
62 |
+
logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
|
63 |
+
gr.Warning(
|
64 |
+
f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}"
|
65 |
+
)
|
66 |
+
return None
|
67 |
+
|
68 |
+
|
69 |
+
def _get_config_value(
|
70 |
+
webui_manager: WebuiManager,
|
71 |
+
comp_dict: Dict[gr.components.Component, Any],
|
72 |
+
comp_id_suffix: str,
|
73 |
+
default: Any = None,
|
74 |
+
) -> Any:
|
75 |
+
"""Safely get value from component dictionary using its ID suffix relative to the tab."""
|
76 |
+
# Assumes component ID format is "tab_name.comp_name"
|
77 |
+
tab_name = "browser_use_agent" # Hardcode or derive if needed
|
78 |
+
comp_id = f"{tab_name}.{comp_id_suffix}"
|
79 |
+
# Need to find the component object first using the ID from the manager
|
80 |
+
try:
|
81 |
+
comp = webui_manager.get_component_by_id(comp_id)
|
82 |
+
return comp_dict.get(comp, default)
|
83 |
+
except KeyError:
|
84 |
+
# Try accessing settings tabs as well
|
85 |
+
for prefix in ["agent_settings", "browser_settings"]:
|
86 |
+
try:
|
87 |
+
comp_id = f"{prefix}.{comp_id_suffix}"
|
88 |
+
comp = webui_manager.get_component_by_id(comp_id)
|
89 |
+
return comp_dict.get(comp, default)
|
90 |
+
except KeyError:
|
91 |
+
continue
|
92 |
+
logger.warning(
|
93 |
+
f"Component with suffix '{comp_id_suffix}' not found in manager for value lookup."
|
94 |
+
)
|
95 |
+
return default


def _format_agent_output(model_output: AgentOutput) -> str:
    """Formats AgentOutput for display in the chatbot using JSON."""
    content = ""
    if model_output:
        try:
            # Directly use model_dump if actions and current_state are Pydantic models
            action_dump = [
                action.model_dump(exclude_none=True) for action in model_output.action
            ]

            state_dump = model_output.current_state.model_dump(exclude_none=True)
            model_output_dump = {
                "current_state": state_dump,
                "action": action_dump,
            }
            # Dump to JSON string with indentation
            json_string = json.dumps(model_output_dump, indent=4, ensure_ascii=False)
            # Wrap in <pre><code> for proper display in HTML
            content = f"<pre><code class='language-json'>{json_string}</code></pre>"

        except AttributeError as ae:
            logger.error(
                f"AttributeError during model dump: {ae}. Check if 'action' or 'current_state' or their items support 'model_dump'."
            )
            content = f"<pre><code>Error: Could not format agent output (AttributeError: {ae}).\nRaw output: {str(model_output)}</code></pre>"
        except Exception as e:
            logger.error(f"Error formatting agent output: {e}", exc_info=True)
            # Fall back to a simple string representation on error
            content = f"<pre><code>Error formatting agent output.\nRaw output:\n{str(model_output)}</code></pre>"

    return content.strip()
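
# The <pre><code> wrapper above relies on the Chatbot (created with
# type="messages" in create_browser_use_agent_tab) rendering assistant
# message content as HTML.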


# --- Updated Callback Implementation ---


async def _handle_new_step(
    webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
):
    """Callback for each step taken by the agent, including screenshot display."""

    # Guard against a missing chat history attribute on the manager
    if not hasattr(webui_manager, "bu_chat_history"):
        logger.error(
            "Attribute 'bu_chat_history' not found in webui_manager! Cannot add chat message."
        )
        # Initialize it so the step can still be recorded; raising here may be
        # preferable if this state is unexpected.
        webui_manager.bu_chat_history = []  # Initialize if missing
        # return  # Or stop here if this is critical
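    # step_num appears to be the number of the step about to run, so subtract
    # one to report the step that just completed (assumption based on usage).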
    step_num -= 1
    logger.info(f"Step {step_num} completed.")

    # --- Screenshot Handling ---
    screenshot_html = ""
    # Ensure state.screenshot exists and is not empty before proceeding
    # Use getattr for safer access
    screenshot_data = getattr(state, "screenshot", None)
    if screenshot_data:
        try:
            # Basic validation: check if it looks like base64
            if (
                isinstance(screenshot_data, str) and len(screenshot_data) > 100
            ):  # Arbitrary length check
                # *** UPDATED STYLE: Removed centering, adjusted width ***
                img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
                screenshot_html = (
                    img_tag + "<br/>"
                )  # Use <br/> for line break after inline-block image
            else:
                logger.warning(
                    f"Screenshot for step {step_num} seems invalid (type: {type(screenshot_data)}, len: {len(screenshot_data) if isinstance(screenshot_data, str) else 'N/A'})."
                )
                screenshot_html = "**[Invalid screenshot data]**<br/>"

        except Exception as e:
            logger.error(
                f"Error processing or formatting screenshot for step {step_num}: {e}",
                exc_info=True,
            )
            screenshot_html = "**[Error displaying screenshot]**<br/>"
    else:
        logger.debug(f"No screenshot available for step {step_num}.")

    # --- Format Agent Output ---
    formatted_output = _format_agent_output(output)  # Use the updated function

    # --- Combine and Append to Chat ---
    step_header = f"--- **Step {step_num}** ---"
    # Combine header, image (with line break), and JSON block
    final_content = step_header + "<br/>" + screenshot_html + formatted_output

    chat_message = {
        "role": "assistant",
        "content": final_content.strip(),  # Remove leading/trailing whitespace
    }

    # Append to the correct chat history list
    webui_manager.bu_chat_history.append(chat_message)

    await asyncio.sleep(0.05)


def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
    """Callback when the agent finishes the task (success or failure)."""
    logger.info(
        f"Agent task finished. Duration: {history.total_duration_seconds():.2f}s, Tokens: {history.total_input_tokens()}"
    )
    final_summary = "**Task Completed**\n"
    final_summary += f"- Duration: {history.total_duration_seconds():.2f} seconds\n"
    final_summary += f"- Total Input Tokens: {history.total_input_tokens()}\n"  # Or total tokens if available

    final_result = history.final_result()
    if final_result:
        final_summary += f"- Final Result: {final_result}\n"

    errors = history.errors()
    if errors and any(errors):
        final_summary += f"- **Errors:**\n```\n{errors}\n```\n"
    else:
        final_summary += "- Status: Success\n"

    webui_manager.bu_chat_history.append(
        {"role": "assistant", "content": final_summary}
    )
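
# _handle_done is synchronous: it only appends to bu_chat_history, which the
# polling loop in run_agent_task renders on its next pass.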


async def _ask_assistant_callback(
    webui_manager: WebuiManager, query: str, browser_context: BrowserContext
) -> Dict[str, Any]:
    """Callback triggered by the agent's ask_for_assistant action."""
    logger.info("Agent requires assistance. Waiting for user input.")

    if not hasattr(webui_manager, "bu_chat_history"):
        logger.error("Chat history not found in webui_manager during ask_assistant!")
        return {"response": "Internal Error: Cannot display help request."}

    webui_manager.bu_chat_history.append(
        {
            "role": "assistant",
            "content": f"**Need Help:** {query}\nPlease provide information or perform the required action in the browser, then type your response/confirmation below and click 'Submit Response'.",
        }
    )

    # Use state stored in webui_manager
    webui_manager.bu_response_event = asyncio.Event()
    webui_manager.bu_user_help_response = None  # Reset previous response

    try:
        logger.info("Waiting for user response event...")
        await asyncio.wait_for(
            webui_manager.bu_response_event.wait(), timeout=3600.0
        )  # Long timeout
        logger.info("User response event received.")
    except asyncio.TimeoutError:
        logger.warning("Timeout waiting for user assistance.")
        webui_manager.bu_chat_history.append(
            {
                "role": "assistant",
                "content": "**Timeout:** No response received. Trying to proceed.",
            }
        )
        webui_manager.bu_response_event = None  # Clear the event
        return {"response": "Timeout: User did not respond."}  # Inform the agent

    response = webui_manager.bu_user_help_response
    webui_manager.bu_chat_history.append(
        {"role": "user", "content": response}
    )  # Show user response in chat
    webui_manager.bu_response_event = (
        None  # Clear the event for the next potential request
    )
    return {"response": response}


# --- Core Agent Execution Logic --- (Needs access to webui_manager)


async def run_agent_task(
    webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
    """Handles the entire lifecycle of initializing and running the agent."""

    # --- Get Components ---
    # Need handles to specific UI components to update them
    user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    run_button_comp = webui_manager.get_component_by_id("browser_use_agent.run_button")
    stop_button_comp = webui_manager.get_component_by_id(
        "browser_use_agent.stop_button"
    )
    pause_resume_button_comp = webui_manager.get_component_by_id(
        "browser_use_agent.pause_resume_button"
    )
    clear_button_comp = webui_manager.get_component_by_id(
        "browser_use_agent.clear_button"
    )
    chatbot_comp = webui_manager.get_component_by_id("browser_use_agent.chatbot")
    history_file_comp = webui_manager.get_component_by_id(
        "browser_use_agent.agent_history_file"
    )
    gif_comp = webui_manager.get_component_by_id("browser_use_agent.recording_gif")
    browser_view_comp = webui_manager.get_component_by_id(
        "browser_use_agent.browser_view"
    )

    # --- 1. Get Task and Initial UI Update ---
    task = components.get(user_input_comp, "").strip()
    if not task:
        gr.Warning("Please enter a task.")
        yield {run_button_comp: gr.update(interactive=True)}
        return

    # Set running state indirectly via _current_task
    webui_manager.bu_chat_history.append({"role": "user", "content": task})

    yield {
        user_input_comp: gr.Textbox(
            value="", interactive=False, placeholder="Agent is running..."
        ),
        run_button_comp: gr.Button(value="⏳ Running...", interactive=False),
        stop_button_comp: gr.Button(interactive=True),
        pause_resume_button_comp: gr.Button(value="⏸️ Pause", interactive=True),
        clear_button_comp: gr.Button(interactive=False),
        chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
        history_file_comp: gr.update(value=None),
        gif_comp: gr.update(value=None),
    }

    # --- Agent Settings ---
    # Access settings values via components dict, getting IDs from webui_manager
    def get_setting(key, default=None):
        comp = webui_manager.id_to_component.get(f"agent_settings.{key}")
        return components.get(comp, default) if comp else default

    override_system_prompt = get_setting("override_system_prompt") or None
    extend_system_prompt = get_setting("extend_system_prompt") or None
    llm_provider_name = get_setting(
        "llm_provider", None
    )  # Default to None if not found
    llm_model_name = get_setting("llm_model_name", None)
    llm_temperature = get_setting("llm_temperature", 0.6)
    use_vision = get_setting("use_vision", True)
    ollama_num_ctx = get_setting("ollama_num_ctx", 16000)
    llm_base_url = get_setting("llm_base_url") or None
    llm_api_key = get_setting("llm_api_key") or None
    max_steps = get_setting("max_steps", 100)
    max_actions = get_setting("max_actions", 10)
    max_input_tokens = get_setting("max_input_tokens", 128000)
    tool_calling_str = get_setting("tool_calling_method", "auto")
    tool_calling_method = tool_calling_str if tool_calling_str != "None" else None
    mcp_server_config_comp = webui_manager.id_to_component.get(
        "agent_settings.mcp_server_config"
    )
    mcp_server_config_str = (
        components.get(mcp_server_config_comp) if mcp_server_config_comp else None
    )
    mcp_server_config = (
        json.loads(mcp_server_config_str) if mcp_server_config_str else None
    )
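    # The MCP config arrives as a JSON string from the settings tab; a
    # hypothetical example of the expected shape (not validated here):
    #   {"mcpServers": {"my_server": {"command": "npx", "args": ["..."]}}}
    # Note that json.loads runs before the try block below, so malformed JSON
    # surfaces as an unhandled setup error.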

    # Planner LLM Settings (Optional)
    planner_llm_provider_name = get_setting("planner_llm_provider") or None
    planner_llm = None
    planner_use_vision = False
    if planner_llm_provider_name:
        planner_llm_model_name = get_setting("planner_llm_model_name")
        planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
        planner_ollama_num_ctx = get_setting("planner_ollama_num_ctx", 16000)
        planner_llm_base_url = get_setting("planner_llm_base_url") or None
        planner_llm_api_key = get_setting("planner_llm_api_key") or None
        planner_use_vision = get_setting("planner_use_vision", False)

        planner_llm = await _initialize_llm(
            planner_llm_provider_name,
            planner_llm_model_name,
            planner_llm_temperature,
            planner_llm_base_url,
            planner_llm_api_key,
            planner_ollama_num_ctx if planner_llm_provider_name == "ollama" else None,
        )

    # --- Browser Settings ---
    def get_browser_setting(key, default=None):
        comp = webui_manager.id_to_component.get(f"browser_settings.{key}")
        return components.get(comp, default) if comp else default

    browser_binary_path = get_browser_setting("browser_binary_path") or None
    browser_user_data_dir = get_browser_setting("browser_user_data_dir") or None
    use_own_browser = get_browser_setting(
        "use_own_browser", False
    )  # Logic handled by CDP/WSS presence
    keep_browser_open = get_browser_setting("keep_browser_open", False)
    headless = get_browser_setting("headless", False)
    disable_security = get_browser_setting("disable_security", False)
    window_w = int(get_browser_setting("window_w", 1280))
    window_h = int(get_browser_setting("window_h", 1100))
    cdp_url = get_browser_setting("cdp_url") or None
    wss_url = get_browser_setting("wss_url") or None
    save_recording_path = get_browser_setting("save_recording_path") or None
    save_trace_path = get_browser_setting("save_trace_path") or None
    save_agent_history_path = get_browser_setting(
        "save_agent_history_path", "./tmp/agent_history"
    )
    save_download_path = get_browser_setting("save_download_path", "./tmp/downloads")

    stream_vw = 70
    stream_vh = int(70 * window_h // window_w)
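    # The live view is sized to 70% of the viewport width, with the height
    # scaled by the configured window aspect ratio so streamed screenshots
    # keep their proportions.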

    os.makedirs(save_agent_history_path, exist_ok=True)
    if save_recording_path:
        os.makedirs(save_recording_path, exist_ok=True)
    if save_trace_path:
        os.makedirs(save_trace_path, exist_ok=True)
    if save_download_path:
        os.makedirs(save_download_path, exist_ok=True)

    # --- 2. Initialize LLM ---
    main_llm = await _initialize_llm(
        llm_provider_name,
        llm_model_name,
        llm_temperature,
        llm_base_url,
        llm_api_key,
        ollama_num_ctx if llm_provider_name == "ollama" else None,
    )

    # Pass the webui_manager instance to the callback when wrapping it
    async def ask_callback_wrapper(
        query: str, browser_context: BrowserContext
    ) -> Dict[str, Any]:
        return await _ask_assistant_callback(webui_manager, query, browser_context)

    # --- 3. Initialize Controller ---
    if not webui_manager.bu_controller:
        webui_manager.bu_controller = CustomController(
            ask_assistant_callback=ask_callback_wrapper
        )
        await webui_manager.bu_controller.setup_mcp_client(mcp_server_config)
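
    # The controller (and its MCP client) is created once and reused across
    # runs until handle_clear tears it down.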

    # --- 4. Initialize Browser and Context ---
    should_close_browser_on_finish = not keep_browser_open

    try:
        # Close existing resources if not keeping open
        if not keep_browser_open:
            if webui_manager.bu_browser_context:
                logger.info("Closing previous browser context.")
                await webui_manager.bu_browser_context.close()
                webui_manager.bu_browser_context = None
            if webui_manager.bu_browser:
                logger.info("Closing previous browser.")
                await webui_manager.bu_browser.close()
                webui_manager.bu_browser = None

        # Create Browser if needed
        if not webui_manager.bu_browser:
            logger.info("Launching new browser instance.")
            extra_args = []
            if use_own_browser:
                browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
                if browser_binary_path == "":
                    browser_binary_path = None
                browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
                if browser_user_data:
                    extra_args += [f"--user-data-dir={browser_user_data}"]
            else:
                browser_binary_path = None

            webui_manager.bu_browser = CustomBrowser(
                config=BrowserConfig(
                    headless=headless,
                    disable_security=disable_security,
                    browser_binary_path=browser_binary_path,
                    extra_browser_args=extra_args,
                    wss_url=wss_url,
                    cdp_url=cdp_url,
                    new_context_config=BrowserContextConfig(
                        window_width=window_w,
                        window_height=window_h,
                    )
                )
            )

        # Create Context if needed
        if not webui_manager.bu_browser_context:
            logger.info("Creating new browser context.")
            context_config = BrowserContextConfig(
                trace_path=save_trace_path if save_trace_path else None,
                save_recording_path=save_recording_path
                if save_recording_path
                else None,
                save_downloads_path=save_download_path if save_download_path else None,
                window_height=window_h,
                window_width=window_w,
            )
            if not webui_manager.bu_browser:
                raise ValueError("Browser not initialized, cannot create context.")
            webui_manager.bu_browser_context = (
                await webui_manager.bu_browser.new_context(config=context_config)
            )

        # --- 5. Initialize or Update Agent ---
        webui_manager.bu_agent_task_id = str(uuid.uuid4())  # New ID for this task run
        os.makedirs(
            os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id),
            exist_ok=True,
        )
        history_file = os.path.join(
            save_agent_history_path,
            webui_manager.bu_agent_task_id,
            f"{webui_manager.bu_agent_task_id}.json",
        )
        gif_path = os.path.join(
            save_agent_history_path,
            webui_manager.bu_agent_task_id,
            f"{webui_manager.bu_agent_task_id}.gif",
        )

        # Pass the webui_manager to callbacks when wrapping them
        async def step_callback_wrapper(
            state: BrowserState, output: AgentOutput, step_num: int
        ):
            await _handle_new_step(webui_manager, state, output, step_num)

        def done_callback_wrapper(history: AgentHistoryList):
            _handle_done(webui_manager, history)

        if not webui_manager.bu_agent:
            logger.info(f"Initializing new agent for task: {task}")
            if not webui_manager.bu_browser or not webui_manager.bu_browser_context:
                raise ValueError(
                    "Browser or Context not initialized, cannot create agent."
                )
            webui_manager.bu_agent = BrowserUseAgent(
                task=task,
                llm=main_llm,
                browser=webui_manager.bu_browser,
                browser_context=webui_manager.bu_browser_context,
                controller=webui_manager.bu_controller,
                register_new_step_callback=step_callback_wrapper,
                register_done_callback=done_callback_wrapper,
                use_vision=use_vision,
                override_system_message=override_system_prompt,
                extend_system_message=extend_system_prompt,
                max_input_tokens=max_input_tokens,
                max_actions_per_step=max_actions,
                tool_calling_method=tool_calling_method,
                planner_llm=planner_llm,
                use_vision_for_planner=planner_use_vision if planner_llm else False,
                source="webui",
            )
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.settings.generate_gif = gif_path
        else:
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.add_new_task(task)
            webui_manager.bu_agent.settings.generate_gif = gif_path
            webui_manager.bu_agent.browser = webui_manager.bu_browser
            webui_manager.bu_agent.browser_context = webui_manager.bu_browser_context
            webui_manager.bu_agent.controller = webui_manager.bu_controller
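
        # On reuse, the same agent continues with the new task appended, so its
        # history spans tasks until handle_clear resets it.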

        # --- 6. Run Agent Task and Stream Updates ---
        agent_run_coro = webui_manager.bu_agent.run(max_steps=max_steps)
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.bu_current_task = agent_task  # Store the task

        last_chat_len = len(webui_manager.bu_chat_history)
        while not agent_task.done():
            is_paused = webui_manager.bu_agent.state.paused
            is_stopped = webui_manager.bu_agent.state.stopped

            # Check for pause state
            if is_paused:
                yield {
                    pause_resume_button_comp: gr.update(
                        value="▶️ Resume", interactive=True
                    ),
                    stop_button_comp: gr.update(interactive=True),
                }
                # Wait until pause is released or task is stopped/done
                while is_paused and not agent_task.done():
                    # Re-check agent state in loop
                    is_paused = webui_manager.bu_agent.state.paused
                    is_stopped = webui_manager.bu_agent.state.stopped
                    if is_stopped:  # Stop signal received while paused
                        break
                    await asyncio.sleep(0.2)

                if (
                    agent_task.done() or is_stopped
                ):  # If stopped or task finished while paused
                    break

                # If resumed, yield UI update
                yield {
                    pause_resume_button_comp: gr.update(
                        value="⏸️ Pause", interactive=True
                    ),
                    run_button_comp: gr.update(
                        value="⏳ Running...", interactive=False
                    ),
                }

            # Check if agent stopped itself or stop button was pressed (which sets agent.state.stopped)
            if is_stopped:
                logger.info("Agent has stopped (internally or via stop button).")
                if not agent_task.done():
                    # Ensure the task coroutine finishes if agent just set flag
                    try:
                        await asyncio.wait_for(
                            agent_task, timeout=1.0
                        )  # Give it a moment to exit run()
                    except asyncio.TimeoutError:
                        logger.warning(
                            "Agent task did not finish quickly after stop signal, cancelling."
                        )
                        agent_task.cancel()
                    except Exception:  # Catch task exceptions if it errors on stop
                        pass
                break  # Exit the streaming loop

            # Check if agent is asking for help (via response_event)
            update_dict = {}
            if webui_manager.bu_response_event is not None:
                update_dict = {
                    user_input_comp: gr.update(
                        placeholder="Agent needs help. Enter response and submit.",
                        interactive=True,
                    ),
                    run_button_comp: gr.update(
                        value="✔️ Submit Response", interactive=True
                    ),
                    pause_resume_button_comp: gr.update(interactive=False),
                    stop_button_comp: gr.update(interactive=False),
                    chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
                }
                last_chat_len = len(webui_manager.bu_chat_history)
                yield update_dict
                # Wait until response is submitted or task finishes
                while (
                    webui_manager.bu_response_event is not None
                    and not agent_task.done()
                ):
                    await asyncio.sleep(0.2)
                # Restore UI after response submitted or if task ended unexpectedly
                if not agent_task.done():
                    yield {
                        user_input_comp: gr.update(
                            placeholder="Agent is running...", interactive=False
                        ),
                        run_button_comp: gr.update(
                            value="⏳ Running...", interactive=False
                        ),
                        pause_resume_button_comp: gr.update(interactive=True),
                        stop_button_comp: gr.update(interactive=True),
                    }
                else:
                    break  # Task finished while waiting for response

            # Update Chatbot if new messages arrived via callbacks
            if len(webui_manager.bu_chat_history) > last_chat_len:
                update_dict[chatbot_comp] = gr.update(
                    value=webui_manager.bu_chat_history
                )
                last_chat_len = len(webui_manager.bu_chat_history)

            # Update Browser View
            if headless and webui_manager.bu_browser_context:
                try:
                    screenshot_b64 = (
                        await webui_manager.bu_browser_context.take_screenshot()
                    )
                    if screenshot_b64:
                        html_content = f'<img src="data:image/jpeg;base64,{screenshot_b64}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
                        update_dict[browser_view_comp] = gr.update(
                            value=html_content, visible=True
                        )
                    else:
                        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
                        update_dict[browser_view_comp] = gr.update(
                            value=html_content, visible=True
                        )
                except Exception as e:
                    logger.debug(f"Failed to capture screenshot: {e}")
                    update_dict[browser_view_comp] = gr.update(
                        value="<div style='...'>Error loading view...</div>",
                        visible=True,
                    )
            else:
                update_dict[browser_view_comp] = gr.update(visible=False)

            # Yield accumulated updates
            if update_dict:
                yield update_dict

            await asyncio.sleep(0.1)  # Polling interval
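
        # The UI is driven entirely by this polling loop: callbacks only mutate
        # shared state on the manager, and this generator turns that state into
        # Gradio updates roughly every 100 ms.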

        # --- 7. Task Finalization ---
        webui_manager.bu_agent.state.paused = False
        webui_manager.bu_agent.state.stopped = False
        final_update = {}
        try:
            logger.info("Agent task completing...")
            # Await the task to ensure completion and catch exceptions if not already caught
            if not agent_task.done():
                await agent_task  # Retrieve result/exception
            elif agent_task.exception():  # Check if task finished with exception
                agent_task.result()  # Raise the exception to be caught below
            logger.info("Agent task completed processing.")

            logger.info(f"Explicitly saving agent history to: {history_file}")
            webui_manager.bu_agent.save_history(history_file)

            if os.path.exists(history_file):
                final_update[history_file_comp] = gr.File(value=history_file)

            if gif_path and os.path.exists(gif_path):
                logger.info(f"GIF found at: {gif_path}")
                final_update[gif_comp] = gr.Image(value=gif_path)

        except asyncio.CancelledError:
            logger.info("Agent task was cancelled.")
            if not any(
                "Cancelled" in msg.get("content", "")
                for msg in webui_manager.bu_chat_history
                if msg.get("role") == "assistant"
            ):
                webui_manager.bu_chat_history.append(
                    {"role": "assistant", "content": "**Task Cancelled**."}
                )
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
        except Exception as e:
            logger.error(f"Error during agent execution: {e}", exc_info=True)
            error_message = (
                f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
            )
            if not any(
                error_message in msg.get("content", "")
                for msg in webui_manager.bu_chat_history
                if msg.get("role") == "assistant"
            ):
                webui_manager.bu_chat_history.append(
                    {"role": "assistant", "content": error_message}
                )
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
            gr.Error(f"Agent execution failed: {e}")

        finally:
            webui_manager.bu_current_task = None  # Clear the task reference

            # Close browser/context if requested
            if should_close_browser_on_finish:
                if webui_manager.bu_browser_context:
                    logger.info("Closing browser context after task.")
                    await webui_manager.bu_browser_context.close()
                    webui_manager.bu_browser_context = None
                if webui_manager.bu_browser:
                    logger.info("Closing browser after task.")
                    await webui_manager.bu_browser.close()
                    webui_manager.bu_browser = None

            # --- 8. Final UI Update ---
            final_update.update(
                {
                    user_input_comp: gr.update(
                        value="",
                        interactive=True,
                        placeholder="Enter your next task...",
                    ),
                    run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
                    stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
                    pause_resume_button_comp: gr.update(
                        value="⏸️ Pause", interactive=False
                    ),
                    clear_button_comp: gr.update(interactive=True),
                    # Ensure final chat history is shown
                    chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
                }
            )
            yield final_update

    except Exception as e:
        # Catch errors during setup (before agent run starts)
        logger.error(f"Error setting up agent task: {e}", exc_info=True)
        webui_manager.bu_current_task = None  # Ensure state is reset
        yield {
            user_input_comp: gr.update(
                interactive=True, placeholder="Error during setup. Enter task..."
            ),
            run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
            stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
            pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=False),
            clear_button_comp: gr.update(interactive=True),
            chatbot_comp: gr.update(
                value=webui_manager.bu_chat_history
                + [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
            ),
        }


# --- Button Click Handlers --- (Need access to webui_manager)


async def handle_submit(
    webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
):
    """Handles clicks on the main 'Submit' button."""
    user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    user_input_value = components.get(user_input_comp, "").strip()

    # Check if waiting for user assistance
    if webui_manager.bu_response_event and not webui_manager.bu_response_event.is_set():
        logger.info(f"User submitted assistance: {user_input_value}")
        webui_manager.bu_user_help_response = (
            user_input_value if user_input_value else "User provided no text response."
        )
        webui_manager.bu_response_event.set()
        # UI updates handled by the main loop reacting to the event being set
        yield {
            user_input_comp: gr.update(
                value="",
                interactive=False,
                placeholder="Waiting for agent to continue...",
            ),
            webui_manager.get_component_by_id(
                "browser_use_agent.run_button"
            ): gr.update(value="⏳ Running...", interactive=False),
        }
    # Check if a task is currently running (using _current_task)
    elif webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
        logger.warning(
            "Submit button clicked while agent is already running and not asking for help."
        )
        gr.Info("Agent is currently running. Please wait or use Stop/Pause.")
        yield {}  # No change
    else:
        # Handle submission for a new task
        logger.info("Submit button clicked for new task.")
        # Use async generator to stream updates from run_agent_task
        async for update in run_agent_task(webui_manager, components):
            yield update
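
# handle_submit plays two roles: while bu_response_event is pending it delivers
# the user's reply to _ask_assistant_callback; otherwise it starts a fresh run
# by delegating to run_agent_task.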


async def handle_stop(webui_manager: WebuiManager):
    """Handles clicks on the 'Stop' button."""
    logger.info("Stop button clicked.")
    agent = webui_manager.bu_agent
    task = webui_manager.bu_current_task

    if agent and task and not task.done():
        # Signal the agent to stop by setting its internal flag
        agent.state.stopped = True
        agent.state.paused = False  # Ensure not paused if stopped
        return {
            webui_manager.get_component_by_id(
                "browser_use_agent.stop_button"
            ): gr.update(interactive=False, value="⏹️ Stopping..."),
            webui_manager.get_component_by_id(
                "browser_use_agent.pause_resume_button"
            ): gr.update(interactive=False),
            webui_manager.get_component_by_id(
                "browser_use_agent.run_button"
            ): gr.update(interactive=False),
        }
    else:
        logger.warning("Stop clicked but agent is not running or task is already done.")
        # Reset UI just in case it's stuck
        return {
            webui_manager.get_component_by_id(
                "browser_use_agent.run_button"
            ): gr.update(interactive=True),
            webui_manager.get_component_by_id(
                "browser_use_agent.stop_button"
            ): gr.update(interactive=False),
            webui_manager.get_component_by_id(
                "browser_use_agent.pause_resume_button"
            ): gr.update(interactive=False),
            webui_manager.get_component_by_id(
                "browser_use_agent.clear_button"
            ): gr.update(interactive=True),
        }


async def handle_pause_resume(webui_manager: WebuiManager):
    """Handles clicks on the 'Pause/Resume' button."""
    agent = webui_manager.bu_agent
    task = webui_manager.bu_current_task

    if agent and task and not task.done():
        if agent.state.paused:
            logger.info("Resume button clicked.")
            agent.resume()
            # UI update happens in main loop
            return {
                webui_manager.get_component_by_id(
                    "browser_use_agent.pause_resume_button"
                ): gr.update(value="⏸️ Pause", interactive=True)
            }  # Optimistic update
        else:
            logger.info("Pause button clicked.")
            agent.pause()
            return {
                webui_manager.get_component_by_id(
                    "browser_use_agent.pause_resume_button"
                ): gr.update(value="▶️ Resume", interactive=True)
            }  # Optimistic update
    else:
        logger.warning(
            "Pause/Resume clicked but agent is not running or doesn't support state."
        )
        return {}  # No change


async def handle_clear(webui_manager: WebuiManager):
    """Handles clicks on the 'Clear' button."""
    logger.info("Clear button clicked.")

    # Stop any running task first
    task = webui_manager.bu_current_task
    if task and not task.done():
        logger.info("Clearing requires stopping the current task.")
        webui_manager.bu_agent.stop()
        task.cancel()
        try:
            await asyncio.wait_for(task, timeout=2.0)  # Wait briefly
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass
        except Exception as e:
            logger.warning(f"Error stopping task on clear: {e}")
    webui_manager.bu_current_task = None

    if webui_manager.bu_controller:
        await webui_manager.bu_controller.close_mcp_client()
        webui_manager.bu_controller = None
    webui_manager.bu_agent = None

    # Reset state stored in manager
    webui_manager.bu_chat_history = []
    webui_manager.bu_response_event = None
    webui_manager.bu_user_help_response = None
    webui_manager.bu_agent_task_id = None

    logger.info("Agent state and browser resources cleared.")

    # Reset UI components
    return {
        webui_manager.get_component_by_id("browser_use_agent.chatbot"): gr.update(
            value=[]
        ),
        webui_manager.get_component_by_id("browser_use_agent.user_input"): gr.update(
            value="", placeholder="Enter your task here..."
        ),
        webui_manager.get_component_by_id(
            "browser_use_agent.agent_history_file"
        ): gr.update(value=None),
        webui_manager.get_component_by_id("browser_use_agent.recording_gif"): gr.update(
            value=None
        ),
        webui_manager.get_component_by_id("browser_use_agent.browser_view"): gr.update(
            value="<div style='...'>Browser Cleared</div>"
        ),
        webui_manager.get_component_by_id("browser_use_agent.run_button"): gr.update(
            value="▶️ Submit Task", interactive=True
        ),
        webui_manager.get_component_by_id("browser_use_agent.stop_button"): gr.update(
            interactive=False
        ),
        webui_manager.get_component_by_id(
            "browser_use_agent.pause_resume_button"
        ): gr.update(value="⏸️ Pause", interactive=False),
        webui_manager.get_component_by_id("browser_use_agent.clear_button"): gr.update(
            interactive=True
        ),
    }


# --- Tab Creation Function ---


def create_browser_use_agent_tab(webui_manager: WebuiManager):
    """
    Create the run agent tab, defining UI, state, and handlers.
    """
    webui_manager.init_browser_use_agent()

    # --- Define UI Components ---
    tab_components = {}
    with gr.Column():
        chatbot = gr.Chatbot(
            lambda: webui_manager.bu_chat_history,  # Load history dynamically
            elem_id="browser_use_chatbot",
            label="Agent Interaction",
            type="messages",
            height=600,
            show_copy_button=True,
        )
        user_input = gr.Textbox(
            label="Your Task or Response",
            placeholder="Enter your task here or provide assistance when asked.",
            lines=3,
            interactive=True,
            elem_id="user_input",
        )
        with gr.Row():
            stop_button = gr.Button(
                "⏹️ Stop", interactive=False, variant="stop", scale=2
            )
            pause_resume_button = gr.Button(
                "⏸️ Pause", interactive=False, variant="secondary", scale=2, visible=True
            )
            clear_button = gr.Button(
                "🗑️ Clear", interactive=True, variant="secondary", scale=2
            )
            run_button = gr.Button("▶️ Submit Task", variant="primary", scale=3)

        browser_view = gr.HTML(
            value="<div style='width:100%; height:50vh; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>Browser View (Requires Headless=True)</p></div>",
            label="Browser Live View",
            elem_id="browser_view",
            visible=False,
        )
    with gr.Column():
        gr.Markdown("### Task Outputs")
        agent_history_file = gr.File(label="Agent History JSON", interactive=False)
        recording_gif = gr.Image(
            label="Task Recording GIF",
            format="gif",
            interactive=False,
            type="filepath",
        )

    # --- Store Components in Manager ---
    tab_components.update(
        dict(
            chatbot=chatbot,
            user_input=user_input,
            clear_button=clear_button,
            run_button=run_button,
            stop_button=stop_button,
            pause_resume_button=pause_resume_button,
            agent_history_file=agent_history_file,
            recording_gif=recording_gif,
            browser_view=browser_view,
        )
    )
    webui_manager.add_components(
        "browser_use_agent", tab_components
    )  # Use "browser_use_agent" as tab_name prefix

    all_managed_components = set(
        webui_manager.get_components()
    )  # Get all components known to manager
    run_tab_outputs = list(tab_components.values())

    async def submit_wrapper(
        components_dict: Dict[Component, Any],
    ) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_submit that yields its results."""
        async for update in handle_submit(webui_manager, components_dict):
            yield update

    async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_stop."""
        update_dict = await handle_stop(webui_manager)
        yield update_dict

    async def pause_resume_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_pause_resume."""
        update_dict = await handle_pause_resume(webui_manager)
        yield update_dict

    async def clear_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_clear."""
        update_dict = await handle_clear(webui_manager)
        yield update_dict

    # --- Connect Event Handlers using the Wrappers ---
    run_button.click(
        fn=submit_wrapper, inputs=all_managed_components, outputs=run_tab_outputs
    )
    user_input.submit(
        fn=submit_wrapper, inputs=all_managed_components, outputs=run_tab_outputs
    )
    stop_button.click(fn=stop_wrapper, inputs=None, outputs=run_tab_outputs)
    pause_resume_button.click(
        fn=pause_resume_wrapper, inputs=None, outputs=run_tab_outputs
    )
    clear_button.click(fn=clear_wrapper, inputs=None, outputs=run_tab_outputs)
src/webui/components/deep_research_agent_tab.py
ADDED
@@ -0,0 +1,457 @@
import gradio as gr
from gradio.components import Component
from functools import partial

from src.webui.webui_manager import WebuiManager
from src.utils import config
import logging
import os
from typing import Any, Dict, AsyncGenerator, Optional, Tuple, Union
import asyncio
import json
from src.agent.deep_research.deep_research_agent import DeepResearchAgent
from src.utils import llm_provider

logger = logging.getLogger(__name__)


async def _initialize_llm(provider: Optional[str], model_name: Optional[str], temperature: float,
                          base_url: Optional[str], api_key: Optional[str], num_ctx: Optional[int] = None):
    """Initializes the LLM based on settings. Returns None if provider/model is missing."""
    if not provider or not model_name:
        logger.info("LLM Provider or Model Name not specified, LLM will be None.")
        return None
    try:
        logger.info(f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}")
        # Use your actual LLM provider logic here
        llm = llm_provider.get_llm_model(
            provider=provider,
            model_name=model_name,
            temperature=temperature,
            base_url=base_url or None,
            api_key=api_key or None,
            num_ctx=num_ctx if provider == "ollama" else None
        )
        return llm
    except Exception as e:
        logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
        gr.Warning(
            f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}")
        return None
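
# (This helper mirrors _initialize_llm in browser_use_agent_tab.py.)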


def _read_file_safe(file_path: str) -> Optional[str]:
    """Safely read a file, returning None if it doesn't exist or on error."""
    if not os.path.exists(file_path):
        return None
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as e:
        logger.error(f"Error reading file {file_path}: {e}")
        return None


# --- Deep Research Agent Specific Logic ---

async def run_deep_research(webui_manager: WebuiManager, components: Dict[Component, Any]) -> AsyncGenerator[
    Dict[Component, Any], None]:
    """Handles initializing and running the DeepResearchAgent."""

    # --- Get Components ---
    research_task_comp = webui_manager.get_component_by_id("deep_research_agent.research_task")
    resume_task_id_comp = webui_manager.get_component_by_id("deep_research_agent.resume_task_id")
    parallel_num_comp = webui_manager.get_component_by_id("deep_research_agent.parallel_num")
    save_dir_comp = webui_manager.get_component_by_id(
        "deep_research_agent.max_query")  # Note: component ID seems misnamed in original code
    start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
    stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
    markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
    markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")
    mcp_server_config_comp = webui_manager.get_component_by_id("deep_research_agent.mcp_server_config")

    # --- 1. Get Task and Settings ---
    task_topic = components.get(research_task_comp, "").strip()
    task_id_to_resume = components.get(resume_task_id_comp, "").strip() or None
    max_parallel_agents = int(components.get(parallel_num_comp, 1))
    base_save_dir = components.get(save_dir_comp, "./tmp/deep_research").strip()
    safe_root_dir = "./tmp/deep_research"
    normalized_base_save_dir = os.path.abspath(os.path.normpath(base_save_dir))
    if os.path.commonpath([normalized_base_save_dir, os.path.abspath(safe_root_dir)]) != os.path.abspath(safe_root_dir):
        logger.warning(f"Unsafe base_save_dir detected: {base_save_dir}. Using default directory.")
        normalized_base_save_dir = os.path.abspath(safe_root_dir)
    base_save_dir = normalized_base_save_dir
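    # commonpath() pins any user-supplied save dir inside ./tmp/deep_research;
    # paths that escape this sandbox root silently fall back to the default.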
    mcp_server_config_str = components.get(mcp_server_config_comp)
    mcp_config = json.loads(mcp_server_config_str) if mcp_server_config_str else None

    if not task_topic:
        gr.Warning("Please enter a research task.")
        yield {start_button_comp: gr.update(interactive=True)}  # Re-enable start button
        return

    # Store base save dir for stop handler
    webui_manager.dr_save_dir = base_save_dir
    os.makedirs(base_save_dir, exist_ok=True)

    # --- 2. Initial UI Update ---
    yield {
        start_button_comp: gr.update(value="⏳ Running...", interactive=False),
        stop_button_comp: gr.update(interactive=True),
        research_task_comp: gr.update(interactive=False),
        resume_task_id_comp: gr.update(interactive=False),
        parallel_num_comp: gr.update(interactive=False),
        save_dir_comp: gr.update(interactive=False),
        markdown_display_comp: gr.update(value="Starting research..."),
        markdown_download_comp: gr.update(value=None, interactive=False)
    }

    agent_task = None
    running_task_id = None
    plan_file_path = None
    report_file_path = None
    last_plan_content = None
    last_plan_mtime = 0

    try:
        # --- 3. Get LLM and Browser Config from other tabs ---
        # Access settings values via components dict, getting IDs from webui_manager
        def get_setting(tab: str, key: str, default: Any = None):
            comp = webui_manager.id_to_component.get(f"{tab}.{key}")
            return components.get(comp, default) if comp else default

        # LLM Config (from agent_settings tab)
        llm_provider_name = get_setting("agent_settings", "llm_provider")
        llm_model_name = get_setting("agent_settings", "llm_model_name")
        llm_temperature = max(get_setting("agent_settings", "llm_temperature", 0.5), 0.5)
        llm_base_url = get_setting("agent_settings", "llm_base_url")
        llm_api_key = get_setting("agent_settings", "llm_api_key")
        ollama_num_ctx = get_setting("agent_settings", "ollama_num_ctx")

        llm = await _initialize_llm(
            llm_provider_name, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
            ollama_num_ctx if llm_provider_name == "ollama" else None
        )
        if not llm:
            raise ValueError("LLM Initialization failed. Please check Agent Settings.")

        # Browser Config (from browser_settings tab)
        # Note: DeepResearchAgent constructor takes a dict, not full Browser/Context objects
        browser_config_dict = {
            "headless": get_setting("browser_settings", "headless", False),
            "disable_security": get_setting("browser_settings", "disable_security", False),
            "browser_binary_path": get_setting("browser_settings", "browser_binary_path"),
            "user_data_dir": get_setting("browser_settings", "browser_user_data_dir"),
            "window_width": int(get_setting("browser_settings", "window_w", 1280)),
            "window_height": int(get_setting("browser_settings", "window_h", 1100)),
            # Add other relevant fields if DeepResearchAgent accepts them
        }

        # --- 4. Initialize or Get Agent ---
        if not webui_manager.dr_agent:
            webui_manager.dr_agent = DeepResearchAgent(
                llm=llm,
                browser_config=browser_config_dict,
                mcp_server_config=mcp_config
            )
            logger.info("DeepResearchAgent initialized.")

        # --- 5. Start Agent Run ---
        agent_run_coro = webui_manager.dr_agent.run(
            topic=task_topic,
            task_id=task_id_to_resume,
            save_dir=base_save_dir,
            max_parallel_browsers=max_parallel_agents
        )
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.dr_current_task = agent_task

        # Wait briefly for the agent to start and potentially create the task ID/folder
        await asyncio.sleep(1.0)

        # Determine the actual task ID being used (agent sets this)
        running_task_id = webui_manager.dr_agent.current_task_id
        if not running_task_id:
            # The agent might not have set it yet; fall back to the resume ID
            # if one was provided.
            running_task_id = task_id_to_resume
            if not running_task_id:
                logger.warning("Could not determine running task ID immediately.")
                # We can still monitor, but may miss the initial plan if the ID is needed for the path
            else:
                logger.info(f"Assuming task ID based on resume ID: {running_task_id}")
        else:
            logger.info(f"Agent started with Task ID: {running_task_id}")

        webui_manager.dr_task_id = running_task_id  # Store for stop handler

        # --- 6. Monitor Progress via research_plan.md ---
        if running_task_id:
            task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
            plan_file_path = os.path.join(task_specific_dir, "research_plan.md")
            report_file_path = os.path.join(task_specific_dir, "report.md")
            logger.info(f"Monitoring plan file: {plan_file_path}")
        else:
            logger.warning("Cannot monitor plan file: Task ID unknown.")
            plan_file_path = None
        last_plan_content = None
        while not agent_task.done():
            update_dict = {}
            update_dict[resume_task_id_comp] = gr.update(value=running_task_id)
            agent_stopped = getattr(webui_manager.dr_agent, 'stopped', False)
            if agent_stopped:
                logger.info("Stop signal detected from agent state.")
                break  # Exit monitoring loop

            # Check and update research plan display
            if plan_file_path:
                try:
                    current_mtime = os.path.getmtime(plan_file_path) if os.path.exists(plan_file_path) else 0
                    if current_mtime > last_plan_mtime:
                        logger.info(f"Detected change in {plan_file_path}")
                        plan_content = _read_file_safe(plan_file_path)
                        if last_plan_content is None or (
                                plan_content is not None and plan_content != last_plan_content):
                            update_dict[markdown_display_comp] = gr.update(value=plan_content)
                            last_plan_content = plan_content
                            last_plan_mtime = current_mtime
                        elif plan_content is None:
                            # File might have been deleted or became unreadable
                            last_plan_mtime = 0  # Reset to force a re-read attempt later
                except Exception as e:
                    logger.warning(f"Error checking/reading plan file {plan_file_path}: {e}")
                    # Avoid continuous logging for the same error
                    await asyncio.sleep(2.0)

            # Yield updates if any
            if update_dict:
                yield update_dict

            await asyncio.sleep(1.0)  # Check file changes every second
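
            # Progress is surfaced by polling research_plan.md's mtime rather
            # than by agent events, so the display can lag by up to the sleep
            # interval above.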

        # --- 7. Task Finalization ---
        logger.info("Agent task processing finished. Awaiting final result...")
        final_result_dict = await agent_task  # Get result or raise exception
        logger.info(f"Agent run completed. Result keys: {final_result_dict.keys() if final_result_dict else 'None'}")

        # Try to get task ID from result if not known before
        if not running_task_id and final_result_dict and 'task_id' in final_result_dict:
            running_task_id = final_result_dict['task_id']
            webui_manager.dr_task_id = running_task_id
            task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
            report_file_path = os.path.join(task_specific_dir, "report.md")
            logger.info(f"Task ID confirmed from result: {running_task_id}")

        final_ui_update = {}
        if report_file_path and os.path.exists(report_file_path):
            logger.info(f"Loading final report from: {report_file_path}")
            report_content = _read_file_safe(report_file_path)
            if report_content:
                final_ui_update[markdown_display_comp] = gr.update(value=report_content)
                final_ui_update[markdown_download_comp] = gr.File(value=report_file_path,
                                                                  label=f"Report ({running_task_id}.md)",
                                                                  interactive=True)
            else:
                final_ui_update[markdown_display_comp] = gr.update(
                    value="# Research Complete\n\n*Error reading final report file.*")
        elif final_result_dict and 'report' in final_result_dict:
            logger.info("Using report content directly from agent result.")
            # If agent directly returns report content
            final_ui_update[markdown_display_comp] = gr.update(value=final_result_dict['report'])
            # Cannot offer download if only content is available
            final_ui_update[markdown_download_comp] = gr.update(value=None, label="Download Research Report",
                                                                interactive=False)
        else:
            logger.warning("Final report file not found and not in result dict.")
            final_ui_update[markdown_display_comp] = gr.update(value="# Research Complete\n\n*Final report not found.*")

        yield final_ui_update

    except Exception as e:
        logger.error(f"Error during Deep Research Agent execution: {e}", exc_info=True)
        gr.Error(f"Research failed: {e}")
        yield {markdown_display_comp: gr.update(value=f"# Research Failed\n\n**Error:**\n```\n{e}\n```")}

    finally:
        # --- 8. Final UI Reset ---
        webui_manager.dr_current_task = None  # Clear task reference
        webui_manager.dr_task_id = None  # Clear running task ID

        yield {
            start_button_comp: gr.update(value="▶️ Run", interactive=True),
            stop_button_comp: gr.update(interactive=False),
            research_task_comp: gr.update(interactive=True),
            resume_task_id_comp: gr.update(value="", interactive=True),
            parallel_num_comp: gr.update(interactive=True),
            save_dir_comp: gr.update(interactive=True),
            # Keep download button enabled if file exists
            markdown_download_comp: gr.update() if report_file_path and os.path.exists(report_file_path) else gr.update(
                interactive=False)
        }


async def stop_deep_research(webui_manager: WebuiManager) -> Dict[Component, Any]:
    """Handles the Stop button click."""
    logger.info("Stop button clicked for Deep Research.")
    agent = webui_manager.dr_agent
    task = webui_manager.dr_current_task
    task_id = webui_manager.dr_task_id
    base_save_dir = webui_manager.dr_save_dir

    stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
    start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
    markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
    markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")

    final_update = {
        stop_button_comp: gr.update(interactive=False, value="⏹️ Stopping...")
    }

    if agent and task and not task.done():
|
311 |
+
logger.info("Signalling DeepResearchAgent to stop.")
|
312 |
+
try:
|
313 |
+
# Assuming stop is synchronous or sets a flag quickly
|
314 |
+
await agent.stop()
|
315 |
+
except Exception as e:
|
316 |
+
logger.error(f"Error calling agent.stop(): {e}")
|
317 |
+
|
318 |
+
# The run_deep_research loop should detect the stop and exit.
|
319 |
+
# We yield an intermediate "Stopping..." state. The final reset is done by run_deep_research.
|
320 |
+
|
321 |
+
# Try to show the final report if available after stopping
|
322 |
+
await asyncio.sleep(1.5) # Give agent a moment to write final files potentially
|
323 |
+
report_file_path = None
|
324 |
+
if task_id and base_save_dir:
|
325 |
+
report_file_path = os.path.join(base_save_dir, str(task_id), "report.md")
|
326 |
+
|
327 |
+
if report_file_path and os.path.exists(report_file_path):
|
328 |
+
report_content = _read_file_safe(report_file_path)
|
329 |
+
if report_content:
|
330 |
+
final_update[markdown_display_comp] = gr.update(
|
331 |
+
value=report_content + "\n\n---\n*Research stopped by user.*")
|
332 |
+
final_update[markdown_download_comp] = gr.File(value=report_file_path, label=f"Report ({task_id}.md)",
|
333 |
+
interactive=True)
|
334 |
+
else:
|
335 |
+
final_update[markdown_display_comp] = gr.update(
|
336 |
+
value="# Research Stopped\n\n*Error reading final report file after stop.*")
|
337 |
+
else:
|
338 |
+
final_update[markdown_display_comp] = gr.update(value="# Research Stopped by User")
|
339 |
+
|
340 |
+
# Keep start button disabled, run_deep_research finally block will re-enable it.
|
341 |
+
final_update[start_button_comp] = gr.update(interactive=False)
|
342 |
+
|
343 |
+
else:
|
344 |
+
logger.warning("Stop clicked but no active research task found.")
|
345 |
+
# Reset UI state just in case
|
346 |
+
final_update = {
|
347 |
+
start_button_comp: gr.update(interactive=True),
|
348 |
+
stop_button_comp: gr.update(interactive=False),
|
349 |
+
webui_manager.get_component_by_id("deep_research_agent.research_task"): gr.update(interactive=True),
|
350 |
+
webui_manager.get_component_by_id("deep_research_agent.resume_task_id"): gr.update(interactive=True),
|
351 |
+
webui_manager.get_component_by_id("deep_research_agent.max_iteration"): gr.update(interactive=True),
|
352 |
+
webui_manager.get_component_by_id("deep_research_agent.max_query"): gr.update(interactive=True),
|
353 |
+
}
|
354 |
+
|
355 |
+
return final_update
|
356 |
+
|
357 |
+
|
358 |
+
async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
|
359 |
+
"""
|
360 |
+
Update the MCP server.
|
361 |
+
"""
|
362 |
+
if hasattr(webui_manager, "dr_agent") and webui_manager.dr_agent:
|
363 |
+
logger.warning("⚠️ Close controller because mcp file has changed!")
|
364 |
+
await webui_manager.dr_agent.close_mcp_client()
|
365 |
+
|
366 |
+
if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
|
367 |
+
logger.warning(f"{mcp_file} is not a valid MCP file.")
|
368 |
+
return None, gr.update(visible=False)
|
369 |
+
|
370 |
+
with open(mcp_file, 'r') as f:
|
371 |
+
mcp_server = json.load(f)
|
372 |
+
|
373 |
+
return json.dumps(mcp_server, indent=2), gr.update(visible=True)
|
374 |
+
|
375 |
+
|
376 |
+
def create_deep_research_agent_tab(webui_manager: WebuiManager):
|
377 |
+
"""
|
378 |
+
Creates a deep research agent tab
|
379 |
+
"""
|
380 |
+
input_components = set(webui_manager.get_components())
|
381 |
+
tab_components = {}
|
382 |
+
|
383 |
+
with gr.Group():
|
384 |
+
with gr.Row():
|
385 |
+
mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
|
386 |
+
mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)
|
387 |
+
|
388 |
+
with gr.Group():
|
389 |
+
research_task = gr.Textbox(label="Research Task", lines=5,
|
390 |
+
value="Give me a detailed travel plan to Switzerland from June 1st to 10th.",
|
391 |
+
interactive=True)
|
392 |
+
with gr.Row():
|
393 |
+
resume_task_id = gr.Textbox(label="Resume Task ID", value="",
|
394 |
+
interactive=True)
|
395 |
+
parallel_num = gr.Number(label="Parallel Agent Num", value=1,
|
396 |
+
precision=0,
|
397 |
+
interactive=True)
|
398 |
+
max_query = gr.Textbox(label="Research Save Dir", value="./tmp/deep_research",
|
399 |
+
interactive=True)
|
400 |
+
with gr.Row():
|
401 |
+
stop_button = gr.Button("⏹️ Stop", variant="stop", scale=2)
|
402 |
+
start_button = gr.Button("▶️ Run", variant="primary", scale=3)
|
403 |
+
with gr.Group():
|
404 |
+
markdown_display = gr.Markdown(label="Research Report")
|
405 |
+
markdown_download = gr.File(label="Download Research Report", interactive=False)
|
406 |
+
tab_components.update(
|
407 |
+
dict(
|
408 |
+
research_task=research_task,
|
409 |
+
parallel_num=parallel_num,
|
410 |
+
max_query=max_query,
|
411 |
+
start_button=start_button,
|
412 |
+
stop_button=stop_button,
|
413 |
+
markdown_display=markdown_display,
|
414 |
+
markdown_download=markdown_download,
|
415 |
+
resume_task_id=resume_task_id,
|
416 |
+
mcp_json_file=mcp_json_file,
|
417 |
+
mcp_server_config=mcp_server_config,
|
418 |
+
)
|
419 |
+
)
|
420 |
+
webui_manager.add_components("deep_research_agent", tab_components)
|
421 |
+
webui_manager.init_deep_research_agent()
|
422 |
+
|
423 |
+
async def update_wrapper(mcp_file):
|
424 |
+
"""Wrapper for handle_pause_resume."""
|
425 |
+
update_dict = await update_mcp_server(mcp_file, webui_manager)
|
426 |
+
yield update_dict
|
427 |
+
|
428 |
+
mcp_json_file.change(
|
429 |
+
update_wrapper,
|
430 |
+
inputs=[mcp_json_file],
|
431 |
+
outputs=[mcp_server_config, mcp_server_config]
|
432 |
+
)
|
433 |
+
|
434 |
+
dr_tab_outputs = list(tab_components.values())
|
435 |
+
all_managed_inputs = set(webui_manager.get_components())
|
436 |
+
|
437 |
+
# --- Define Event Handler Wrappers ---
|
438 |
+
async def start_wrapper(comps: Dict[Component, Any]) -> AsyncGenerator[Dict[Component, Any], None]:
|
439 |
+
async for update in run_deep_research(webui_manager, comps):
|
440 |
+
yield update
|
441 |
+
|
442 |
+
async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
|
443 |
+
update_dict = await stop_deep_research(webui_manager)
|
444 |
+
yield update_dict
|
445 |
+
|
446 |
+
# --- Connect Handlers ---
|
447 |
+
start_button.click(
|
448 |
+
fn=start_wrapper,
|
449 |
+
inputs=all_managed_inputs,
|
450 |
+
outputs=dr_tab_outputs
|
451 |
+
)
|
452 |
+
|
453 |
+
stop_button.click(
|
454 |
+
fn=stop_wrapper,
|
455 |
+
inputs=None,
|
456 |
+
outputs=dr_tab_outputs
|
457 |
+
)
|
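Note: update_mcp_server() above expects the uploaded JSON to contain a top-level "mcpServers" mapping, the same shape used in the test configs later in this commit. A minimal sketch of producing such a file (the desktop-commander entry mirrors the tests; the output path is hypothetical):

    import json

    # Minimal MCP config of the shape update_mcp_server() accepts; the
    # "desktop-commander" server mirrors the configs used in tests/.
    mcp_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": ["-y", "@wonderwhy-er/desktop-commander"],
            }
        }
    }

    # Hypothetical path; any .json file uploaded via the "MCP server json" field works.
    with open("./tmp/mcp_servers.json", "w") as f:
        json.dump(mcp_config, f, indent=2)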
src/webui/components/load_save_config_tab.py
ADDED
@@ -0,0 +1,50 @@
+import gradio as gr
+from gradio.components import Component
+
+from src.webui.webui_manager import WebuiManager
+from src.utils import config
+
+
+def create_load_save_config_tab(webui_manager: WebuiManager):
+    """
+    Creates a load and save config tab.
+    """
+    input_components = set(webui_manager.get_components())
+    tab_components = {}
+
+    config_file = gr.File(
+        label="Load UI Settings from json",
+        file_types=[".json"],
+        interactive=True
+    )
+    with gr.Row():
+        load_config_button = gr.Button("Load Config", variant="primary")
+        save_config_button = gr.Button("Save UI Settings", variant="primary")
+
+    config_status = gr.Textbox(
+        label="Status",
+        lines=2,
+        interactive=False
+    )
+
+    tab_components.update(dict(
+        load_config_button=load_config_button,
+        save_config_button=save_config_button,
+        config_status=config_status,
+        config_file=config_file,
+    ))
+
+    webui_manager.add_components("load_save_config", tab_components)
+
+    save_config_button.click(
+        fn=webui_manager.save_config,
+        inputs=set(webui_manager.get_components()),
+        outputs=[config_status]
+    )
+
+    load_config_button.click(
+        fn=webui_manager.load_config,
+        inputs=[config_file],
+        outputs=webui_manager.get_components(),
+    )
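For reference, save_config (defined in webui_manager.py below) writes a flat JSON object keyed by "tab.component" ids. A hypothetical sketch of what such a settings file contains (the values shown are illustrative, not from a real run; the planner_llm_provider key is the one load_config treats specially):

    import json

    # Hypothetical example of a saved settings file: a flat mapping from
    # "tab.component" ids (assigned by WebuiManager.add_components) to values.
    example_settings = {
        "agent_settings.planner_llm_provider": "openai",  # hypothetical value
        "deep_research_agent.research_task": "Give me a detailed travel plan to Switzerland from June 1st to 10th.",
        "deep_research_agent.max_query": "./tmp/deep_research",
    }
    with open("./tmp/webui_settings/example.json", "w") as f:
        json.dump(example_settings, f, indent=4)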
src/webui/interface.py
ADDED
@@ -0,0 +1,95 @@
+import gradio as gr
+
+from src.webui.webui_manager import WebuiManager
+from src.webui.components.agent_settings_tab import create_agent_settings_tab
+from src.webui.components.browser_settings_tab import create_browser_settings_tab
+from src.webui.components.browser_use_agent_tab import create_browser_use_agent_tab
+from src.webui.components.deep_research_agent_tab import create_deep_research_agent_tab
+from src.webui.components.load_save_config_tab import create_load_save_config_tab
+
+theme_map = {
+    "Default": gr.themes.Default(),
+    "Soft": gr.themes.Soft(),
+    "Monochrome": gr.themes.Monochrome(),
+    "Glass": gr.themes.Glass(),
+    "Origin": gr.themes.Origin(),
+    "Citrus": gr.themes.Citrus(),
+    "Ocean": gr.themes.Ocean(),
+    "Base": gr.themes.Base()
+}
+
+
+def create_ui(theme_name="Ocean"):
+    css = """
+    .gradio-container {
+        width: 70vw !important;
+        max-width: 70% !important;
+        margin-left: auto !important;
+        margin-right: auto !important;
+        padding-top: 10px !important;
+    }
+    .header-text {
+        text-align: center;
+        margin-bottom: 20px;
+    }
+    .tab-header-text {
+        text-align: center;
+    }
+    .theme-section {
+        margin-bottom: 10px;
+        padding: 15px;
+        border-radius: 10px;
+    }
+    """
+
+    # dark mode by default
+    js_func = """
+    function refresh() {
+        const url = new URL(window.location);
+
+        if (url.searchParams.get('__theme') !== 'dark') {
+            url.searchParams.set('__theme', 'dark');
+            window.location.href = url.href;
+        }
+    }
+    """
+
+    ui_manager = WebuiManager()
+
+    with gr.Blocks(
+            title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js_func,
+    ) as demo:
+        with gr.Row():
+            gr.Markdown(
+                """
+                # 🌐 Browser Use WebUI
+                ### Control your browser with AI assistance
+                """,
+                elem_classes=["header-text"],
+            )
+
+        with gr.Tabs() as tabs:
+            with gr.TabItem("⚙️ Agent Settings"):
+                create_agent_settings_tab(ui_manager)
+
+            with gr.TabItem("🌐 Browser Settings"):
+                create_browser_settings_tab(ui_manager)
+
+            with gr.TabItem("🤖 Run Agent"):
+                create_browser_use_agent_tab(ui_manager)
+
+            with gr.TabItem("🎁 Agent Marketplace"):
+                gr.Markdown(
+                    """
+                    ### Agents built on Browser-Use
+                    """,
+                    elem_classes=["tab-header-text"],
+                )
+                with gr.Tabs():
+                    with gr.TabItem("Deep Research"):
+                        create_deep_research_agent_tab(ui_manager)
+
+            with gr.TabItem("📁 Load & Save Config"):
+                create_load_save_config_tab(ui_manager)
+
+    return demo
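A minimal sketch of using create_ui directly with a non-default theme (this is essentially what webui.py at the end of this commit does via argparse):

    from src.webui.interface import create_ui, theme_map

    assert "Soft" in theme_map  # any key of theme_map is a valid theme_name
    demo = create_ui(theme_name="Soft")
    demo.queue().launch(server_name="127.0.0.1", server_port=7788)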
src/webui/webui_manager.py
ADDED
@@ -0,0 +1,122 @@
+import json
+from collections.abc import Generator
+from typing import TYPE_CHECKING
+import os
+import gradio as gr
+from datetime import datetime
+from typing import Optional, Dict, List
+import uuid
+import asyncio
+import time
+
+from gradio.components import Component
+from browser_use.browser.browser import Browser
+from browser_use.browser.context import BrowserContext
+from browser_use.agent.service import Agent
+from src.browser.custom_browser import CustomBrowser
+from src.browser.custom_context import CustomBrowserContext
+from src.controller.custom_controller import CustomController
+from src.agent.deep_research.deep_research_agent import DeepResearchAgent
+
+
+class WebuiManager:
+    def __init__(self, settings_save_dir: str = "./tmp/webui_settings"):
+        self.id_to_component: dict[str, Component] = {}
+        self.component_to_id: dict[Component, str] = {}
+
+        self.settings_save_dir = settings_save_dir
+        os.makedirs(self.settings_save_dir, exist_ok=True)
+
+    def init_browser_use_agent(self) -> None:
+        """
+        Init browser use agent state.
+        """
+        self.bu_agent: Optional[Agent] = None
+        self.bu_browser: Optional[CustomBrowser] = None
+        self.bu_browser_context: Optional[CustomBrowserContext] = None
+        self.bu_controller: Optional[CustomController] = None
+        self.bu_chat_history: List[Dict[str, Optional[str]]] = []
+        self.bu_response_event: Optional[asyncio.Event] = None
+        self.bu_user_help_response: Optional[str] = None
+        self.bu_current_task: Optional[asyncio.Task] = None
+        self.bu_agent_task_id: Optional[str] = None
+
+    def init_deep_research_agent(self) -> None:
+        """
+        Init deep research agent state.
+        """
+        self.dr_agent: Optional[DeepResearchAgent] = None
+        self.dr_current_task = None
+        self.dr_task_id: Optional[str] = None  # Read/written by the deep research tab handlers
+        self.dr_save_dir: Optional[str] = None
+
+    def add_components(self, tab_name: str, components_dict: dict[str, "Component"]) -> None:
+        """
+        Add tab components.
+        """
+        for comp_name, component in components_dict.items():
+            comp_id = f"{tab_name}.{comp_name}"
+            self.id_to_component[comp_id] = component
+            self.component_to_id[component] = comp_id
+
+    def get_components(self) -> list["Component"]:
+        """
+        Get all components.
+        """
+        return list(self.id_to_component.values())
+
+    def get_component_by_id(self, comp_id: str) -> "Component":
+        """
+        Get component by id.
+        """
+        return self.id_to_component[comp_id]
+
+    def get_id_by_component(self, comp: "Component") -> str:
+        """
+        Get id by component.
+        """
+        return self.component_to_id[comp]
+
+    def save_config(self, components: Dict["Component", str]) -> str:
+        """
+        Save the current UI settings to a timestamped JSON file and return its path.
+        """
+        cur_settings = {}
+        for comp in components:
+            if not isinstance(comp, gr.Button) and not isinstance(comp, gr.File) and str(
+                    getattr(comp, "interactive", True)).lower() != "false":
+                comp_id = self.get_id_by_component(comp)
+                cur_settings[comp_id] = components[comp]
+
+        config_name = datetime.now().strftime("%Y%m%d-%H%M%S")
+        config_path = os.path.join(self.settings_save_dir, f"{config_name}.json")
+        with open(config_path, "w") as fw:
+            json.dump(cur_settings, fw, indent=4)
+
+        return config_path
+
+    def load_config(self, config_path: str):
+        """
+        Load UI settings from a JSON file and yield component updates.
+        """
+        with open(config_path, "r") as fr:
+            ui_settings = json.load(fr)
+
+        update_components = {}
+        for comp_id, comp_val in ui_settings.items():
+            if comp_id in self.id_to_component:
+                comp = self.id_to_component[comp_id]
+                if comp.__class__.__name__ == "Chatbot":
+                    update_components[comp] = comp.__class__(value=comp_val, type="messages")
+                else:
+                    update_components[comp] = comp.__class__(value=comp_val)
+                if comp_id == "agent_settings.planner_llm_provider":
+                    yield update_components  # yield the provider first, let its change callback run
+                    time.sleep(0.1)  # wait for the Gradio UI callback
+
+        config_status = self.id_to_component["load_save_config.config_status"]
+        update_components.update(
+            {
+                config_status: config_status.__class__(value=f"Successfully loaded config: {config_path}")
+            }
+        )
+        yield update_components
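A small sketch of the id/component registry contract that the tab modules rely on (the tab and component names here are hypothetical):

    import gradio as gr
    from src.webui.webui_manager import WebuiManager

    manager = WebuiManager()
    with gr.Blocks():
        box = gr.Textbox(label="Example")  # hypothetical component
        manager.add_components("demo_tab", {"example_box": box})

    # Ids are "tab.component"; lookup works in both directions.
    assert manager.get_component_by_id("demo_tab.example_box") is box
    assert manager.get_id_by_component(box) == "demo_tab.example_box"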
supervisord.conf
ADDED
@@ -0,0 +1,80 @@
+[supervisord]
+user=root
+nodaemon=true
+logfile=/dev/stdout
+logfile_maxbytes=0
+loglevel=error
+
+[program:xvfb]
+command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=100
+startsecs=3
+stopsignal=TERM
+stopwaitsecs=10
+
+[program:vnc_setup]
+command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
+autorestart=false
+startsecs=0
+priority=150
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+
+[program:x11vnc]
+command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=200
+startretries=10
+startsecs=10
+stopsignal=TERM
+stopwaitsecs=10
+depends_on=vnc_setup,xvfb
+
+[program:x11vnc_log]
+command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=250
+stopsignal=TERM
+stopwaitsecs=5
+depends_on=x11vnc
+
+[program:novnc]
+command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc"
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=300
+startretries=5
+startsecs=3
+depends_on=x11vnc
+
+[program:webui]
+command=python webui.py --ip 0.0.0.0 --port 7788
+directory=/app
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=400
+startretries=3
+startsecs=3
+stopsignal=TERM
+stopwaitsecs=10
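The supervised programs start in priority order (xvfb, vnc_setup, x11vnc and its log tailer, novnc, then the WebUI). A hypothetical smoke check that the three exposed ports from this config (5901 for x11vnc, 6080 for noVNC, 7788 for the Gradio WebUI) are accepting connections:

    # Hypothetical smoke check: wait until the supervised services accept connections.
    import socket
    import time

    def wait_for_port(port: int, host: str = "127.0.0.1", timeout: float = 60.0) -> bool:
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                with socket.create_connection((host, port), timeout=2):
                    return True
            except OSError:
                time.sleep(1)
        return False

    for port in (5901, 6080, 7788):  # x11vnc, noVNC, Gradio WebUI
        print(port, "up" if wait_for_port(port) else "DOWN")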
tests/test_agents.py
ADDED
@@ -0,0 +1,400 @@
+import pdb
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import sys
+
+sys.path.append(".")
+import asyncio
+import os
+from pprint import pprint
+
+from browser_use import Agent
+from browser_use.agent.views import AgentHistoryList
+
+from src.utils import utils
+
+
+async def test_browser_use_agent():
+    from browser_use.browser.browser import Browser, BrowserConfig
+    from browser_use.browser.context import (
+        BrowserContextConfig
+    )
+    from browser_use.agent.service import Agent
+
+    from src.browser.custom_browser import CustomBrowser
+    from src.controller.custom_controller import CustomController
+    from src.utils import llm_provider
+    from src.agent.browser_use.browser_use_agent import BrowserUseAgent
+
+    llm = llm_provider.get_llm_model(
+        provider="openai",
+        model_name="gpt-4o",
+        temperature=0.8,
+    )
+
+    # llm = llm_provider.get_llm_model(
+    #     provider="google",
+    #     model_name="gemini-2.0-flash",
+    #     temperature=0.6,
+    #     api_key=os.getenv("GOOGLE_API_KEY", "")
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="deepseek",
+    #     model_name="deepseek-reasoner",
+    #     temperature=0.8
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="deepseek",
+    #     model_name="deepseek-chat",
+    #     temperature=0.8
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="ollama", model_name="qwen2.5:7b", temperature=0.5
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
+    # )
+
+    window_w, window_h = 1280, 1100
+
+    # llm = llm_provider.get_llm_model(
+    #     provider="azure_openai",
+    #     model_name="gpt-4o",
+    #     temperature=0.5,
+    #     base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
+    #     api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
+    # )
+
+    mcp_server_config = {
+        "mcpServers": {
+            # "markitdown": {
+            #     "command": "docker",
+            #     "args": [
+            #         "run",
+            #         "--rm",
+            #         "-i",
+            #         "markitdown-mcp:latest"
+            #     ]
+            # },
+            "desktop-commander": {
+                "command": "npx",
+                "args": [
+                    "-y",
+                    "@wonderwhy-er/desktop-commander"
+                ]
+            },
+        }
+    }
+    controller = CustomController()
+    await controller.setup_mcp_client(mcp_server_config)
+    use_own_browser = True
+    use_vision = True  # Set to False when using DeepSeek
+
+    max_actions_per_step = 10
+    browser = None
+    browser_context = None
+
+    try:
+        extra_browser_args = []
+        if use_own_browser:
+            browser_binary_path = os.getenv("BROWSER_PATH", None)
+            if browser_binary_path == "":
+                browser_binary_path = None
+            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
+            if browser_user_data:
+                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
+        else:
+            browser_binary_path = None
+        browser = CustomBrowser(
+            config=BrowserConfig(
+                headless=False,
+                browser_binary_path=browser_binary_path,
+                extra_browser_args=extra_browser_args,
+                new_context_config=BrowserContextConfig(
+                    window_width=window_w,
+                    window_height=window_h,
+                )
+            )
+        )
+        browser_context = await browser.new_context(
+            config=BrowserContextConfig(
+                trace_path=None,
+                save_recording_path=None,
+                save_downloads_path="./tmp/downloads",
+                window_height=window_h,
+                window_width=window_w,
+            )
+        )
+        agent = BrowserUseAgent(
+            # task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
+            task="give me nvidia stock price",
+            llm=llm,
+            browser=browser,
+            browser_context=browser_context,
+            controller=controller,
+            use_vision=use_vision,
+            max_actions_per_step=max_actions_per_step,
+            generate_gif=True
+        )
+        history: AgentHistoryList = await agent.run(max_steps=100)
+
+        print("Final Result:")
+        pprint(history.final_result(), indent=4)
+
+        print("\nErrors:")
+        pprint(history.errors(), indent=4)
+
+    except Exception:
+        import traceback
+        traceback.print_exc()
+    finally:
+        if browser_context:
+            await browser_context.close()
+        if browser:
+            await browser.close()
+        if controller:
+            await controller.close_mcp_client()
+
+
+async def test_browser_use_parallel():
+    from browser_use.browser.browser import Browser, BrowserConfig
+    from browser_use.browser.context import (
+        BrowserContextConfig,
+    )
+    from browser_use.agent.service import Agent
+
+    from src.browser.custom_browser import CustomBrowser
+    from src.controller.custom_controller import CustomController
+    from src.utils import llm_provider
+    from src.agent.browser_use.browser_use_agent import BrowserUseAgent
+
+    # llm = utils.get_llm_model(
+    #     provider="openai",
+    #     model_name="gpt-4o",
+    #     temperature=0.8,
+    #     base_url=os.getenv("OPENAI_ENDPOINT", ""),
+    #     api_key=os.getenv("OPENAI_API_KEY", ""),
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="google",
+    #     model_name="gemini-2.0-flash",
+    #     temperature=0.6,
+    #     api_key=os.getenv("GOOGLE_API_KEY", "")
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="deepseek",
+    #     model_name="deepseek-reasoner",
+    #     temperature=0.8
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="deepseek",
+    #     model_name="deepseek-chat",
+    #     temperature=0.8
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="ollama", model_name="qwen2.5:7b", temperature=0.5
+    # )
+
+    # llm = utils.get_llm_model(
+    #     provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
+    # )
+
+    window_w, window_h = 1280, 1100
+
+    llm = llm_provider.get_llm_model(
+        provider="azure_openai",
+        model_name="gpt-4o",
+        temperature=0.5,
+        base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
+        api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
+    )
+
+    mcp_server_config = {
+        "mcpServers": {
+            # "markitdown": {
+            #     "command": "docker",
+            #     "args": [
+            #         "run",
+            #         "--rm",
+            #         "-i",
+            #         "markitdown-mcp:latest"
+            #     ]
+            # },
+            "desktop-commander": {
+                "command": "npx",
+                "args": [
+                    "-y",
+                    "@wonderwhy-er/desktop-commander"
+                ]
+            },
+            # "filesystem": {
+            #     "command": "npx",
+            #     "args": [
+            #         "-y",
+            #         "@modelcontextprotocol/server-filesystem",
+            #         "/Users/xxx/ai_workspace",
+            #     ]
+            # },
+        }
+    }
+    controller = CustomController()
+    await controller.setup_mcp_client(mcp_server_config)
+    use_own_browser = True
+    use_vision = True  # Set to False when using DeepSeek
+
+    max_actions_per_step = 10
+    browser = None
+    browser_context = None
+
+    try:
+        extra_browser_args = []
+        if use_own_browser:
+            browser_binary_path = os.getenv("BROWSER_PATH", None)
+            if browser_binary_path == "":
+                browser_binary_path = None
+            browser_user_data = os.getenv("BROWSER_USER_DATA", None)
+            if browser_user_data:
+                extra_browser_args += [f"--user-data-dir={browser_user_data}"]
+        else:
+            browser_binary_path = None
+        browser = CustomBrowser(
+            config=BrowserConfig(
+                headless=False,
+                browser_binary_path=browser_binary_path,
+                extra_browser_args=extra_browser_args,
+                new_context_config=BrowserContextConfig(
+                    window_width=window_w,
+                    window_height=window_h,
+                )
+            )
+        )
+        browser_context = await browser.new_context(
+            config=BrowserContextConfig(
+                trace_path=None,
+                save_recording_path=None,
+                save_downloads_path="./tmp/downloads",
+                window_height=window_h,
+                window_width=window_w,
+                force_new_context=True
+            )
+        )
+        agents = [
+            BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
+            for task in [
+                'Search Google for weather in Tokyo',
+                # 'Check Reddit front page title',
+                # 'Find NASA image of the day',
+                # 'Check top story on CNN',
+                # 'Search latest SpaceX launch date',
+                # 'Look up population of Paris',
+                'Find current time in Sydney',
+                'Check who won last Super Bowl',
+                # 'Search trending topics on Twitter',
+            ]
+        ]
+
+        # asyncio.gather returns one history per agent; report each one separately.
+        histories = await asyncio.gather(*[agent.run() for agent in agents])
+        for history in histories:
+            print("Final Result:")
+            pprint(history.final_result(), indent=4)
+
+            print("\nErrors:")
+            pprint(history.errors(), indent=4)
+
+        pdb.set_trace()
+
+    except Exception:
+        import traceback
+
+        traceback.print_exc()
+    finally:
+        if browser_context:
+            await browser_context.close()
+        if browser:
+            await browser.close()
+        if controller:
+            await controller.close_mcp_client()
+
+
+async def test_deep_research_agent():
+    from src.agent.deep_research.deep_research_agent import DeepResearchAgent, PLAN_FILENAME, REPORT_FILENAME
+    from src.utils import llm_provider
+
+    llm = llm_provider.get_llm_model(
+        provider="openai",
+        model_name="gpt-4o",
+        temperature=0.5
+    )
+
+    # llm = llm_provider.get_llm_model(
+    #     provider="bedrock",
+    # )
+
+    mcp_server_config = {
+        "mcpServers": {
+            "desktop-commander": {
+                "command": "npx",
+                "args": [
+                    "-y",
+                    "@wonderwhy-er/desktop-commander"
+                ]
+            },
+        }
+    }
+
+    browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
+    agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
+    research_topic = "Give me investment advice on Nvidia and Tesla."
+    task_id_to_resume = ""  # Set this to resume a previous task ID
+
+    print(f"Starting research on: {research_topic}")
+
+    try:
+        # Call run and wait for the final result dictionary
+        result = await agent.run(research_topic,
+                                 task_id=task_id_to_resume,
+                                 save_dir="./tmp/deep_research",
+                                 max_parallel_browsers=1,
+                                 )
+
+        print("\n--- Research Process Ended ---")
+        print(f"Status: {result.get('status')}")
+        print(f"Message: {result.get('message')}")
+        print(f"Task ID: {result.get('task_id')}")
+
+        # Check the final state for the report
+        final_state = result.get('final_state', {})
+        if final_state:
+            print("\n--- Final State Summary ---")
+            print(
+                f"  Plan Steps Completed: {sum(1 for item in final_state.get('research_plan', []) if item.get('status') == 'completed')}")
+            print(f"  Total Search Results Logged: {len(final_state.get('search_results', []))}")
+            if final_state.get("final_report"):
+                print("  Final Report: Generated (content omitted). You can find it in the output directory.")
+                # print("\n--- Final Report ---")  # Optionally print report
+                # print(final_state["final_report"])
+            else:
+                print("  Final Report: Not generated.")
+        else:
+            print("Final state information not available.")
+
+    except Exception as e:
+        print("\n--- An unhandled error occurred outside the agent run ---")
+        print(e)
+
+
+if __name__ == "__main__":
+    asyncio.run(test_browser_use_agent())
+    # asyncio.run(test_browser_use_parallel())
+    # asyncio.run(test_deep_research_agent())
tests/test_controller.py
ADDED
@@ -0,0 +1,131 @@
+import asyncio
+import pdb
+import sys
+import time
+
+sys.path.append(".")
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+async def test_mcp_client():
+    from src.utils.mcp_client import setup_mcp_client_and_tools, create_tool_param_model
+
+    test_server_config = {
+        "mcpServers": {
+            # "markitdown": {
+            #     "command": "docker",
+            #     "args": [
+            #         "run",
+            #         "--rm",
+            #         "-i",
+            #         "markitdown-mcp:latest"
+            #     ]
+            # },
+            "desktop-commander": {
+                "command": "npx",
+                "args": [
+                    "-y",
+                    "@wonderwhy-er/desktop-commander"
+                ]
+            },
+            # "filesystem": {
+            #     "command": "npx",
+            #     "args": [
+            #         "-y",
+            #         "@modelcontextprotocol/server-filesystem",
+            #         "/Users/xxx/ai_workspace",
+            #     ]
+            # },
+        }
+    }
+
+    mcp_tools, mcp_client = await setup_mcp_client_and_tools(test_server_config)
+
+    for tool in mcp_tools:
+        tool_param_model = create_tool_param_model(tool)
+        print(tool.name)
+        print(tool.description)
+        print(tool_param_model.model_json_schema())
+    pdb.set_trace()
+
+
+async def test_controller_with_mcp():
+    import os
+    from src.controller.custom_controller import CustomController
+    from browser_use.controller.registry.views import ActionModel
+
+    mcp_server_config = {
+        "mcpServers": {
+            # "markitdown": {
+            #     "command": "docker",
+            #     "args": [
+            #         "run",
+            #         "--rm",
+            #         "-i",
+            #         "markitdown-mcp:latest"
+            #     ]
+            # },
+            "desktop-commander": {
+                "command": "npx",
+                "args": [
+                    "-y",
+                    "@wonderwhy-er/desktop-commander"
+                ]
+            },
+            # "filesystem": {
+            #     "command": "npx",
+            #     "args": [
+            #         "-y",
+            #         "@modelcontextprotocol/server-filesystem",
+            #         "/Users/xxx/ai_workspace",
+            #     ]
+            # },
+        }
+    }
+
+    controller = CustomController()
+    await controller.setup_mcp_client(mcp_server_config)
+    action_name = "mcp.desktop-commander.execute_command"
+    action_info = controller.registry.registry.actions[action_name]
+    param_model = action_info.param_model
+    print(param_model.model_json_schema())
+    params = {"command": "python ./tmp/test.py"}
+    validated_params = param_model(**params)
+    ActionModel_ = controller.registry.create_action_model()
+    # Create an ActionModel instance with the validated parameters
+    action_model = ActionModel_(**{action_name: validated_params})
+    result = await controller.act(action_model)
+    result = result.extracted_content
+    print(result)
+    if result and "Command is still running. Use read_output to get more output." in result and "PID" in \
+            result.split("\n")[0]:
+        pid = int(result.split("\n")[0].split("PID")[-1].strip())
+        action_name = "mcp.desktop-commander.read_output"
+        action_info = controller.registry.registry.actions[action_name]
+        param_model = action_info.param_model
+        print(param_model.model_json_schema())
+        params = {"pid": pid}
+        validated_params = param_model(**params)
+        action_model = ActionModel_(**{action_name: validated_params})
+        output_result = ""
+        while True:
+            time.sleep(1)
+            result = await controller.act(action_model)
+            result = result.extracted_content
+            if result:
+                pdb.set_trace()
+                output_result = result
+                break
+        print(output_result)
+        pdb.set_trace()
+    await controller.close_mcp_client()
+    pdb.set_trace()
+
+
+if __name__ == '__main__':
+    # asyncio.run(test_mcp_client())
+    asyncio.run(test_controller_with_mcp())
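As a companion sketch to the test above, one could enumerate every MCP-backed action the controller registered, assuming the "mcp.<server>.<tool>" naming seen in test_controller_with_mcp:

    from src.controller.custom_controller import CustomController

    async def list_mcp_actions(mcp_server_config: dict) -> list[str]:
        # Hypothetical helper: set up the MCP client, collect the registered
        # action names that follow the "mcp.<server>.<tool>" pattern, clean up.
        controller = CustomController()
        await controller.setup_mcp_client(mcp_server_config)
        try:
            return [name for name in controller.registry.registry.actions if name.startswith("mcp.")]
        finally:
            await controller.close_mcp_client()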
tests/test_llm_api.py
ADDED
@@ -0,0 +1,159 @@
+import os
+import pdb
+from dataclasses import dataclass
+
+from dotenv import load_dotenv
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_ollama import ChatOllama
+
+load_dotenv()
+
+import sys
+
+sys.path.append(".")
+
+
+@dataclass
+class LLMConfig:
+    provider: str
+    model_name: str
+    temperature: float = 0.8
+    base_url: str = None
+    api_key: str = None
+
+
+def create_message_content(text, image_path=None):
+    content = [{"type": "text", "text": text}]
+    image_format = "png" if image_path and image_path.endswith(".png") else "jpeg"
+    if image_path:
+        from src.utils import utils
+        image_data = utils.encode_image(image_path)
+        content.append({
+            "type": "image_url",
+            "image_url": {"url": f"data:image/{image_format};base64,{image_data}"}
+        })
+    return content
+
+
+def get_env_value(key, provider):
+    env_mappings = {
+        "openai": {"api_key": "OPENAI_API_KEY", "base_url": "OPENAI_ENDPOINT"},
+        "azure_openai": {"api_key": "AZURE_OPENAI_API_KEY", "base_url": "AZURE_OPENAI_ENDPOINT"},
+        "google": {"api_key": "GOOGLE_API_KEY"},
+        "deepseek": {"api_key": "DEEPSEEK_API_KEY", "base_url": "DEEPSEEK_ENDPOINT"},
+        "mistral": {"api_key": "MISTRAL_API_KEY", "base_url": "MISTRAL_ENDPOINT"},
+        "alibaba": {"api_key": "ALIBABA_API_KEY", "base_url": "ALIBABA_ENDPOINT"},
+        "moonshot": {"api_key": "MOONSHOT_API_KEY", "base_url": "MOONSHOT_ENDPOINT"},
+        "ibm": {"api_key": "IBM_API_KEY", "base_url": "IBM_ENDPOINT"}
+    }
+
+    if provider in env_mappings and key in env_mappings[provider]:
+        return os.getenv(env_mappings[provider][key], "")
+    return ""
+
+
+def test_llm(config, query, image_path=None, system_message=None):
+    from src.utils import utils, llm_provider
+
+    # Special handling for Ollama-based models
+    if config.provider == "ollama":
+        if "deepseek-r1" in config.model_name:
+            from src.utils.llm_provider import DeepSeekR1ChatOllama
+            llm = DeepSeekR1ChatOllama(model=config.model_name)
+        else:
+            llm = ChatOllama(model=config.model_name)
+
+        ai_msg = llm.invoke(query)
+        print(ai_msg.content)
+        if "deepseek-r1" in config.model_name:
+            pdb.set_trace()
+        return
+
+    # For other providers, use the standard configuration
+    llm = llm_provider.get_llm_model(
+        provider=config.provider,
+        model_name=config.model_name,
+        temperature=config.temperature,
+        base_url=config.base_url or get_env_value("base_url", config.provider),
+        api_key=config.api_key or get_env_value("api_key", config.provider)
+    )
+
+    # Prepare messages for non-Ollama models
+    messages = []
+    if system_message:
+        messages.append(SystemMessage(content=create_message_content(system_message)))
+    messages.append(HumanMessage(content=create_message_content(query, image_path)))
+    ai_msg = llm.invoke(messages)
+
+    # Handle different response types
+    if hasattr(ai_msg, "reasoning_content"):
+        print(ai_msg.reasoning_content)
+    print(ai_msg.content)
+
+
+def test_openai_model():
+    config = LLMConfig(provider="openai", model_name="gpt-4o")
+    test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+def test_google_model():
+    # Enable your API key first if you haven't: https://ai.google.dev/palm_docs/oauth_quickstart
+    config = LLMConfig(provider="google", model_name="gemini-2.0-flash-exp")
+    test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+def test_azure_openai_model():
+    config = LLMConfig(provider="azure_openai", model_name="gpt-4o")
+    test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+def test_deepseek_model():
+    config = LLMConfig(provider="deepseek", model_name="deepseek-chat")
+    test_llm(config, "Who are you?")
+
+
+def test_deepseek_r1_model():
+    config = LLMConfig(provider="deepseek", model_name="deepseek-reasoner")
+    test_llm(config, "Which is greater, 9.11 or 9.8?", system_message="You are a helpful AI assistant.")
+
+
+def test_ollama_model():
+    config = LLMConfig(provider="ollama", model_name="qwen2.5:7b")
+    test_llm(config, "Sing a ballad of LangChain.")
+
+
+def test_deepseek_r1_ollama_model():
+    config = LLMConfig(provider="ollama", model_name="deepseek-r1:14b")
+    test_llm(config, "How many 'r's are in the word 'strawberry'?")
+
+
+def test_mistral_model():
+    config = LLMConfig(provider="mistral", model_name="pixtral-large-latest")
+    test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+def test_moonshot_model():
+    config = LLMConfig(provider="moonshot", model_name="moonshot-v1-32k-vision-preview")
+    test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+def test_ibm_model():
+    config = LLMConfig(provider="ibm", model_name="meta-llama/llama-4-maverick-17b-128e-instruct-fp8")
+    test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+def test_qwen_model():
+    config = LLMConfig(provider="alibaba", model_name="qwen-vl-max")
+    test_llm(config, "How many 'r's are in the word 'strawberry'?")
+
+
+if __name__ == "__main__":
+    # test_openai_model()
+    # test_google_model()
+    test_azure_openai_model()
+    # test_deepseek_model()
+    # test_ollama_model()
+    # test_deepseek_r1_model()
+    # test_deepseek_r1_ollama_model()
+    # test_mistral_model()
+    # test_ibm_model()
+    # test_qwen_model()
tests/test_playwright.py
ADDED
@@ -0,0 +1,31 @@
+import pdb
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_connect_browser():
+    import os
+    from playwright.sync_api import sync_playwright
+
+    chrome_exe = os.getenv("CHROME_PATH", "")
+    chrome_user_data = os.getenv("CHROME_USER_DATA", "")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch_persistent_context(
+            user_data_dir=chrome_user_data,
+            executable_path=chrome_exe,
+            headless=False  # Keep browser window visible
+        )
+
+        page = browser.new_page()
+        page.goto("https://mail.google.com/mail/u/0/#inbox")
+        page.wait_for_load_state()
+
+        input("Press the Enter key to close the browser...")
+
+        browser.close()
+
+
+if __name__ == '__main__':
+    test_connect_browser()
webui.py
ADDED
@@ -0,0 +1,19 @@
+from dotenv import load_dotenv
+load_dotenv()
+import argparse
+from src.webui.interface import theme_map, create_ui
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Gradio WebUI for Browser Agent")
+    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
+    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
+    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
+    args = parser.parse_args()
+
+    demo = create_ui(theme_name=args.theme)
+    demo.queue().launch(server_name=args.ip, server_port=args.port)
+
+
+if __name__ == '__main__':
+    main()