asifHuggingFace committed on
Commit
405ca38
·
1 Parent(s): a4ca470

Add Git LFS support and migrate binary files

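A minimal sketch of the Git LFS migration this commit message describes, assuming the stock `git-lfs` CLI; the tracked pattern is an assumption inferred from the PNG assets added below:

```bash
# One-time: install the LFS hooks into this repository
git lfs install

# Track the binary types being migrated (inferred from the PNG assets in this commit)
git lfs track "*.png"

# The tracking rules live in .gitattributes and must be committed too
git add .gitattributes assets/
git commit -m "Add Git LFS support and migrate binary files"
```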
.dockerignore ADDED
@@ -0,0 +1,5 @@
+ data
+ tmp
+ results
+
+ .env
.env.example ADDED
@@ -0,0 +1,68 @@
+ OPENAI_ENDPOINT=https://api.openai.com/v1
+ OPENAI_API_KEY=
+
+ ANTHROPIC_API_KEY=
+ ANTHROPIC_ENDPOINT=https://api.anthropic.com
+
+ GOOGLE_API_KEY=
+
+ AZURE_OPENAI_ENDPOINT=
+ AZURE_OPENAI_API_KEY=
+ AZURE_OPENAI_API_VERSION=2025-01-01-preview
+
+ DEEPSEEK_ENDPOINT=https://api.deepseek.com
+ DEEPSEEK_API_KEY=
+
+ MISTRAL_API_KEY=
+ MISTRAL_ENDPOINT=https://api.mistral.ai/v1
+
+ OLLAMA_ENDPOINT=http://localhost:11434
+
+ ALIBABA_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1
+ ALIBABA_API_KEY=
+
+ MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1
+ MOONSHOT_API_KEY=
+
+ UNBOUND_ENDPOINT=https://api.getunbound.ai
+ UNBOUND_API_KEY=
+
+ SiliconFLOW_ENDPOINT=https://api.siliconflow.cn/v1/
+ SiliconFLOW_API_KEY=
+
+ IBM_ENDPOINT=https://us-south.ml.cloud.ibm.com
+ IBM_API_KEY=
+ IBM_PROJECT_ID=
+
+ GROK_ENDPOINT="https://api.x.ai/v1"
+ GROK_API_KEY=
+
+ # Set the default LLM
+ DEFAULT_LLM=openai
+
+
+ # Set to false to disable anonymized telemetry
+ ANONYMIZED_TELEMETRY=false
+
+ # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
+ BROWSER_USE_LOGGING_LEVEL=info
+
+ # Browser settings
+ BROWSER_PATH=
+ BROWSER_USER_DATA=
+ BROWSER_DEBUGGING_PORT=9222
+ BROWSER_DEBUGGING_HOST=localhost
+ # Set to true to keep the browser open between AI tasks
+ KEEP_BROWSER_OPEN=true
+ USE_OWN_BROWSER=false
+ BROWSER_CDP=
+ # Display settings
+ # Format: WIDTHxHEIGHTxDEPTH
+ RESOLUTION=1920x1080x24
+ # Width in pixels
+ RESOLUTION_WIDTH=1920
+ # Height in pixels
+ RESOLUTION_HEIGHT=1080
+
+ # VNC settings
+ VNC_PASSWORD=yourvncpassword
.github/workflows/build.yml ADDED
@@ -0,0 +1,124 @@
+ name: Build Docker Image
+
+ on:
+   release:
+     types: [published]
+   push:
+     branches: [main]
+
+ env:
+   GITHUB_CR_REPO: ghcr.io/${{ github.repository }}
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+     strategy:
+       fail-fast: false
+       matrix:
+         platform:
+           - linux/amd64
+           - linux/arm64
+     steps:
+       - name: Prepare
+         run: |
+           platform=${{ matrix.platform }}
+           echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+       - name: Docker meta
+         id: meta
+         uses: docker/metadata-action@v5
+         with:
+           images: |
+             ${{ env.GITHUB_CR_REPO }}
+
+       - name: Login to GHCR
+         uses: docker/login-action@v3
+         with:
+           registry: ghcr.io
+           username: ${{ github.repository_owner }}
+           password: ${{ secrets.GITHUB_TOKEN }}
+
+       - name: Set up QEMU
+         uses: docker/setup-qemu-action@v3
+
+       - name: Set up Docker Buildx
+         uses: docker/setup-buildx-action@v3
+
+       - name: Build and push by digest
+         id: build
+         uses: docker/build-push-action@v6
+         with:
+           platforms: ${{ matrix.platform }}
+           labels: ${{ steps.meta.outputs.labels }}
+           tags: |
+             ${{ env.GITHUB_CR_REPO }}
+           build-args: |
+             TARGETPLATFORM=${{ matrix.platform }}
+           outputs: type=image,push-by-digest=true,name-canonical=true,push=true
+
+       - name: Export digest
+         run: |
+           mkdir -p ${{ runner.temp }}/digests
+           digest="${{ steps.build.outputs.digest }}"
+           touch "${{ runner.temp }}/digests/${digest#sha256:}"
+
+       - name: Upload digest
+         uses: actions/upload-artifact@v4
+         with:
+           name: digests-${{ env.PLATFORM_PAIR }}
+           path: ${{ runner.temp }}/digests/*
+           if-no-files-found: error
+           retention-days: 1
+
+   merge:
+     runs-on: ubuntu-latest
+     needs:
+       - build
+     steps:
+       - name: Download digests
+         uses: actions/download-artifact@v4
+         with:
+           path: ${{ runner.temp }}/digests
+           pattern: digests-*
+           merge-multiple: true
+
+       - name: Login to GHCR
+         uses: docker/login-action@v3
+         with:
+           registry: ghcr.io
+           username: ${{ github.repository_owner }}
+           password: ${{ secrets.GITHUB_TOKEN }}
+
+       - name: Set up Docker Buildx
+         uses: docker/setup-buildx-action@v3
+
+       - name: Docker meta
+         id: meta
+         uses: docker/metadata-action@v5
+         with:
+           images: |
+             ${{ env.GITHUB_CR_REPO }}
+           tags: |
+             type=ref,event=branch
+             type=ref,event=pr
+             type=semver,pattern={{version}}
+             type=semver,pattern={{major}}
+
+       - name: Docker tags
+         run: |
+           tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")
+           if [ -z "$tags" ]; then
+             echo "DOCKER_METADATA_OUTPUT_VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
+             tags="-t ${{ env.GITHUB_CR_REPO }}:${{ github.ref_name }}"
+           fi
+           echo "DOCKER_METADATA_TAGS=$tags" >> $GITHUB_ENV
+
+       - name: Create manifest list and push
+         working-directory: ${{ runner.temp }}/digests
+         run: |
+           docker buildx imagetools create ${{ env.DOCKER_METADATA_TAGS }} \
+             $(printf '${{ env.GITHUB_CR_REPO }}@sha256:%s ' *)
+
+       - name: Inspect image
+         run: |
+           docker buildx imagetools inspect ${{ env.GITHUB_CR_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION }}
.gitignore ADDED
@@ -0,0 +1,192 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ test_env/
+ myenv
+
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ .idea/
+ temp
+ tmp
+
+
+ .DS_Store
+
+ private_example.py
+ private_example
+
+ browser_cookies.json
+ cookies.json
+ AgentHistory.json
+ cv_04_24.pdf
+ AgentHistoryList.json
+ *.gif
+
+ # For Sharing (.pem files)
+ .gradio/
+
+ # For Docker
+ data/
+
+ # For Config Files (Current Settings)
+ .config.pkl
+ *.pdf
+
+ workflow
.vscode/settings.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "python.analysis.typeCheckingMode": "basic",
+   "[python]": {
+     "editor.defaultFormatter": "charliermarsh.ruff",
+     "editor.formatOnSave": true,
+     "editor.codeActionsOnSave": {
+       "source.fixAll.ruff": "explicit",
+       "source.organizeImports.ruff": "explicit"
+     }
+   }
+ }
Dockerfile ADDED
@@ -0,0 +1,99 @@
+ FROM python:3.11-slim
+
+ # Set platform for multi-arch builds (Docker Buildx will set this)
+ ARG TARGETPLATFORM
+ ARG NODE_MAJOR=20
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     wget \
+     netcat-traditional \
+     gnupg \
+     curl \
+     unzip \
+     xvfb \
+     libgconf-2-4 \
+     libxss1 \
+     libnss3 \
+     libnspr4 \
+     libasound2 \
+     libatk1.0-0 \
+     libatk-bridge2.0-0 \
+     libcups2 \
+     libdbus-1-3 \
+     libdrm2 \
+     libgbm1 \
+     libgtk-3-0 \
+     libxcomposite1 \
+     libxdamage1 \
+     libxfixes3 \
+     libxrandr2 \
+     xdg-utils \
+     fonts-liberation \
+     dbus \
+     xauth \
+     x11vnc \
+     tigervnc-tools \
+     supervisor \
+     net-tools \
+     procps \
+     git \
+     python3-numpy \
+     fontconfig \
+     fonts-dejavu \
+     fonts-dejavu-core \
+     fonts-dejavu-extra \
+     vim \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install noVNC
+ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
+     && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
+     && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
+
+ # Install Node.js using the NodeSource repository
+ RUN mkdir -p /etc/apt/keyrings \
+     && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
+     && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
+     && apt-get update \
+     && apt-get install nodejs -y \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Verify Node.js and npm installation (optional, but good for debugging)
+ RUN node -v && npm -v && npx -v
+
+ # Set up working directory
+ WORKDIR /app
+
+ # Copy requirements and install Python dependencies
+ COPY requirements.txt .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Install Playwright browsers and dependencies.
+ # Playwright respects PLAYWRIGHT_BROWSERS_PATH as its browser install location,
+ # so set it explicitly and create the directory up front.
+ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-browsers
+ RUN mkdir -p $PLAYWRIGHT_BROWSERS_PATH
+
+ # Recommended: install Google Chrome instead of plain Chromium for better undetectability.
+ # 'playwright install chrome' downloads and places it; the '--with-deps' equivalent
+ # is to run 'playwright install-deps chrome' afterwards.
+ # RUN playwright install chrome --with-deps
+
+ # Alternative: install Chromium if Google Chrome is problematic in certain environments
+ RUN playwright install chromium --with-deps
+
+
+ # Copy the application code
+ COPY . .
+
+ # Set up supervisor configuration
+ RUN mkdir -p /var/log/supervisor
+ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+ EXPOSE 7788 6080 5901 9222
+
+ CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
+ #CMD ["/bin/bash"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Browser Use Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README_DEPLOYMENT.md ADDED
@@ -0,0 +1,70 @@
+ # Browser Automation WebUI - Deployment Guide
+
+ ## Deploying to Hugging Face Spaces
+
+ ### Prerequisites
+ - A Hugging Face account
+ - Your code pushed to a Git repository
+
+ ### Steps to Deploy
+
+ 1. **Create a new Space on Hugging Face**
+    - Go to https://huggingface.co/spaces
+    - Click "Create new Space"
+    - Choose "Gradio" as the SDK
+    - Select your repository or create a new one
+
+ 2. **File Structure for Deployment**
+    ```
+    web-ui/
+    ├── app.py            # Main entry point (created)
+    ├── requirements.txt  # Dependencies
+    ├── src/              # Source code
+    └── README.md         # Documentation
+    ```
+
+ 3. **Key Files for Deployment**
+    - `app.py`: Main entry point that Gradio will use
+    - `requirements.txt`: All necessary dependencies
+    - `src/`: Your source code directory
+
+ ### Troubleshooting the "Failed to canonicalize script path" Error
+
+ This error typically occurs when:
+ - Gradio can't find the main entry point
+ - Import paths are not properly configured
+ - The file structure doesn't match deployment expectations
+
+ **Solution**: The `app.py` file has been created to serve as the proper entry point for Gradio deployment.
+
+ ### Environment Variables
+
+ If your app requires environment variables, you can set them in the Hugging Face Space settings:
+ - Go to your Space settings
+ - Navigate to "Repository secrets"
+ - Add any required environment variables; the app reads them at runtime, as sketched below
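A minimal sketch of the runtime side, assuming the `python-dotenv` package already used by `app.py`; `OPENAI_API_KEY` is taken from `.env.example` above:

```python
import os

from dotenv import load_dotenv

# Locally this reads .env; on Hugging Face Spaces it is effectively a no-op,
# because Repository secrets are injected directly as environment variables.
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY", "")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; add it under Repository secrets.")
```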
+
+ ### Local Testing
+
+ To test the deployment locally before pushing:
+
+ ```bash
+ cd web-ui
+ python app.py
+ ```
+
+ This should start the Gradio interface without the canonicalization error.
+
+ ### Common Issues and Solutions
+
+ 1. **Import Errors**: Make sure all imports use relative paths from the project root
+ 2. **Missing Dependencies**: Ensure all packages are listed in `requirements.txt`
+ 3. **Path Issues**: The `app.py` file includes proper path configuration
+
+ ### Deployment Checklist
+
+ - [ ] `app.py` exists and is properly configured
+ - [ ] All dependencies are in `requirements.txt`
+ - [ ] All import paths are correct
+ - [ ] Environment variables are configured (if needed)
+ - [ ] Local testing works without errors
SECURITY.md ADDED
@@ -0,0 +1,19 @@
+ ## Reporting Security Issues
+
+ If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure.
+
+ **Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.**
+
+ Instead, please open a new [GitHub security advisory](https://github.com/browser-use/web-ui/security/advisories/new).
+
+ Please include as much of the information listed below as you can to help me better understand and resolve the issue:
+
+ * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
+ * Full paths of source file(s) related to the manifestation of the issue
+ * The location of the affected source code (tag/branch/commit or direct URL)
+ * Any special configuration required to reproduce the issue
+ * Step-by-step instructions to reproduce the issue
+ * Proof-of-concept or exploit code (if possible)
+ * Impact of the issue, including how an attacker might exploit it
+
+ This information will help me triage your report more quickly.
app.py ADDED
@@ -0,0 +1,17 @@
+ import os
+ import sys
+
+ # Add the current directory to the Python path to ensure imports work
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ from src.webui.interface import create_ui
+
+ # Create the Gradio app
+ demo = create_ui(theme_name="Ocean")
+
+ # For deployment, we need to expose the app directly
+ if __name__ == "__main__":
+     demo.launch()
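As a side note on the launch call above: if a container needs the interface bound on the port the Dockerfile exposes (7788), Gradio's standard `server_name`/`server_port` arguments can be passed. A hedged sketch, not necessarily what this deployment uses:

```python
# Sketch only: bind to all interfaces on the port the Dockerfile exposes.
# server_name and server_port are standard gradio launch() arguments; whether
# this deployment needs them depends on the hosting environment.
demo.launch(server_name="0.0.0.0", server_port=7788)
```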
assets/examples/test.png ADDED

Git LFS Details

  • SHA256: 23e4fe8c9836cd35393315a3cca074dbd55a8645289ea337e3300269dda06900
  • Pointer size: 131 Bytes
  • Size of remote file: 423 kB
assets/web-ui.png ADDED

Git LFS Details

  • SHA256: ea3c23160272116985f1d24a8140f0746e92a820bbd6e4988b6aa4ec0dfbb491
  • Pointer size: 130 Bytes
  • Size of remote file: 24.5 kB
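For reference, a Git LFS pointer of this kind is just a small text stub committed in place of the binary. A sketch of the pointer that would stand in for `assets/examples/test.png`, using the SHA256 above; the byte count is approximated from the reported 423 kB:

```
version https://git-lfs.github.com/spec/v1
oid sha256:23e4fe8c9836cd35393315a3cca074dbd55a8645289ea337e3300269dda06900
size 423000
```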
docker-compose.yml ADDED
@@ -0,0 +1,80 @@
+ services:
+   # debug: docker compose run --rm -it browser-use-webui bash
+   browser-use-webui:
+     # image: ghcr.io/browser-use/web-ui  # Using the precompiled image
+     build:
+       context: .
+       dockerfile: Dockerfile
+       args:
+         TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
+     ports:
+       - "7788:7788"
+       - "6080:6080"
+       - "5901:5901"
+       - "9222:9222"
+     environment:
+       # LLM API Keys & Endpoints
+       - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
+       - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+       - ANTHROPIC_ENDPOINT=${ANTHROPIC_ENDPOINT:-https://api.anthropic.com}
+       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+       - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
+       - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
+       - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
+       - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION:-2025-01-01-preview}
+       - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
+       - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+       - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://localhost:11434}
+       - MISTRAL_ENDPOINT=${MISTRAL_ENDPOINT:-https://api.mistral.ai/v1}
+       - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
+       - ALIBABA_ENDPOINT=${ALIBABA_ENDPOINT:-https://dashscope.aliyuncs.com/compatible-mode/v1}
+       - ALIBABA_API_KEY=${ALIBABA_API_KEY:-}
+       - MOONSHOT_ENDPOINT=${MOONSHOT_ENDPOINT:-https://api.moonshot.cn/v1}
+       - MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
+       - UNBOUND_ENDPOINT=${UNBOUND_ENDPOINT:-https://api.getunbound.ai}
+       - UNBOUND_API_KEY=${UNBOUND_API_KEY:-}
+       - SiliconFLOW_ENDPOINT=${SiliconFLOW_ENDPOINT:-https://api.siliconflow.cn/v1/}
+       - SiliconFLOW_API_KEY=${SiliconFLOW_API_KEY:-}
+       - IBM_ENDPOINT=${IBM_ENDPOINT:-https://us-south.ml.cloud.ibm.com}
+       - IBM_API_KEY=${IBM_API_KEY:-}
+       - IBM_PROJECT_ID=${IBM_PROJECT_ID:-}
+
+       # Application Settings
+       - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
+       - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
+
+       # Browser Settings
+       - BROWSER_PATH=
+       - BROWSER_USER_DATA=
+       - BROWSER_DEBUGGING_PORT=${BROWSER_DEBUGGING_PORT:-9222}
+       - BROWSER_DEBUGGING_HOST=localhost
+       - USE_OWN_BROWSER=false
+       - KEEP_BROWSER_OPEN=true
+       - BROWSER_CDP=${BROWSER_CDP:-} # e.g., http://localhost:9222
+
+       # Display Settings
+       - DISPLAY=:99
+       # This ENV is used by the Dockerfile at build time, where Playwright respects it.
+       # It's not strictly needed at runtime by docker-compose unless your app or scripts also read it.
+       - PLAYWRIGHT_BROWSERS_PATH=/ms-browsers # Matches Dockerfile ENV
+       - RESOLUTION=${RESOLUTION:-1920x1080x24}
+       - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
+       - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
+
+       # VNC Settings
+       - VNC_PASSWORD=${VNC_PASSWORD:-yourvncpassword}
+
+     volumes:
+       - /tmp/.X11-unix:/tmp/.X11-unix
+       # - ./my_chrome_data:/app/data/chrome_data # Optional: persist browser data
+     restart: unless-stopped
+     shm_size: '2gb'
+     cap_add:
+       - SYS_ADMIN
+     tmpfs:
+       - /tmp
+     healthcheck:
+       test: ["CMD", "nc", "-z", "localhost", "5901"] # VNC port
+       interval: 10s
+       timeout: 5s
+       retries: 3
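For completeness, typical usage of the compose file above, assuming Docker Compose v2 (the service name matches the file; the debug line mirrors the comment at the top of the file):

```bash
# Build the image and start the stack in the background
docker compose up -d --build

# Follow the WebUI logs
docker compose logs -f browser-use-webui

# Open an interactive shell for debugging
docker compose run --rm -it browser-use-webui bash
```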
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ browser-use==0.1.48
+ pyperclip==1.9.0
+ gradio==5.27.0
+ json-repair
+ langchain-mistralai==0.2.4
+ MainContentExtractor==0.0.4
+ langchain-ibm==0.3.10
+ langchain_mcp_adapters==0.0.9
+ langgraph==0.3.34
+ langchain-community
src/__init__.py ADDED
File without changes
src/agent/__init__.py ADDED
File without changes
src/agent/browser_use/browser_use_agent.py ADDED
@@ -0,0 +1,169 @@
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import os
+
+ # from lmnr.sdk.decorators import observe
+ from browser_use.agent.gif import create_history_gif
+ from browser_use.agent.service import Agent, AgentHookFunc
+ from browser_use.agent.views import (
+     ActionResult,
+     AgentHistory,
+     AgentHistoryList,
+     AgentStepInfo,
+     ToolCallingMethod,
+ )
+ from browser_use.browser.views import BrowserStateHistory
+ from browser_use.utils import time_execution_async
+ from dotenv import load_dotenv
+ from browser_use.agent.message_manager.utils import is_model_without_tool_support
+
+ load_dotenv()
+ logger = logging.getLogger(__name__)
+
+ # Treat any value starting with "t", "y", or "1" (true/yes/1) as truthy.
+ SKIP_LLM_API_KEY_VERIFICATION = (
+     os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
+ )
+
+
+ class BrowserUseAgent(Agent):
+     def _set_tool_calling_method(self) -> ToolCallingMethod | None:
+         tool_calling_method = self.settings.tool_calling_method
+         if tool_calling_method == 'auto':
+             if is_model_without_tool_support(self.model_name):
+                 return 'raw'
+             elif self.chat_model_library == 'ChatGoogleGenerativeAI':
+                 return None
+             elif self.chat_model_library == 'ChatOpenAI':
+                 return 'function_calling'
+             elif self.chat_model_library == 'AzureChatOpenAI':
+                 return 'function_calling'
+             else:
+                 return None
+         else:
+             return tool_calling_method
+
+     @time_execution_async("--run (agent)")
+     async def run(
+             self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
+             on_step_end: AgentHookFunc | None = None
+     ) -> AgentHistoryList:
+         """Execute the task with maximum number of steps"""
+
+         loop = asyncio.get_event_loop()
+
+         # Set up the Ctrl+C signal handler with callbacks specific to this agent
+         from browser_use.utils import SignalHandler
+
+         signal_handler = SignalHandler(
+             loop=loop,
+             pause_callback=self.pause,
+             resume_callback=self.resume,
+             custom_exit_callback=None,  # No special cleanup needed on forced exit
+             exit_on_second_int=True,
+         )
+         signal_handler.register()
+
+         try:
+             self._log_agent_run()
+
+             # Execute initial actions if provided
+             if self.initial_actions:
+                 result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
+                 self.state.last_result = result
+
+             for step in range(max_steps):
+                 # Check if waiting for user input after Ctrl+C
+                 if self.state.paused:
+                     signal_handler.wait_for_resume()
+                     signal_handler.reset()
+
+                 # Check if we should stop due to too many failures
+                 if self.state.consecutive_failures >= self.settings.max_failures:
+                     logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
+                     break
+
+                 # Check control flags before each step
+                 if self.state.stopped:
+                     logger.info('Agent stopped')
+                     break
+
+                 while self.state.paused:
+                     await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning
+                     if self.state.stopped:  # Allow stopping while paused
+                         break
+
+                 if on_step_start is not None:
+                     await on_step_start(self)
+
+                 step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
+                 await self.step(step_info)
+
+                 if on_step_end is not None:
+                     await on_step_end(self)
+
+                 if self.state.history.is_done():
+                     if self.settings.validate_output and step < max_steps - 1:
+                         if not await self._validate_output():
+                             continue
+
+                     await self.log_completion()
+                     break
+             else:
+                 # for/else: runs only when the loop was never broken out of
+                 error_message = 'Failed to complete task in maximum steps'
+
+                 self.state.history.history.append(
+                     AgentHistory(
+                         model_output=None,
+                         result=[ActionResult(error=error_message, include_in_memory=True)],
+                         state=BrowserStateHistory(
+                             url='',
+                             title='',
+                             tabs=[],
+                             interacted_element=[],
+                             screenshot=None,
+                         ),
+                         metadata=None,
+                     )
+                 )
+
+                 logger.info(f'❌ {error_message}')
+
+             return self.state.history
+
+         except KeyboardInterrupt:
+             # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
+             logger.info('Got KeyboardInterrupt during execution, returning current history')
+             return self.state.history
+
+         finally:
+             # Unregister signal handlers before cleanup
+             signal_handler.unregister()
+
+             if self.settings.save_playwright_script_path:
+                 logger.info(
+                     f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
+                 )
+                 try:
+                     # Extract sensitive data keys if sensitive_data is provided
+                     keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
+                     # Pass browser and context config to the saving method
+                     self.state.history.save_as_playwright_script(
+                         self.settings.save_playwright_script_path,
+                         sensitive_data_keys=keys,
+                         browser_config=self.browser.config,
+                         context_config=self.browser_context.config,
+                     )
+                 except Exception as script_gen_err:
+                     # Log any error during script generation/saving
+                     logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
+
+             await self.close()
+
+             if self.settings.generate_gif:
+                 output_path: str = 'agent_history.gif'
+                 if isinstance(self.settings.generate_gif, str):
+                     output_path = self.settings.generate_gif
+
+                 create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
src/agent/deep_research/deep_research_agent.py ADDED
@@ -0,0 +1,1261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import threading
6
+ import uuid
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional, TypedDict
9
+
10
+ from browser_use.browser.browser import BrowserConfig
11
+ from langchain_community.tools.file_management import (
12
+ ListDirectoryTool,
13
+ ReadFileTool,
14
+ WriteFileTool,
15
+ )
16
+
17
+ # Langchain imports
18
+ from langchain_core.messages import (
19
+ AIMessage,
20
+ BaseMessage,
21
+ HumanMessage,
22
+ SystemMessage,
23
+ ToolMessage,
24
+ )
25
+ from langchain_core.prompts import ChatPromptTemplate
26
+ from langchain_core.tools import StructuredTool, Tool
27
+
28
+ # Langgraph imports
29
+ from langgraph.graph import StateGraph
30
+ from pydantic import BaseModel, Field
31
+
32
+ from browser_use.browser.context import BrowserContextConfig
33
+
34
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
35
+ from src.browser.custom_browser import CustomBrowser
36
+ from src.controller.custom_controller import CustomController
37
+ from src.utils.mcp_client import setup_mcp_client_and_tools
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+ # Constants
42
+ REPORT_FILENAME = "report.md"
43
+ PLAN_FILENAME = "research_plan.md"
44
+ SEARCH_INFO_FILENAME = "search_info.json"
45
+
46
+ _AGENT_STOP_FLAGS = {}
47
+ _BROWSER_AGENT_INSTANCES = {}
48
+
49
+
50
+ async def run_single_browser_task(
51
+ task_query: str,
52
+ task_id: str,
53
+ llm: Any, # Pass the main LLM
54
+ browser_config: Dict[str, Any],
55
+ stop_event: threading.Event,
56
+ use_vision: bool = False,
57
+ ) -> Dict[str, Any]:
58
+ """
59
+ Runs a single BrowserUseAgent task.
60
+ Manages browser creation and closing for this specific task.
61
+ """
62
+ if not BrowserUseAgent:
63
+ return {
64
+ "query": task_query,
65
+ "error": "BrowserUseAgent components not available.",
66
+ }
67
+
68
+ # --- Browser Setup ---
69
+ # These should ideally come from the main agent's config
70
+ headless = browser_config.get("headless", False)
71
+ window_w = browser_config.get("window_width", 1280)
72
+ window_h = browser_config.get("window_height", 1100)
73
+ browser_user_data_dir = browser_config.get("user_data_dir", None)
74
+ use_own_browser = browser_config.get("use_own_browser", False)
75
+ browser_binary_path = browser_config.get("browser_binary_path", None)
76
+ wss_url = browser_config.get("wss_url", None)
77
+ cdp_url = browser_config.get("cdp_url", None)
78
+ disable_security = browser_config.get("disable_security", False)
79
+
80
+ bu_browser = None
81
+ bu_browser_context = None
82
+ try:
83
+ logger.info(f"Starting browser task for query: {task_query}")
84
+ extra_args = []
85
+ if use_own_browser:
86
+ browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
87
+ if browser_binary_path == "":
88
+ browser_binary_path = None
89
+ browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
90
+ if browser_user_data:
91
+ extra_args += [f"--user-data-dir={browser_user_data}"]
92
+ else:
93
+ browser_binary_path = None
94
+
95
+ bu_browser = CustomBrowser(
96
+ config=BrowserConfig(
97
+ headless=headless,
98
+ browser_binary_path=browser_binary_path,
99
+ extra_browser_args=extra_args,
100
+ wss_url=wss_url,
101
+ cdp_url=cdp_url,
102
+ new_context_config=BrowserContextConfig(
103
+ window_width=window_w,
104
+ window_height=window_h,
105
+ )
106
+ )
107
+ )
108
+
109
+ context_config = BrowserContextConfig(
110
+ save_downloads_path="./tmp/downloads",
111
+ window_height=window_h,
112
+ window_width=window_w,
113
+ force_new_context=True,
114
+ )
115
+ bu_browser_context = await bu_browser.new_context(config=context_config)
116
+
117
+ # Simple controller example, replace with your actual implementation if needed
118
+ bu_controller = CustomController()
119
+
120
+ # Construct the task prompt for BrowserUseAgent
121
+ # Instruct it to find specific info and return title/URL
122
+ bu_task_prompt = f"""
123
+ Research Task: {task_query}
124
+ Objective: Find relevant information answering the query.
125
+ Output Requirements: For each relevant piece of information found, please provide:
126
+ 1. A concise summary of the information.
127
+ 2. The title of the source page or document.
128
+ 3. The URL of the source.
129
+ Focus on accuracy and relevance. Avoid irrelevant details.
130
+ PDF cannot directly extract _content, please try to download first, then using read_file, if you can't save or read, please try other methods.
131
+ """
132
+
133
+ bu_agent_instance = BrowserUseAgent(
134
+ task=bu_task_prompt,
135
+ llm=llm, # Use the passed LLM
136
+ browser=bu_browser,
137
+ browser_context=bu_browser_context,
138
+ controller=bu_controller,
139
+ use_vision=use_vision,
140
+ source="webui",
141
+ )
142
+
143
+ # Store instance for potential stop() call
144
+ task_key = f"{task_id}_{uuid.uuid4()}"
145
+ _BROWSER_AGENT_INSTANCES[task_key] = bu_agent_instance
146
+
147
+ # --- Run with Stop Check ---
148
+ # BrowserUseAgent needs to internally check a stop signal or have a stop method.
149
+ # We simulate checking before starting and assume `run` might be interruptible
150
+ # or have its own stop mechanism we can trigger via bu_agent_instance.stop().
151
+ if stop_event.is_set():
152
+ logger.info(f"Browser task for '{task_query}' cancelled before start.")
153
+ return {"query": task_query, "result": None, "status": "cancelled"}
154
+
155
+ # The run needs to be awaitable and ideally accept a stop signal or have a .stop() method
156
+ # result = await bu_agent_instance.run(max_steps=max_steps) # Add max_steps if applicable
157
+ # Let's assume a simplified run for now
158
+ logger.info(f"Running BrowserUseAgent for: {task_query}")
159
+ result = await bu_agent_instance.run() # Assuming run is the main method
160
+ logger.info(f"BrowserUseAgent finished for: {task_query}")
161
+
162
+ final_data = result.final_result()
163
+
164
+ if stop_event.is_set():
165
+ logger.info(f"Browser task for '{task_query}' stopped during execution.")
166
+ return {"query": task_query, "result": final_data, "status": "stopped"}
167
+ else:
168
+ logger.info(f"Browser result for '{task_query}': {final_data}")
169
+ return {"query": task_query, "result": final_data, "status": "completed"}
170
+
171
+ except Exception as e:
172
+ logger.error(
173
+ f"Error during browser task for query '{task_query}': {e}", exc_info=True
174
+ )
175
+ return {"query": task_query, "error": str(e), "status": "failed"}
176
+ finally:
177
+ if bu_browser_context:
178
+ try:
179
+ await bu_browser_context.close()
180
+ bu_browser_context = None
181
+ logger.info("Closed browser context.")
182
+ except Exception as e:
183
+ logger.error(f"Error closing browser context: {e}")
184
+ if bu_browser:
185
+ try:
186
+ await bu_browser.close()
187
+ bu_browser = None
188
+ logger.info("Closed browser.")
189
+ except Exception as e:
190
+ logger.error(f"Error closing browser: {e}")
191
+
192
+ if task_key in _BROWSER_AGENT_INSTANCES:
193
+ del _BROWSER_AGENT_INSTANCES[task_key]
194
+
195
+
196
+ class BrowserSearchInput(BaseModel):
197
+ queries: List[str] = Field(
198
+ description="List of distinct search queries to find information relevant to the research task."
199
+ )
200
+
201
+
202
+ async def _run_browser_search_tool(
203
+ queries: List[str],
204
+ task_id: str, # Injected dependency
205
+ llm: Any, # Injected dependency
206
+ browser_config: Dict[str, Any],
207
+ stop_event: threading.Event,
208
+ max_parallel_browsers: int = 1,
209
+ ) -> List[Dict[str, Any]]:
210
+ """
211
+ Internal function to execute parallel browser searches based on LLM-provided queries.
212
+ Handles concurrency and stop signals.
213
+ """
214
+
215
+ # Limit queries just in case LLM ignores the description
216
+ queries = queries[:max_parallel_browsers]
217
+ logger.info(
218
+ f"[Browser Tool {task_id}] Running search for {len(queries)} queries: {queries}"
219
+ )
220
+
221
+ results = []
222
+ semaphore = asyncio.Semaphore(max_parallel_browsers)
223
+
224
+ async def task_wrapper(query):
225
+ async with semaphore:
226
+ if stop_event.is_set():
227
+ logger.info(
228
+ f"[Browser Tool {task_id}] Skipping task due to stop signal: {query}"
229
+ )
230
+ return {"query": query, "result": None, "status": "cancelled"}
231
+ # Pass necessary injected configs and the stop event
232
+ return await run_single_browser_task(
233
+ query,
234
+ task_id,
235
+ llm, # Pass the main LLM (or a dedicated one if needed)
236
+ browser_config,
237
+ stop_event,
238
+ # use_vision could be added here if needed
239
+ )
240
+
241
+ tasks = [task_wrapper(query) for query in queries]
242
+ search_results = await asyncio.gather(*tasks, return_exceptions=True)
243
+
244
+ processed_results = []
245
+ for i, res in enumerate(search_results):
246
+ query = queries[i] # Get corresponding query
247
+ if isinstance(res, Exception):
248
+ logger.error(
249
+ f"[Browser Tool {task_id}] Gather caught exception for query '{query}': {res}",
250
+ exc_info=True,
251
+ )
252
+ processed_results.append(
253
+ {"query": query, "error": str(res), "status": "failed"}
254
+ )
255
+ elif isinstance(res, dict):
256
+ processed_results.append(res)
257
+ else:
258
+ logger.error(
259
+ f"[Browser Tool {task_id}] Unexpected result type for query '{query}': {type(res)}"
260
+ )
261
+ processed_results.append(
262
+ {"query": query, "error": "Unexpected result type", "status": "failed"}
263
+ )
264
+
265
+ logger.info(
266
+ f"[Browser Tool {task_id}] Finished search. Results count: {len(processed_results)}"
267
+ )
268
+ return processed_results
269
+
270
+
271
+ def create_browser_search_tool(
272
+ llm: Any,
273
+ browser_config: Dict[str, Any],
274
+ task_id: str,
275
+ stop_event: threading.Event,
276
+ max_parallel_browsers: int = 1,
277
+ ) -> StructuredTool:
278
+ """Factory function to create the browser search tool with necessary dependencies."""
279
+ # Use partial to bind the dependencies that aren't part of the LLM call arguments
280
+ from functools import partial
281
+
282
+ bound_tool_func = partial(
283
+ _run_browser_search_tool,
284
+ task_id=task_id,
285
+ llm=llm,
286
+ browser_config=browser_config,
287
+ stop_event=stop_event,
288
+ max_parallel_browsers=max_parallel_browsers,
289
+ )
290
+
291
+ return StructuredTool.from_function(
292
+ coroutine=bound_tool_func,
293
+ name="parallel_browser_search",
294
+ description=f"""Use this tool to actively search the web for information related to a specific research task or question.
295
+ It runs up to {max_parallel_browsers} searches in parallel using a browser agent for better results than simple scraping.
296
+ Provide a list of distinct search queries(up to {max_parallel_browsers}) that are likely to yield relevant information.""",
297
+ args_schema=BrowserSearchInput,
298
+ )
299
+
300
+
301
+ # --- Langgraph State Definition ---
302
+
303
+
304
+ class ResearchTaskItem(TypedDict):
305
+ # step: int # Maybe step within category, or just implicit by order
306
+ task_description: str
307
+ status: str # "pending", "completed", "failed"
308
+ queries: Optional[List[str]]
309
+ result_summary: Optional[str]
310
+
311
+
312
+ class ResearchCategoryItem(TypedDict):
313
+ category_name: str
314
+ tasks: List[ResearchTaskItem]
315
+ # Optional: category_status: str # Could be "pending", "in_progress", "completed"
316
+
317
+
318
+ class DeepResearchState(TypedDict):
319
+ task_id: str
320
+ topic: str
321
+ research_plan: List[ResearchCategoryItem] # CHANGED
322
+ search_results: List[Dict[str, Any]]
323
+ llm: Any
324
+ tools: List[Tool]
325
+ output_dir: Path
326
+ browser_config: Dict[str, Any]
327
+ final_report: Optional[str]
328
+ current_category_index: int
329
+ current_task_index_in_category: int
330
+ stop_requested: bool
331
+ error_message: Optional[str]
332
+ messages: List[BaseMessage]
333
+
334
+
335
+ # --- Langgraph Nodes ---
336
+
337
+
338
+ def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
339
+ state_updates = {}
340
+ plan_file = os.path.join(output_dir, PLAN_FILENAME)
341
+ search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
342
+
343
+ loaded_plan: List[ResearchCategoryItem] = []
344
+ next_cat_idx, next_task_idx = 0, 0
345
+ found_pending = False
346
+
347
+ if os.path.exists(plan_file):
348
+ try:
349
+ with open(plan_file, "r", encoding="utf-8") as f:
350
+ current_category: Optional[ResearchCategoryItem] = None
351
+ lines = f.readlines()
352
+ cat_counter = 0
353
+ task_counter_in_cat = 0
354
+
355
+ for line_num, line_content in enumerate(lines):
356
+ line = line_content.strip()
357
+ if line.startswith("## "): # Category
358
+ if current_category: # Save previous category
359
+ loaded_plan.append(current_category)
360
+ if not found_pending: # If previous category was all done, advance cat counter
361
+ cat_counter += 1
362
+ task_counter_in_cat = 0
363
+ category_name = line[line.find(" "):].strip() # Get text after "## X. "
364
+ current_category = ResearchCategoryItem(category_name=category_name, tasks=[])
365
+ elif (line.startswith("- [ ]") or line.startswith("- [x]") or line.startswith(
366
+ "- [-]")) and current_category: # Task
367
+ status = "pending"
368
+ if line.startswith("- [x]"):
369
+ status = "completed"
370
+ elif line.startswith("- [-]"):
371
+ status = "failed"
372
+
373
+ task_desc = line[5:].strip()
374
+ current_category["tasks"].append(
375
+ ResearchTaskItem(task_description=task_desc, status=status, queries=None,
376
+ result_summary=None)
377
+ )
378
+ if status == "pending" and not found_pending:
379
+ next_cat_idx = cat_counter
380
+ next_task_idx = task_counter_in_cat
381
+ found_pending = True
382
+ if not found_pending: # only increment if previous tasks were completed/failed
383
+ task_counter_in_cat += 1
384
+
385
+ if current_category: # Append last category
386
+ loaded_plan.append(current_category)
387
+
388
+ if loaded_plan:
389
+ state_updates["research_plan"] = loaded_plan
390
+ if not found_pending and loaded_plan: # All tasks were completed or failed
391
+ next_cat_idx = len(loaded_plan) # Points beyond the last category
392
+ next_task_idx = 0
393
+ state_updates["current_category_index"] = next_cat_idx
394
+ state_updates["current_task_index_in_category"] = next_task_idx
395
+ logger.info(
396
+ f"Loaded hierarchical research plan from {plan_file}. "
397
+ f"Next task: Category {next_cat_idx}, Task {next_task_idx} in category."
398
+ )
399
+ else:
400
+ logger.warning(f"Plan file {plan_file} was empty or malformed.")
401
+
402
+ except Exception as e:
403
+ logger.error(f"Failed to load or parse research plan {plan_file}: {e}", exc_info=True)
404
+ state_updates["error_message"] = f"Failed to load research plan: {e}"
405
+ else:
406
+ logger.info(f"Plan file {plan_file} not found. Will start fresh.")
407
+
408
+ if os.path.exists(search_file):
409
+ try:
410
+ with open(search_file, "r", encoding="utf-8") as f:
411
+ state_updates["search_results"] = json.load(f)
412
+ logger.info(f"Loaded search results from {search_file}")
413
+ except Exception as e:
414
+ logger.error(f"Failed to load search results {search_file}: {e}")
415
+ state_updates["error_message"] = (
416
+ state_updates.get("error_message", "") + f" Failed to load search results: {e}").strip()
417
+
418
+ return state_updates
419
+
420
+
421
+ def _save_plan_to_md(plan: List[ResearchCategoryItem], output_dir: str):
422
+ plan_file = os.path.join(output_dir, PLAN_FILENAME)
423
+ try:
424
+ with open(plan_file, "w", encoding="utf-8") as f:
425
+ f.write(f"# Research Plan\n\n")
426
+ for cat_idx, category in enumerate(plan):
427
+ f.write(f"## {cat_idx + 1}. {category['category_name']}\n\n")
428
+ for task_idx, task in enumerate(category['tasks']):
429
+ marker = "- [x]" if task["status"] == "completed" else "- [ ]" if task[
430
+ "status"] == "pending" else "- [-]" # [-] for failed
431
+ f.write(f" {marker} {task['task_description']}\n")
432
+ f.write("\n")
433
+ logger.info(f"Hierarchical research plan saved to {plan_file}")
434
+ except Exception as e:
435
+ logger.error(f"Failed to save research plan to {plan_file}: {e}")
436
+
437
+
438
+ def _save_search_results_to_json(results: List[Dict[str, Any]], output_dir: str):
439
+ """Appends or overwrites search results to a JSON file."""
440
+ search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
441
+ try:
442
+ # Simple overwrite for now, could be append
443
+ with open(search_file, "w", encoding="utf-8") as f:
444
+ json.dump(results, f, indent=2, ensure_ascii=False)
445
+ logger.info(f"Search results saved to {search_file}")
446
+ except Exception as e:
447
+ logger.error(f"Failed to save search results to {search_file}: {e}")
448
+
449
+
450
+ def _save_report_to_md(report: str, output_dir: Path):
451
+ """Saves the final report to a markdown file."""
452
+ report_file = os.path.join(output_dir, REPORT_FILENAME)
453
+ try:
454
+ with open(report_file, "w", encoding="utf-8") as f:
455
+ f.write(report)
456
+ logger.info(f"Final report saved to {report_file}")
457
+ except Exception as e:
458
+ logger.error(f"Failed to save final report to {report_file}: {e}")
459
+
460
+
461
+ async def planning_node(state: DeepResearchState) -> Dict[str, Any]:
462
+ logger.info("--- Entering Planning Node ---")
463
+ if state.get("stop_requested"):
464
+ logger.info("Stop requested, skipping planning.")
465
+ return {"stop_requested": True}
466
+
467
+ llm = state["llm"]
468
+ topic = state["topic"]
469
+ existing_plan = state.get("research_plan")
470
+ output_dir = state["output_dir"]
471
+
472
+ if existing_plan and (
473
+ state.get("current_category_index", 0) > 0 or state.get("current_task_index_in_category", 0) > 0):
474
+ logger.info("Resuming with existing plan.")
475
+ _save_plan_to_md(existing_plan, output_dir) # Ensure it's saved initially
476
+ # current_category_index and current_task_index_in_category should be set by _load_previous_state
477
+ return {"research_plan": existing_plan}
478
+
479
+ logger.info(f"Generating new research plan for topic: {topic}")
480
+
481
+ prompt_text = f"""You are a meticulous research assistant. Your goal is to create a hierarchical research plan to thoroughly investigate the topic: "{topic}".
482
+ The plan should be structured into several main research categories. Each category should contain a list of specific, actionable research tasks or questions.
483
+ Format the output as a JSON list of objects. Each object represents a research category and should have:
484
+ 1. "category_name": A string for the name of the research category.
485
+ 2. "tasks": A list of strings, where each string is a specific research task for that category.
486
+
487
+ Example JSON Output:
488
+ [
489
+ {{
490
+ "category_name": "Understanding Core Concepts and Definitions",
491
+ "tasks": [
492
+ "Define the primary terminology associated with '{topic}'.",
493
+ "Identify the fundamental principles and theories underpinning '{topic}'."
494
+ ]
495
+ }},
496
+ {{
497
+ "category_name": "Historical Development and Key Milestones",
498
+ "tasks": [
499
+ "Trace the historical evolution of '{topic}'.",
500
+ "Identify key figures, events, or breakthroughs in the development of '{topic}'."
501
+ ]
502
+ }},
503
+ {{
504
+ "category_name": "Current State-of-the-Art and Applications",
505
+ "tasks": [
506
+ "Analyze the current advancements and prominent applications of '{topic}'.",
507
+ "Investigate ongoing research and active areas of development related to '{topic}'."
508
+ ]
509
+ }},
510
+ {{
511
+ "category_name": "Challenges, Limitations, and Future Outlook",
512
+ "tasks": [
513
+ "Identify the major challenges and limitations currently facing '{topic}'.",
514
+ "Explore potential future trends, ethical considerations, and societal impacts of '{topic}'."
515
+ ]
516
+ }}
517
+ ]
518
+
519
+ Generate a plan with 3-10 categories, and 2-6 tasks per category for the topic: "{topic}" according to the complexity of the topic.
520
+ Ensure the output is a valid JSON array.
521
+ """
522
+ messages = [
523
+ SystemMessage(content="You are a research planning assistant outputting JSON."),
524
+ HumanMessage(content=prompt_text)
525
+ ]
526
+
527
+ try:
528
+ response = await llm.ainvoke(messages)
529
+ raw_content = response.content
530
+ # The LLM might wrap the JSON in backticks
531
+ if raw_content.strip().startswith("```json"):
532
+ raw_content = raw_content.strip()[7:-3].strip()
533
+ elif raw_content.strip().startswith("```"):
534
+ raw_content = raw_content.strip()[3:-3].strip()
535
+
536
+ logger.debug(f"LLM response for plan: {raw_content}")
537
+ parsed_plan_from_llm = json.loads(raw_content)
538
+
539
+ new_plan: List[ResearchCategoryItem] = []
540
+ for cat_idx, category_data in enumerate(parsed_plan_from_llm):
541
+ if not isinstance(category_data,
542
+ dict) or "category_name" not in category_data or "tasks" not in category_data:
543
+ logger.warning(f"Skipping invalid category data: {category_data}")
544
+ continue
545
+
546
+ tasks: List[ResearchTaskItem] = []
547
+ for task_idx, task_desc in enumerate(category_data["tasks"]):
548
+ if isinstance(task_desc, str):
549
+ tasks.append(
550
+ ResearchTaskItem(
551
+ task_description=task_desc,
552
+ status="pending",
553
+ queries=None,
554
+ result_summary=None,
555
+ )
556
+ )
557
+ else: # Sometimes LLM puts tasks as {"task": "description"}
558
+ if isinstance(task_desc, dict) and "task_description" in task_desc:
559
+ tasks.append(
560
+ ResearchTaskItem(
561
+ task_description=task_desc["task_description"],
562
+ status="pending",
563
+ queries=None,
564
+ result_summary=None,
565
+ )
566
+ )
567
+ elif isinstance(task_desc, dict) and "task" in task_desc: # common LLM mistake
568
+ tasks.append(
569
+ ResearchTaskItem(
570
+ task_description=task_desc["task"],
571
+ status="pending",
572
+ queries=None,
573
+ result_summary=None,
574
+ )
575
+ )
576
+ else:
577
+ logger.warning(
578
+ f"Skipping invalid task data: {task_desc} in category {category_data['category_name']}")
579
+
580
+ new_plan.append(
581
+ ResearchCategoryItem(
582
+ category_name=category_data["category_name"],
583
+ tasks=tasks,
584
+ )
585
+ )
586
+
587
+ if not new_plan:
588
+ logger.error("LLM failed to generate a valid plan structure from JSON.")
589
+ return {"error_message": "Failed to generate research plan structure."}
590
+
591
+ logger.info(f"Generated research plan with {len(new_plan)} categories.")
592
+ _save_plan_to_md(new_plan, output_dir) # Save the hierarchical plan
593
+
594
+ return {
595
+ "research_plan": new_plan,
596
+ "current_category_index": 0,
597
+ "current_task_index_in_category": 0,
598
+ "search_results": [],
599
+ }
600
+
601
+ except json.JSONDecodeError as e:
602
+ logger.error(f"Failed to parse JSON from LLM for plan: {e}. Response was: {raw_content}", exc_info=True)
603
+ return {"error_message": f"LLM generated invalid JSON for research plan: {e}"}
604
+ except Exception as e:
605
+ logger.error(f"Error during planning: {e}", exc_info=True)
606
+ return {"error_message": f"LLM Error during planning: {e}"}
607
+
608
+
609
+ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
610
+ logger.info("--- Entering Research Execution Node ---")
611
+ if state.get("stop_requested"):
612
+ logger.info("Stop requested, skipping research execution.")
613
+ return {
614
+ "stop_requested": True,
615
+ "current_category_index": state["current_category_index"],
616
+ "current_task_index_in_category": state["current_task_index_in_category"],
617
+ }
618
+
619
+ plan = state["research_plan"]
620
+ cat_idx = state["current_category_index"]
621
+ task_idx = state["current_task_index_in_category"]
622
+ llm = state["llm"]
623
+ tools = state["tools"]
624
+ output_dir = str(state["output_dir"])
625
+ task_id = state["task_id"] # For _AGENT_STOP_FLAGS
626
+
627
+ # This check should ideally be handled by `should_continue`
628
+ if not plan or cat_idx >= len(plan):
629
+ logger.info("Research plan complete or categories exhausted.")
630
+ return {} # should route to synthesis
631
+
632
+ current_category = plan[cat_idx]
633
+ if task_idx >= len(current_category["tasks"]):
634
+ logger.info(f"All tasks in category '{current_category['category_name']}' completed. Moving to next category.")
635
+ # This logic is now effectively handled by should_continue and the index updates below
636
+ # The next iteration will be caught by should_continue or this node with updated indices
637
+ return {
638
+ "current_category_index": cat_idx + 1,
639
+ "current_task_index_in_category": 0,
640
+ "messages": state["messages"] # Pass messages along
641
+ }
642
+
643
+ current_task = current_category["tasks"][task_idx]
644
+
645
+ if current_task["status"] == "completed":
646
+ logger.info(
647
+ f"Task '{current_task['task_description']}' in category '{current_category['category_name']}' already completed. Skipping.")
648
+ # Logic to find next task
649
+ next_task_idx = task_idx + 1
650
+ next_cat_idx = cat_idx
651
+ if next_task_idx >= len(current_category["tasks"]):
652
+ next_cat_idx += 1
653
+ next_task_idx = 0
654
+ return {
655
+ "current_category_index": next_cat_idx,
656
+ "current_task_index_in_category": next_task_idx,
657
+ "messages": state["messages"] # Pass messages along
658
+ }
659
+
660
+ logger.info(
661
+ f"Executing research task: '{current_task['task_description']}' (Category: '{current_category['category_name']}')"
662
+ )
663
+
664
+ llm_with_tools = llm.bind_tools(tools)
665
+
666
+ # Construct messages for LLM invocation
667
+ task_prompt_content = (
668
+ f"Current Research Category: {current_category['category_name']}\n"
669
+ f"Specific Task: {current_task['task_description']}\n\n"
670
+ "Please use the available tools, especially 'parallel_browser_search', to gather information for this specific task. "
671
+ "Provide focused search queries relevant ONLY to this task. "
672
+ "If you believe you have sufficient information from previous steps for this specific task, you can indicate that you are ready to summarize or that no further search is needed."
673
+ )
674
+ current_task_message_history = [
675
+ HumanMessage(content=task_prompt_content)
676
+ ]
677
+ if not state["messages"]: # First actual execution message
678
+ invocation_messages = [
679
+ SystemMessage(
680
+ content="You are a research assistant executing one task of a research plan. Focus on the current task only."),
681
+ ] + current_task_message_history
682
+ else:
683
+ invocation_messages = state["messages"] + current_task_message_history
684
+
685
+ try:
686
+ logger.info(f"Invoking LLM with tools for task: {current_task['task_description']}")
687
+ ai_response: BaseMessage = await llm_with_tools.ainvoke(invocation_messages)
688
+ logger.info("LLM invocation complete.")
689
+
690
+ tool_results = []
691
+ executed_tool_names = []
692
+ current_search_results = state.get("search_results", []) # Get existing search results
693
+
694
+ if not isinstance(ai_response, AIMessage) or not ai_response.tool_calls:
695
+ logger.warning(
696
+ f"LLM did not call any tool for task '{current_task['task_description']}'. Response: {ai_response.content[:100]}..."
697
+ )
698
+ current_task["status"] = "pending"  # Or "completed_no_tool" if LLM explains it's done
+ current_task["result_summary"] = f"LLM did not use a tool. Response: {ai_response.content}"
+ # We still save the plan and advance to the next task so the graph does not
+ # loop on this task, and return a proper state-update dict (returning the raw
+ # task dict would push unknown keys into the graph state).
+ _save_plan_to_md(plan, output_dir)
+ next_task_idx, next_cat_idx = task_idx + 1, cat_idx
+ if next_task_idx >= len(current_category["tasks"]):
+ next_cat_idx, next_task_idx = next_cat_idx + 1, 0
+ return {"research_plan": plan, "current_category_index": next_cat_idx,
+ "current_task_index_in_category": next_task_idx, "messages": state["messages"]}
704
+ else:
705
+ # Process tool calls
706
+ for tool_call in ai_response.tool_calls:
707
+ tool_name = tool_call.get("name")
708
+ tool_args = tool_call.get("args", {})
709
+ tool_call_id = tool_call.get("id")
710
+
711
+ logger.info(f"LLM requested tool call: {tool_name} with args: {tool_args}")
712
+ executed_tool_names.append(tool_name)
713
+ selected_tool = next((t for t in tools if t.name == tool_name), None)
714
+
715
+ if not selected_tool:
716
+ logger.error(f"LLM called tool '{tool_name}' which is not available.")
717
+ tool_results.append(
718
+ ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id=tool_call_id))
719
+ continue
720
+
721
+ try:
722
+ stop_event = _AGENT_STOP_FLAGS.get(task_id)
723
+ if stop_event and stop_event.is_set():
724
+ logger.info(f"Stop requested before executing tool: {tool_name}")
725
+ current_task["status"] = "pending" # Or a new "stopped" status
726
+ _save_plan_to_md(plan, output_dir)
727
+ return {"stop_requested": True, "research_plan": plan, "current_category_index": cat_idx,
728
+ "current_task_index_in_category": task_idx}
729
+
730
+ logger.info(f"Executing tool: {tool_name}")
731
+ tool_output = await selected_tool.ainvoke(tool_args)
732
+ logger.info(f"Tool '{tool_name}' executed successfully.")
733
+
734
+ if tool_name == "parallel_browser_search":
735
+ current_search_results.extend(tool_output) # tool_output is List[Dict]
736
+ else: # For other tools, we might need specific handling or just log
737
+ logger.info(f"Result from tool '{tool_name}': {str(tool_output)[:200]}...")
738
+ # Storing non-browser results might need a different structure or key in search_results
739
+ current_search_results.append(
740
+ {"tool_name": tool_name, "args": tool_args, "output": str(tool_output),
741
+ "status": "completed"})
742
+
743
+ tool_results.append(ToolMessage(content=json.dumps(tool_output), tool_call_id=tool_call_id))
744
+
745
+ except Exception as e:
746
+ logger.error(f"Error executing tool '{tool_name}': {e}", exc_info=True)
747
+ tool_results.append(
748
+ ToolMessage(content=f"Error executing tool {tool_name}: {e}", tool_call_id=tool_call_id))
749
+ current_search_results.append(
750
+ {"tool_name": tool_name, "args": tool_args, "status": "failed", "error": str(e)})
751
+
752
+ # After processing all tool calls for this task
753
+ step_failed_tool_execution = any("Error:" in str(tr.content) for tr in tool_results)
754
+ # A browser task counts as attempted-successfully if a parallel search ran and
+ # did not error out immediately; the search itself reports per-query status.
+ # (Informational only: the status logic below keys off step_failed_tool_execution
+ # and executed_tool_names.)
+ browser_tool_attempted_successfully = "parallel_browser_search" in executed_tool_names and not step_failed_tool_execution
757
+
758
+ if step_failed_tool_execution:
759
+ current_task["status"] = "failed"
760
+ current_task["result_summary"] = f"Tool execution failed. Errors: {[tr.content for tr in tool_results if 'Error' in str(tr.content)]}"
762
+ elif executed_tool_names: # If any tool was called
763
+ current_task["status"] = "completed"
764
+ current_task["result_summary"] = f"Executed tool(s): {', '.join(executed_tool_names)}."
765
+ # TODO: Could ask LLM to summarize the tool_results for this task if needed, rather than just listing tools.
766
+ else: # No tool calls but AI response had .tool_calls structure (empty)
767
+ current_task["status"] = "failed" # Or a more specific status
768
+ current_task["result_summary"] = "LLM prepared for tool call but provided no tools."
769
+
770
+ # Save progress
771
+ _save_plan_to_md(plan, output_dir)
772
+ _save_search_results_to_json(current_search_results, output_dir)
773
+
774
+ # Determine next indices
775
+ next_task_idx = task_idx + 1
776
+ next_cat_idx = cat_idx
777
+ if next_task_idx >= len(current_category["tasks"]):
778
+ next_cat_idx += 1
779
+ next_task_idx = 0
780
+
781
+ updated_messages = state["messages"] + current_task_message_history + [ai_response] + tool_results
782
+
783
+ return {
784
+ "research_plan": plan,
785
+ "search_results": current_search_results,
786
+ "current_category_index": next_cat_idx,
787
+ "current_task_index_in_category": next_task_idx,
788
+ "messages": updated_messages,
789
+ }
790
+
791
+ except Exception as e:
792
+ logger.error(f"Unhandled error during research execution for task '{current_task['task_description']}': {e}",
793
+ exc_info=True)
794
+ current_task["status"] = "failed"
795
+ _save_plan_to_md(plan, output_dir)
796
+ # Determine next indices even on error to attempt to move on
797
+ next_task_idx = task_idx + 1
798
+ next_cat_idx = cat_idx
799
+ if next_task_idx >= len(current_category["tasks"]):
800
+ next_cat_idx += 1
801
+ next_task_idx = 0
802
+ return {
803
+ "research_plan": plan,
804
+ "current_category_index": next_cat_idx,
805
+ "current_task_index_in_category": next_task_idx,
806
+ "error_message": f"Core Execution Error on task '{current_task['task_description']}': {e}",
807
+ "messages": state["messages"] + current_task_message_history # Preserve messages up to error
808
+ }
809
+
810
+
811
+ async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
812
+ """Synthesizes the final report from the collected search results."""
813
+ logger.info("--- Entering Synthesis Node ---")
814
+ if state.get("stop_requested"):
815
+ logger.info("Stop requested, skipping synthesis.")
816
+ return {"stop_requested": True}
817
+
818
+ llm = state["llm"]
819
+ topic = state["topic"]
820
+ search_results = state.get("search_results", [])
821
+ output_dir = state["output_dir"]
822
+ plan = state["research_plan"] # Include plan for context
823
+
824
+ if not search_results:
825
+ logger.warning("No search results found to synthesize report.")
826
+ report = f"# Research Report: {topic}\n\nNo information was gathered during the research process."
827
+ _save_report_to_md(report, output_dir)
828
+ return {"final_report": report}
829
+
830
+ logger.info(
831
+ f"Synthesizing report from {len(search_results)} collected search result entries."
832
+ )
833
+
834
+ # Prepare context for the LLM
835
+ # Format search results nicely, maybe group by query or original plan step
836
+ formatted_results = ""
837
+ references = {}
838
+ ref_count = 1
839
+ for i, result_entry in enumerate(search_results):
840
+ query = result_entry.get("query", "Unknown Query") # From parallel_browser_search
841
+ tool_name = result_entry.get("tool_name") # From other tools
842
+ status = result_entry.get("status", "unknown")
843
+ result_data = result_entry.get("result") # From BrowserUseAgent's final_result
844
+ tool_output_str = result_entry.get("output") # From other tools
845
+
846
+ if tool_name == "parallel_browser_search" and status == "completed" and result_data:
847
+ # result_data is the summary from BrowserUseAgent
848
+ formatted_results += f'### Finding from Web Search Query: "{query}"\n'
849
+ formatted_results += f"- **Summary:**\n{result_data}\n" # result_data is already a summary string here
850
+ # If result_data contained title/URL, you'd format them here.
851
+ # The current BrowserUseAgent returns a string summary directly as 'final_data' in run_single_browser_task
852
+ formatted_results += "---\n"
853
+ elif tool_name != "parallel_browser_search" and status == "completed" and tool_output_str:
854
+ formatted_results += f'### Finding from Tool: "{tool_name}" (Args: {result_entry.get("args")})\n'
855
+ formatted_results += f"- **Output:**\n{tool_output_str}\n"
856
+ formatted_results += "---\n"
857
+ elif status == "failed":
858
+ error = result_entry.get("error")
859
+ q_or_t = f"Query: \"{query}\"" if query != "Unknown Query" else f"Tool: \"{tool_name}\""
860
+ formatted_results += f'### Failed {q_or_t}\n'
861
+ formatted_results += f"- **Error:** {error}\n"
862
+ formatted_results += "---\n"
863
+
864
+ # Prepare the research plan context
865
+ plan_summary = "\nResearch Plan Followed:\n"
866
+ for cat_idx, category in enumerate(plan):
867
+ plan_summary += f"\n#### Category {cat_idx + 1}: {category['category_name']}\n"
868
+ for task_idx, task in enumerate(category['tasks']):
869
+ marker = "[x]" if task["status"] == "completed" else "[ ]" if task["status"] == "pending" else "[-]"
870
+ plan_summary += f" - {marker} {task['task_description']}\n"
871
+
872
+ synthesis_prompt = ChatPromptTemplate.from_messages(
873
+ [
874
+ (
875
+ "system",
876
+ """You are a professional researcher tasked with writing a comprehensive and well-structured report based on collected findings.
877
+ The report should address the research topic thoroughly, synthesizing the information gathered from various sources.
878
+ Structure the report logically:
879
+ 1. Briefly introduce the topic and the report's scope (mentioning the research plan followed, including categories and tasks, is good).
880
+ 2. Discuss the key findings, organizing them thematically, possibly aligning with the research categories. Analyze, compare, and contrast information.
881
+ 3. Summarize the main points and offer concluding thoughts.
882
+
883
+ Ensure the tone is objective and professional.
884
+ If findings are contradictory or incomplete, acknowledge this.
885
+ """, # Removed citation part for simplicity for now, as browser agent returns summaries.
886
+ ),
887
+ (
+ "human",
+ # Plain (non f-string) template: {topic}, {plan_summary} and {formatted_results}
+ # are filled by format_prompt() below. An f-string here would pre-interpolate
+ # them and let literal braces in the findings break template formatting.
+ """
890
+ **Research Topic:** {topic}
891
+
892
+ {plan_summary}
893
+
894
+ **Collected Findings:**
895
+ ```
896
+ {formatted_results}
897
+ ```
898
+
899
+ Please generate the final research report in Markdown format based **only** on the information above.
900
+ """,
901
+ ),
902
+ ]
903
+ )
904
+
905
+ try:
906
+ response = await llm.ainvoke(
907
+ synthesis_prompt.format_prompt(
908
+ topic=topic,
909
+ plan_summary=plan_summary,
910
+ formatted_results=formatted_results,
911
+ ).to_messages()
912
+ )
913
+ final_report_md = response.content
914
+
915
+ # Append the reference list to the end of the generated markdown. Note that
+ # `references` is never populated above (the browser agent returns plain
+ # summaries), so this block is currently inert; kept for future citation support.
+ if references:
917
+ report_references_section = "\n\n## References\n\n"
918
+ # Sort refs by ID for consistent output
919
+ sorted_refs = sorted(references.values(), key=lambda x: x["id"])
920
+ for ref in sorted_refs:
921
+ report_references_section += (
922
+ f"[{ref['id']}] {ref['title']} - {ref['url']}\n"
923
+ )
924
+ final_report_md += report_references_section
925
+
926
+ logger.info("Successfully synthesized the final report.")
927
+ _save_report_to_md(final_report_md, output_dir)
928
+ return {"final_report": final_report_md}
929
+
930
+ except Exception as e:
931
+ logger.error(f"Error during report synthesis: {e}", exc_info=True)
932
+ return {"error_message": f"LLM Error during synthesis: {e}"}
933
+
934
+
935
+ # --- Langgraph Edges and Conditional Logic ---
936
+
937
+
938
+ def should_continue(state: DeepResearchState) -> str:
939
+ logger.info("--- Evaluating Condition: Should Continue? ---")
940
+ if state.get("stop_requested"):
941
+ logger.info("Stop requested, routing to END.")
942
+ return "end_run"
943
+ if state.get("error_message") and "Core Execution Error" in state["error_message"]: # Critical error in node
944
+ logger.warning(f"Critical error detected: {state['error_message']}. Routing to END.")
945
+ return "end_run"
946
+
947
+ plan = state.get("research_plan")
948
+ cat_idx = state.get("current_category_index", 0)
949
+ task_idx = state.get("current_task_index_in_category", 0) # This is the *next* task to check
950
+
951
+ if not plan:
952
+ logger.warning("No research plan found. Routing to END.")
953
+ return "end_run"
954
+
955
+ # Check if the current indices point to a valid pending task
956
+ if cat_idx < len(plan):
957
+ current_category = plan[cat_idx]
958
+ if task_idx < len(current_category["tasks"]):
959
+ # We are trying to execute the task at plan[cat_idx]["tasks"][task_idx]
960
+ # The research_execution_node will handle if it's already completed.
961
+ logger.info(
962
+ f"Plan has potential pending tasks (next up: Category {cat_idx}, Task {task_idx}). Routing to Research Execution."
963
+ )
964
+ return "execute_research"
965
+ else: # task_idx is out of bounds for current category, means we need to check next category
966
+ if cat_idx + 1 < len(plan): # If there is a next category
967
+ logger.info(
968
+ f"Finished tasks in category {cat_idx}. Moving to category {cat_idx + 1}. Routing to Research Execution."
969
+ )
970
+ # research_execution_node will update state to {current_category_index: cat_idx + 1, current_task_index_in_category: 0}
971
+ # Or rather, the previous execution node already set these indices to the start of the next category.
972
+ return "execute_research"
973
+
974
+ # If we've gone through all categories and tasks (cat_idx >= len(plan))
975
+ logger.info("All plan categories and tasks processed or current indices are out of bounds. Routing to Synthesis.")
976
+ return "synthesize_report"
977
+
978
+
979
+ # --- DeepSearchAgent Class ---
980
+
981
+
982
+ class DeepResearchAgent:
983
+ def __init__(
984
+ self,
985
+ llm: Any,
986
+ browser_config: Dict[str, Any],
987
+ mcp_server_config: Optional[Dict[str, Any]] = None,
988
+ ):
989
+ """
990
+ Initializes the DeepSearchAgent.
991
+
992
+ Args:
993
+ llm: The Langchain compatible language model instance.
994
+ browser_config: Configuration dictionary for the BrowserUseAgent tool.
995
+ Example: {"headless": True, "window_width": 1280, ...}
996
+ mcp_server_config: Optional configuration for the MCP client.
997
+ """
998
+ self.llm = llm
999
+ self.browser_config = browser_config
1000
+ self.mcp_server_config = mcp_server_config
1001
+ self.mcp_client = None
1002
+ self.stopped = False
1003
+ self.graph = self._compile_graph()
1004
+ self.current_task_id: Optional[str] = None
1005
+ self.stop_event: Optional[threading.Event] = None
1006
+ self.runner: Optional[asyncio.Task] = None # To hold the asyncio task for run
1007
+
1008
+ async def _setup_tools(
1009
+ self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
1010
+ ) -> List[Tool]:
1011
+ """Sets up the basic tools (File I/O) and optional MCP tools."""
1012
+ tools = [
1013
+ WriteFileTool(),
1014
+ ReadFileTool(),
1015
+ ListDirectoryTool(),
1016
+ ] # Basic file operations
1017
+ browser_use_tool = create_browser_search_tool(
1018
+ llm=self.llm,
1019
+ browser_config=self.browser_config,
1020
+ task_id=task_id,
1021
+ stop_event=stop_event,
1022
+ max_parallel_browsers=max_parallel_browsers,
1023
+ )
1024
+ tools += [browser_use_tool]
1025
+ # Add MCP tools if config is provided
1026
+ if self.mcp_server_config:
1027
+ try:
1028
+ logger.info("Setting up MCP client and tools...")
1029
+ if not self.mcp_client:
1030
+ self.mcp_client = await setup_mcp_client_and_tools(
1031
+ self.mcp_server_config
1032
+ )
1033
+ mcp_tools = self.mcp_client.get_tools()
1034
+ logger.info(f"Loaded {len(mcp_tools)} MCP tools.")
1035
+ tools.extend(mcp_tools)
1036
+ except Exception as e:
1037
+ logger.error(f"Failed to set up MCP tools: {e}", exc_info=True)
1038
+ tools_map = {tool.name: tool for tool in tools}
+ # De-duplicate by name and return a concrete list to match the List[Tool]
+ # signature (the unreachable duplicate `elif self.mcp_server_config:` branch
+ # has been removed).
+ return list(tools_map.values())
1044
+
1045
+ async def close_mcp_client(self):
1046
+ if self.mcp_client:
1047
+ await self.mcp_client.__aexit__(None, None, None)
1048
+ self.mcp_client = None
1049
+
1050
+ def _compile_graph(self):  # returns the compiled LangGraph app, not the raw StateGraph
1051
+ """Compiles the Langgraph state machine."""
1052
+ workflow = StateGraph(DeepResearchState)
1053
+
1054
+ # Add nodes
1055
+ workflow.add_node("plan_research", planning_node)
1056
+ workflow.add_node("execute_research", research_execution_node)
1057
+ workflow.add_node("synthesize_report", synthesis_node)
1058
+ workflow.add_node(
+ "end_run", lambda state: logger.info("--- Reached End Run Node ---") or {}
+ ) # Simple end node: logger.info() returns None, so `or {}` yields an empty state update
1061
+
1062
+ # Define edges
1063
+ workflow.set_entry_point("plan_research")
1064
+
1065
+ workflow.add_edge(
1066
+ "plan_research", "execute_research"
1067
+ ) # Always execute after planning
1068
+
1069
+ # Conditional edge after execution
1070
+ workflow.add_conditional_edges(
1071
+ "execute_research",
1072
+ should_continue,
1073
+ {
1074
+ "execute_research": "execute_research", # Loop back if more steps
1075
+ "synthesize_report": "synthesize_report", # Move to synthesis if done
1076
+ "end_run": "end_run", # End if stop requested or error
1077
+ },
1078
+ )
1079
+
1080
+ workflow.add_edge("synthesize_report", "end_run") # End after synthesis
1081
+
1082
+ app = workflow.compile()
1083
+ return app
1084
+
1085
+ async def run(
1086
+ self,
1087
+ topic: str,
1088
+ task_id: Optional[str] = None,
1089
+ save_dir: str = "./tmp/deep_research",
1090
+ max_parallel_browsers: int = 1,
1091
+ ) -> Dict[str, Any]:
+ """
+ Starts the deep research process and runs it to completion.
+
+ Args:
+ topic: The research topic.
+ task_id: Optional existing task ID to resume. If None, a new ID is generated.
+ save_dir: Root directory for plans, search results, and the final report.
+ max_parallel_browsers: Maximum number of concurrent browser agents.
+
+ Returns:
+ A dict with "status", "message", "task_id" and the final graph state.
+ """
1102
+ if self.runner and not self.runner.done():
1103
+ logger.warning(
1104
+ "Agent is already running. Please stop the current task first."
1105
+ )
1106
+ # Return an error status immediately
1107
+ return {
1108
+ "status": "error",
1109
+ "message": "Agent already running.",
1110
+ "task_id": self.current_task_id,
1111
+ }
1112
+
1113
+ self.current_task_id = task_id if task_id else str(uuid.uuid4())
1114
+ safe_root_dir = "./tmp/deep_research"
1115
+ # Compare absolute paths: a bare normpath() can stay relative and would never
+ # match the absolute safe root, silently redirecting every run to the default.
+ normalized_save_dir = os.path.abspath(os.path.normpath(save_dir))
+ if not normalized_save_dir.startswith(os.path.abspath(safe_root_dir)):
1117
+ logger.warning(f"Unsafe save_dir detected: {save_dir}. Using default directory.")
1118
+ normalized_save_dir = os.path.abspath(safe_root_dir)
1119
+ output_dir = os.path.join(normalized_save_dir, self.current_task_id)
1120
+ os.makedirs(output_dir, exist_ok=True)
1121
+
1122
+ logger.info(
+ f"Starting research task ID: {self.current_task_id} for topic: '{topic}'"
+ )
+ logger.info(f"Output directory: {output_dir}")
1126
+
1127
+ self.stop_event = threading.Event()
1128
+ _AGENT_STOP_FLAGS[self.current_task_id] = self.stop_event
1129
+ agent_tools = await self._setup_tools(
1130
+ self.current_task_id, self.stop_event, max_parallel_browsers
1131
+ )
1132
+ initial_state: DeepResearchState = {
1133
+ "task_id": self.current_task_id,
1134
+ "topic": topic,
1135
+ "research_plan": [],
1136
+ "search_results": [],
1137
+ "messages": [],
1138
+ "llm": self.llm,
1139
+ "tools": agent_tools,
1140
+ "output_dir": Path(output_dir),
1141
+ "browser_config": self.browser_config,
1142
+ "final_report": None,
1143
+ "current_category_index": 0,
1144
+ "current_task_index_in_category": 0,
1145
+ "stop_requested": False,
1146
+ "error_message": None,
1147
+ }
1148
+
1149
+ if task_id:
1150
+ logger.info(f"Attempting to resume task {task_id}...")
1151
+ loaded_state = _load_previous_state(task_id, output_dir)
1152
+ initial_state.update(loaded_state)
1153
+ if loaded_state.get("research_plan"):
1154
+ logger.info(
1155
+ f"Resuming with {len(loaded_state['research_plan'])} plan categories "
1156
+ f"and {len(loaded_state.get('search_results', []))} existing results. "
1157
+ f"Next task: Cat {initial_state['current_category_index']}, Task {initial_state['current_task_index_in_category']}"
1158
+ )
1159
+ # Allow the caller's topic to override the stored one when resuming.
+ initial_state["topic"] = topic
1162
+ else:
1163
+ logger.warning(
1164
+ f"Resume requested for {task_id}, but no previous plan found. Starting fresh."
1165
+ )
1166
+
1167
+ # --- Execute Graph using ainvoke ---
1168
+ final_state = None
1169
+ status = "unknown"
1170
+ message = None
1171
+ try:
1172
+ logger.info(f"Invoking graph execution for task {self.current_task_id}...")
1173
+ self.runner = asyncio.create_task(self.graph.ainvoke(initial_state))
1174
+ final_state = await self.runner
1175
+ logger.info(f"Graph execution finished for task {self.current_task_id}.")
1176
+
1177
+ # Determine status based on final state
1178
+ if self.stop_event and self.stop_event.is_set():
1179
+ status = "stopped"
1180
+ message = "Research process was stopped by request."
1181
+ logger.info(message)
1182
+ elif final_state and final_state.get("error_message"):
1183
+ status = "error"
1184
+ message = final_state["error_message"]
1185
+ logger.error(f"Graph execution completed with error: {message}")
1186
+ elif final_state and final_state.get("final_report"):
1187
+ status = "completed"
1188
+ message = "Research process completed successfully."
1189
+ logger.info(message)
1190
+ else:
1191
+ # If it ends without error/report (e.g., empty plan, stopped before synthesis)
1192
+ status = "finished_incomplete"
1193
+ message = "Research process finished, but may be incomplete (no final report generated)."
1194
+ logger.warning(message)
1195
+
1196
+ except asyncio.CancelledError:
1197
+ status = "cancelled"
1198
+ message = f"Agent run task cancelled for {self.current_task_id}."
1199
+ logger.info(message)
1200
+ # final_state will remain None or the state before cancellation if checkpointing was used
1201
+ except Exception as e:
1202
+ status = "error"
1203
+ message = f"Unhandled error during graph execution for {self.current_task_id}: {e}"
1204
+ logger.error(message, exc_info=True)
1205
+ # final_state will remain None or the state before the error
1206
+ finally:
1207
+ logger.info(f"Cleaning up resources for task {self.current_task_id}")
1208
+ task_id_to_clean = self.current_task_id
1209
+
1210
+ self.stop_event = None
1211
+ self.current_task_id = None
1212
+ self.runner = None # Mark runner as finished
1213
+ await self.close_mcp_client()  # also resets self.mcp_client to None
1215
+
1216
+ # Return a result dictionary including the status and the final state if available
1217
+ return {
1218
+ "status": status,
1219
+ "message": message,
1220
+ "task_id": task_id_to_clean, # Use the stored task_id
1221
+ "final_state": final_state
1222
+ if final_state
1223
+ else {}, # Return the final state dict
1224
+ }
1225
+
1226
+ async def _stop_lingering_browsers(self, task_id):
1227
+ """Attempts to stop any BrowserUseAgent instances associated with the task_id."""
1228
+ keys_to_stop = [
1229
+ key for key in _BROWSER_AGENT_INSTANCES if key.startswith(f"{task_id}_")
1230
+ ]
1231
+ if not keys_to_stop:
1232
+ return
1233
+
1234
+ logger.warning(
1235
+ f"Found {len(keys_to_stop)} potentially lingering browser agents for task {task_id}. Attempting stop..."
1236
+ )
1237
+ for key in keys_to_stop:
1238
+ agent_instance = _BROWSER_AGENT_INSTANCES.get(key)
1239
+ try:
1240
+ if agent_instance:
1241
+ # Assuming BU agent has an async stop method
1242
+ await agent_instance.stop()
1243
+ logger.info(f"Called stop() on browser agent instance {key}")
1244
+ except Exception as e:
1245
+ logger.error(
1246
+ f"Error calling stop() on browser agent instance {key}: {e}"
1247
+ )
1248
+
1249
+ async def stop(self):
1250
+ """Signals the currently running agent task to stop."""
1251
+ if not self.current_task_id or not self.stop_event:
1252
+ logger.info("No agent task is currently running.")
1253
+ return
1254
+
1255
+ logger.info(f"Stop requested for task ID: {self.current_task_id}")
1256
+ self.stop_event.set() # Signal the stop event
1257
+ self.stopped = True
1258
+ await self._stop_lingering_browsers(self.current_task_id)
1259
+
1260
+ def close(self):
+ # Reset the stop flag so this agent instance can be reused for a new run.
+ self.stopped = False
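A minimal usage sketch of the class above, assuming an OPENAI_API_KEY in the environment; get_llm_model is the factory from src/utils/llm_provider.py later in this commit, and the DeepResearchAgent import path is an assumption, since this hunk does not show the module's location:

```python
import asyncio

from src.utils.llm_provider import get_llm_model
from src.agent.deep_research.deep_research_agent import DeepResearchAgent  # assumed path

async def main():
    llm = get_llm_model("openai", model_name="gpt-4o", temperature=0.0)
    agent = DeepResearchAgent(llm=llm, browser_config={"headless": True})
    result = await agent.run("History of autonomous web agents",
                             max_parallel_browsers=2)
    print(result["status"], result["task_id"])

asyncio.run(main())
```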
src/browser/__init__.py ADDED
File without changes
src/browser/custom_browser.py ADDED
@@ -0,0 +1,109 @@
1
+ import asyncio
3
+
4
+ from playwright.async_api import Browser as PlaywrightBrowser
5
+ from playwright.async_api import (
6
+ BrowserContext as PlaywrightBrowserContext,
7
+ )
8
+ from playwright.async_api import (
9
+ Playwright,
10
+ async_playwright,
11
+ )
12
+ from browser_use.browser.browser import Browser, IN_DOCKER
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
+ import logging
16
+
17
+ from browser_use.browser.chrome import (
18
+ CHROME_ARGS,
19
+ CHROME_DETERMINISTIC_RENDERING_ARGS,
20
+ CHROME_DISABLE_SECURITY_ARGS,
21
+ CHROME_DOCKER_ARGS,
22
+ CHROME_HEADLESS_ARGS,
23
+ )
25
+ from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
26
+ from browser_use.utils import time_execution_async
27
+ import socket
28
+
29
+ from .custom_context import CustomBrowserContext
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ class CustomBrowser(Browser):
35
+
36
+ async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
37
+ """Create a browser context"""
38
+ browser_config = self.config.model_dump() if self.config else {}
39
+ context_config = config.model_dump() if config else {}
40
+ merged_config = {**browser_config, **context_config}
41
+ return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
42
+
43
+ async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
44
+ """Sets up and returns a Playwright Browser instance with anti-detection measures."""
45
+ assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
46
+
47
+ # Use the configured window size from new_context_config if available
48
+ if (
49
+ not self.config.headless
50
+ and hasattr(self.config, 'new_context_config')
51
+ and hasattr(self.config.new_context_config, 'window_width')
52
+ and hasattr(self.config.new_context_config, 'window_height')
53
+ ):
54
+ screen_size = {
55
+ 'width': self.config.new_context_config.window_width,
56
+ 'height': self.config.new_context_config.window_height,
57
+ }
58
+ offset_x, offset_y = get_window_adjustments()
59
+ elif self.config.headless:
60
+ screen_size = {'width': 1920, 'height': 1080}
61
+ offset_x, offset_y = 0, 0
62
+ else:
63
+ screen_size = get_screen_resolution()
64
+ offset_x, offset_y = get_window_adjustments()
65
+
66
+ chrome_args = {
67
+ f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
68
+ *CHROME_ARGS,
69
+ *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
70
+ *(CHROME_HEADLESS_ARGS if self.config.headless else []),
71
+ *(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
72
+ *(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
73
+ f'--window-position={offset_x},{offset_y}',
74
+ f'--window-size={screen_size["width"]},{screen_size["height"]}',
75
+ *self.config.extra_browser_args,
76
+ }
77
+
78
+ # check if chrome remote debugging port is already taken,
79
+ # if so remove the remote-debugging-port arg to prevent conflicts
80
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
81
+ if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
82
+ chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
83
+
84
+ browser_class = getattr(playwright, self.config.browser_class)
85
+ args = {
86
+ 'chromium': list(chrome_args),
87
+ 'firefox': [
88
+ *{
89
+ '-no-remote',
90
+ *self.config.extra_browser_args,
91
+ }
92
+ ],
93
+ 'webkit': [
94
+ *{
95
+ '--no-startup-window',
96
+ *self.config.extra_browser_args,
97
+ }
98
+ ],
99
+ }
100
+
101
+ browser = await browser_class.launch(
102
+ channel='chromium', # https://github.com/microsoft/playwright/issues/33566
103
+ headless=self.config.headless,
104
+ args=args[self.config.browser_class],
105
+ proxy=self.config.proxy.model_dump() if self.config.proxy else None,
106
+ handle_sigterm=False,
107
+ handle_sigint=False,
108
+ )
109
+ return browser
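A hedged sketch of driving the subclass above. BrowserConfig is the browser_use config model whose attributes this method reads (headless, disable_security, extra_browser_args, and so on); headless launches get the fixed 1920x1080 size from the branch shown:

```python
import asyncio

from browser_use.browser.browser import BrowserConfig
from src.browser.custom_browser import CustomBrowser

async def main():
    browser = CustomBrowser(config=BrowserConfig(headless=True))
    context = await browser.new_context()  # returns a CustomBrowserContext
    await browser.close()

asyncio.run(main())
```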
src/browser/custom_context.py ADDED
@@ -0,0 +1,22 @@
1
+ import json
2
+ import logging
3
+ import os
4
+
5
+ from browser_use.browser.browser import Browser, IN_DOCKER
6
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
7
+ from playwright.async_api import Browser as PlaywrightBrowser
8
+ from playwright.async_api import BrowserContext as PlaywrightBrowserContext
9
+ from typing import Optional
10
+ from browser_use.browser.context import BrowserContextState
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class CustomBrowserContext(BrowserContext):
16
+ def __init__(
17
+ self,
18
+ browser: 'Browser',
19
+ config: BrowserContextConfig | None = None,
20
+ state: Optional[BrowserContextState] = None,
21
+ ):
22
+ super().__init__(browser=browser, config=config, state=state)
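CustomBrowserContext adds no behaviour of its own; the interesting piece is the dict merge in CustomBrowser.new_context above, where context-level settings win because they are unpacked last:

```python
# Context-level values override browser-level ones in {**a, **b} merges.
browser_config = {"window_width": 1280, "window_height": 720}
context_config = {"window_width": 1920}

merged = {**browser_config, **context_config}
assert merged == {"window_width": 1920, "window_height": 720}
```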
src/controller/__init__.py ADDED
File without changes
src/controller/custom_controller.py ADDED
@@ -0,0 +1,182 @@
2
+
4
+ from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar
5
+ from pydantic import BaseModel
6
+ from browser_use.agent.views import ActionResult
7
+ from browser_use.browser.context import BrowserContext
8
+ from browser_use.controller.service import Controller, DoneAction
9
+ from browser_use.controller.registry.service import Registry, RegisteredAction
10
+ from main_content_extractor import MainContentExtractor
11
+ from browser_use.controller.views import (
12
+ ClickElementAction,
13
+ DoneAction,
14
+ ExtractPageContentAction,
15
+ GoToUrlAction,
16
+ InputTextAction,
17
+ OpenTabAction,
18
+ ScrollAction,
19
+ SearchGoogleAction,
20
+ SendKeysAction,
21
+ SwitchTabAction,
22
+ )
23
+ import logging
24
+ import inspect
25
+ import asyncio
26
+ import os
27
+ from langchain_core.language_models.chat_models import BaseChatModel
28
+ from browser_use.agent.views import ActionModel, ActionResult
29
+
30
+ from src.utils.mcp_client import create_tool_param_model, setup_mcp_client_and_tools
31
+
32
+ from browser_use.utils import time_execution_sync
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ Context = TypeVar('Context')
37
+
38
+
39
+ class CustomController(Controller):
40
+ def __init__(self, exclude_actions: list[str] = [],
41
+ output_model: Optional[Type[BaseModel]] = None,
42
+ ask_assistant_callback: Optional[Union[Callable[[str, BrowserContext], Dict[str, Any]], Callable[
43
+ [str, BrowserContext], Awaitable[Dict[str, Any]]]]] = None,
44
+ ):
45
+ super().__init__(exclude_actions=exclude_actions, output_model=output_model)
46
+ self._register_custom_actions()
47
+ self.ask_assistant_callback = ask_assistant_callback
48
+ self.mcp_client = None
49
+ self.mcp_server_config = None
50
+
51
+ def _register_custom_actions(self):
52
+ """Register all custom browser actions"""
53
+
54
+ @self.registry.action(
55
+ "When executing tasks, prioritize autonomous completion. However, if you encounter a definitive blocker "
56
+ "that prevents you from proceeding independently – such as needing credentials you don't possess, "
57
+ "requiring subjective human judgment, needing a physical action performed, encountering complex CAPTCHAs, "
58
+ "or facing limitations in your capabilities – you must request human assistance."
59
+ )
60
+ async def ask_for_assistant(query: str, browser: BrowserContext):
61
+ if self.ask_assistant_callback:
62
+ if inspect.iscoroutinefunction(self.ask_assistant_callback):
63
+ user_response = await self.ask_assistant_callback(query, browser)
64
+ else:
65
+ user_response = self.ask_assistant_callback(query, browser)
66
+ msg = f"AI ask: {query}. User response: {user_response['response']}"
67
+ logger.info(msg)
68
+ return ActionResult(extracted_content=msg, include_in_memory=True)
69
+ else:
70
+ return ActionResult(extracted_content="Human cannot help you. Please try another way.",
71
+ include_in_memory=True)
72
+
73
+ @self.registry.action(
74
+ 'Upload file to interactive element with file path',
75
+ )
76
+ async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]):
77
+ if path not in available_file_paths:
78
+ return ActionResult(error=f'File path {path} is not available')
79
+
80
+ if not os.path.exists(path):
81
+ return ActionResult(error=f'File {path} does not exist')
82
+
83
+ dom_el = await browser.get_dom_element_by_index(index)
84
+
85
+ file_upload_dom_el = dom_el.get_file_upload_element()
86
+
87
+ if file_upload_dom_el is None:
88
+ msg = f'No file upload element found at index {index}'
89
+ logger.info(msg)
90
+ return ActionResult(error=msg)
91
+
92
+ file_upload_el = await browser.get_locate_element(file_upload_dom_el)
93
+
94
+ if file_upload_el is None:
95
+ msg = f'No file upload element found at index {index}'
96
+ logger.info(msg)
97
+ return ActionResult(error=msg)
98
+
99
+ try:
100
+ await file_upload_el.set_input_files(path)
101
+ msg = f'Successfully uploaded file to index {index}'
102
+ logger.info(msg)
103
+ return ActionResult(extracted_content=msg, include_in_memory=True)
104
+ except Exception as e:
105
+ msg = f'Failed to upload file to index {index}: {str(e)}'
106
+ logger.info(msg)
107
+ return ActionResult(error=msg)
108
+
109
+ @time_execution_sync('--act')
110
+ async def act(
111
+ self,
112
+ action: ActionModel,
113
+ browser_context: Optional[BrowserContext] = None,
114
+ #
115
+ page_extraction_llm: Optional[BaseChatModel] = None,
116
+ sensitive_data: Optional[Dict[str, str]] = None,
117
+ available_file_paths: Optional[list[str]] = None,
118
+ #
119
+ context: Context | None = None,
120
+ ) -> ActionResult:
121
+ """Execute an action"""
122
+
123
+ try:
124
+ for action_name, params in action.model_dump(exclude_unset=True).items():
125
+ if params is not None:
126
+ if action_name.startswith("mcp"):
127
+ # this is an MCP tool
128
+ logger.debug(f"Invoke MCP tool: {action_name}")
129
+ mcp_tool = self.registry.registry.actions.get(action_name).function
130
+ result = await mcp_tool.ainvoke(params)
131
+ else:
132
+ result = await self.registry.execute_action(
133
+ action_name,
134
+ params,
135
+ browser=browser_context,
136
+ page_extraction_llm=page_extraction_llm,
137
+ sensitive_data=sensitive_data,
138
+ available_file_paths=available_file_paths,
139
+ context=context,
140
+ )
141
+
142
+ if isinstance(result, str):
143
+ return ActionResult(extracted_content=result)
144
+ elif isinstance(result, ActionResult):
145
+ return result
146
+ elif result is None:
147
+ return ActionResult()
148
+ else:
149
+ raise ValueError(f'Invalid action result type: {type(result)} of {result}')
150
+ return ActionResult()
151
+ except Exception:
+ raise  # re-raise unchanged; kept as an explicit hook for future error handling
153
+
154
+ async def setup_mcp_client(self, mcp_server_config: Optional[Dict[str, Any]] = None):
155
+ self.mcp_server_config = mcp_server_config
156
+ if self.mcp_server_config:
157
+ self.mcp_client = await setup_mcp_client_and_tools(self.mcp_server_config)
158
+ self.register_mcp_tools()
159
+
160
+ def register_mcp_tools(self):
161
+ """
162
+ Register the MCP tools used by this controller.
163
+ """
164
+ if self.mcp_client:
165
+ for server_name in self.mcp_client.server_name_to_tools:
166
+ for tool in self.mcp_client.server_name_to_tools[server_name]:
167
+ tool_name = f"mcp.{server_name}.{tool.name}"
168
+ self.registry.registry.actions[tool_name] = RegisteredAction(
169
+ name=tool_name,
170
+ description=tool.description,
171
+ function=tool,
172
+ param_model=create_tool_param_model(tool),
173
+ )
174
+ logger.info(f"Added MCP tool: {tool_name}")
175
+ logger.debug(
176
+ f"Registered {len(self.mcp_client.server_name_to_tools[server_name])} mcp tools for {server_name}")
177
+ else:
178
+ logger.warning("MCP client not started.")
179
+
180
+ async def close_mcp_client(self):
181
+ if self.mcp_client:
182
+ await self.mcp_client.__aexit__(None, None, None)
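A usage sketch for the controller above: wiring a synchronous human-in-the-loop callback (an async callback also works, per the inspect.iscoroutinefunction check), with the MCP config shape implied by setup_mcp_client_and_tools. The callback contents are illustrative:

```python
from src.controller.custom_controller import CustomController

def ask_human(query: str, browser) -> dict:
    # A real UI would block on user input; ask_for_assistant above reads
    # the "response" key from the returned dict.
    return {"response": f"(auto-reply to: {query})"}

controller = CustomController(ask_assistant_callback=ask_human)

# MCP tools are registered as actions named "mcp.<server>.<tool>" after:
# await controller.setup_mcp_client({"mcpServers": {...}})
```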
src/utils/__init__.py ADDED
File without changes
src/utils/config.py ADDED
@@ -0,0 +1,100 @@
1
+ PROVIDER_DISPLAY_NAMES = {
2
+ "openai": "OpenAI",
3
+ "azure_openai": "Azure OpenAI",
4
+ "anthropic": "Anthropic",
5
+ "deepseek": "DeepSeek",
6
+ "google": "Google",
7
+ "alibaba": "Alibaba",
8
+ "moonshot": "MoonShot",
9
+ "unbound": "Unbound AI",
10
+ "ibm": "IBM",
11
+ "grok": "Grok",
12
+ }
13
+
14
+ # Predefined model names for common providers
15
+ model_names = {
16
+ "anthropic": ["claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-opus-20240229"],
17
+ "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"],
18
+ "deepseek": ["deepseek-chat", "deepseek-reasoner"],
19
+ "google": ["gemini-2.0-flash", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest",
20
+ "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05",
21
+ "gemini-2.5-pro-preview-03-25", "gemini-2.5-flash-preview-04-17"],
22
+ "ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b",
23
+ "deepseek-r1:14b", "deepseek-r1:32b"],
24
+ "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
25
+ "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
26
+ "alibaba": ["qwen-plus", "qwen-max", "qwen-vl-max", "qwen-vl-plus", "qwen-turbo", "qwen-long"],
27
+ "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
28
+ "unbound": ["gemini-2.0-flash", "gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"],
29
+ "grok": [
30
+ "grok-3",
31
+ "grok-3-fast",
32
+ "grok-3-mini",
33
+ "grok-3-mini-fast",
34
+ "grok-2-vision",
35
+ "grok-2-image",
36
+ "grok-2",
37
+ ],
38
+ "siliconflow": [
39
+ "deepseek-ai/DeepSeek-R1",
40
+ "deepseek-ai/DeepSeek-V3",
41
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
42
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
43
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
44
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
45
+ "deepseek-ai/DeepSeek-V2.5",
46
+ "deepseek-ai/deepseek-vl2",
47
+ "Qwen/Qwen2.5-72B-Instruct-128K",
48
+ "Qwen/Qwen2.5-72B-Instruct",
49
+ "Qwen/Qwen2.5-32B-Instruct",
50
+ "Qwen/Qwen2.5-14B-Instruct",
51
+ "Qwen/Qwen2.5-7B-Instruct",
52
+ "Qwen/Qwen2.5-Coder-32B-Instruct",
53
+ "Qwen/Qwen2.5-Coder-7B-Instruct",
54
+ "Qwen/Qwen2-7B-Instruct",
55
+ "Qwen/Qwen2-1.5B-Instruct",
56
+ "Qwen/QwQ-32B-Preview",
57
+ "Qwen/Qwen2-VL-72B-Instruct",
58
+ "Qwen/Qwen2.5-VL-32B-Instruct",
59
+ "Qwen/Qwen2.5-VL-72B-Instruct",
60
+ "TeleAI/TeleChat2",
61
+ "THUDM/glm-4-9b-chat",
62
+ "Vendor-A/Qwen/Qwen2.5-72B-Instruct",
63
+ "internlm/internlm2_5-7b-chat",
64
+ "internlm/internlm2_5-20b-chat",
65
+ "Pro/Qwen/Qwen2.5-7B-Instruct",
66
+ "Pro/Qwen/Qwen2-7B-Instruct",
67
+ "Pro/Qwen/Qwen2-1.5B-Instruct",
68
+ "Pro/THUDM/chatglm3-6b",
69
+ "Pro/THUDM/glm-4-9b-chat",
70
+ ],
71
+ "ibm": ["ibm/granite-vision-3.1-2b-preview", "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",
72
+ "meta-llama/llama-3-2-90b-vision-instruct"],
73
+ "modelscope":[
74
+ "Qwen/Qwen2.5-Coder-32B-Instruct",
75
+ "Qwen/Qwen2.5-Coder-14B-Instruct",
76
+ "Qwen/Qwen2.5-Coder-7B-Instruct",
77
+ "Qwen/Qwen2.5-72B-Instruct",
78
+ "Qwen/Qwen2.5-32B-Instruct",
79
+ "Qwen/Qwen2.5-14B-Instruct",
80
+ "Qwen/Qwen2.5-7B-Instruct",
81
+ "Qwen/QwQ-32B-Preview",
82
+ "Qwen/Qwen2.5-VL-3B-Instruct",
83
+ "Qwen/Qwen2.5-VL-7B-Instruct",
84
+ "Qwen/Qwen2.5-VL-32B-Instruct",
85
+ "Qwen/Qwen2.5-VL-72B-Instruct",
86
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
87
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
88
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
89
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
90
+ "deepseek-ai/DeepSeek-R1",
91
+ "deepseek-ai/DeepSeek-V3",
92
+ "Qwen/Qwen3-1.7B",
93
+ "Qwen/Qwen3-4B",
94
+ "Qwen/Qwen3-8B",
95
+ "Qwen/Qwen3-14B",
96
+ "Qwen/Qwen3-30B-A3B",
97
+ "Qwen/Qwen3-32B",
98
+ "Qwen/Qwen3-235B-A22B",
99
+ ],
100
+ }
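An example of consuming the tables above, for instance to populate a provider/model dropdown. Note that some providers (e.g. "ollama", "siliconflow", "modelscope") appear in model_names but not in PROVIDER_DISPLAY_NAMES, hence the fallback:

```python
from src.utils.config import PROVIDER_DISPLAY_NAMES, model_names

for provider, models in model_names.items():
    display = PROVIDER_DISPLAY_NAMES.get(provider, provider)
    print(f"{display}: {len(models)} predefined models, e.g. {models[0]}")
```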
src/utils/llm_provider.py ADDED
@@ -0,0 +1,354 @@
1
+ from openai import OpenAI
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_core.globals import get_llm_cache
5
+ from langchain_core.language_models.base import (
6
+ BaseLanguageModel,
7
+ LangSmithParams,
8
+ LanguageModelInput,
9
+ )
10
+ import os
11
+ from langchain_core.load import dumpd, dumps
12
+ from langchain_core.messages import (
13
+ AIMessage,
14
+ SystemMessage,
15
+ AnyMessage,
16
+ BaseMessage,
17
+ BaseMessageChunk,
18
+ HumanMessage,
19
+ convert_to_messages,
20
+ message_chunk_to_message,
21
+ )
22
+ from langchain_core.outputs import (
23
+ ChatGeneration,
24
+ ChatGenerationChunk,
25
+ ChatResult,
26
+ LLMResult,
27
+ RunInfo,
28
+ )
29
+ from langchain_ollama import ChatOllama
30
+ from langchain_core.output_parsers.base import OutputParserLike
31
+ from langchain_core.runnables import Runnable, RunnableConfig
32
+ from langchain_core.tools import BaseTool
33
+
34
+ from typing import (
35
+ TYPE_CHECKING,
36
+ Any,
37
+ Callable,
38
+ Literal,
39
+ Optional,
40
+ Union,
41
+ cast, List,
42
+ )
43
+ from langchain_anthropic import ChatAnthropic
44
+ from langchain_mistralai import ChatMistralAI
45
+ from langchain_google_genai import ChatGoogleGenerativeAI
47
+ from langchain_openai import AzureChatOpenAI, ChatOpenAI
48
+ from langchain_ibm import ChatWatsonx
49
+ from langchain_aws import ChatBedrock
50
+ from pydantic import SecretStr
51
+
52
+ from src.utils import config
53
+
54
+
55
+ class DeepSeekR1ChatOpenAI(ChatOpenAI):
56
+
57
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
58
+ super().__init__(*args, **kwargs)
59
+ self.client = OpenAI(
60
+ base_url=kwargs.get("base_url"),
61
+ api_key=kwargs.get("api_key")
62
+ )
63
+
64
+ async def ainvoke(
65
+ self,
66
+ input: LanguageModelInput,
67
+ config: Optional[RunnableConfig] = None,
68
+ *,
69
+ stop: Optional[list[str]] = None,
70
+ **kwargs: Any,
71
+ ) -> AIMessage:
72
+ message_history = []
73
+ for input_ in input:
74
+ if isinstance(input_, SystemMessage):
75
+ message_history.append({"role": "system", "content": input_.content})
76
+ elif isinstance(input_, AIMessage):
77
+ message_history.append({"role": "assistant", "content": input_.content})
78
+ else:
79
+ message_history.append({"role": "user", "content": input_.content})
80
+
81
+ response = self.client.chat.completions.create(
82
+ model=self.model_name,
83
+ messages=message_history
84
+ )
85
+
86
+ reasoning_content = response.choices[0].message.reasoning_content
87
+ content = response.choices[0].message.content
88
+ return AIMessage(content=content, reasoning_content=reasoning_content)
89
+
90
+ def invoke(
91
+ self,
92
+ input: LanguageModelInput,
93
+ config: Optional[RunnableConfig] = None,
94
+ *,
95
+ stop: Optional[list[str]] = None,
96
+ **kwargs: Any,
97
+ ) -> AIMessage:
98
+ message_history = []
99
+ for input_ in input:
100
+ if isinstance(input_, SystemMessage):
101
+ message_history.append({"role": "system", "content": input_.content})
102
+ elif isinstance(input_, AIMessage):
103
+ message_history.append({"role": "assistant", "content": input_.content})
104
+ else:
105
+ message_history.append({"role": "user", "content": input_.content})
106
+
107
+ response = self.client.chat.completions.create(
108
+ model=self.model_name,
109
+ messages=message_history
110
+ )
111
+
112
+ reasoning_content = response.choices[0].message.reasoning_content
113
+ content = response.choices[0].message.content
114
+ return AIMessage(content=content, reasoning_content=reasoning_content)
115
+
116
+
117
+ class DeepSeekR1ChatOllama(ChatOllama):
118
+
119
+ async def ainvoke(
120
+ self,
121
+ input: LanguageModelInput,
122
+ config: Optional[RunnableConfig] = None,
123
+ *,
124
+ stop: Optional[list[str]] = None,
125
+ **kwargs: Any,
126
+ ) -> AIMessage:
127
+ org_ai_message = await super().ainvoke(input=input)
128
+ org_content = org_ai_message.content
129
+ # Guard against outputs that lack a </think> block (the bare [1] index would raise IndexError).
+ parts = org_content.split("</think>")
+ reasoning_content = parts[0].replace("<think>", "") if len(parts) > 1 else ""
+ content = parts[-1]
131
+ if "**JSON Response:**" in content:
132
+ content = content.split("**JSON Response:**")[-1]
133
+ return AIMessage(content=content, reasoning_content=reasoning_content)
134
+
135
+ def invoke(
136
+ self,
137
+ input: LanguageModelInput,
138
+ config: Optional[RunnableConfig] = None,
139
+ *,
140
+ stop: Optional[list[str]] = None,
141
+ **kwargs: Any,
142
+ ) -> AIMessage:
143
+ org_ai_message = super().invoke(input=input)
144
+ org_content = org_ai_message.content
145
+ # Guard against outputs that lack a </think> block (the bare [1] index would raise IndexError).
+ parts = org_content.split("</think>")
+ reasoning_content = parts[0].replace("<think>", "") if len(parts) > 1 else ""
+ content = parts[-1]
147
+ if "**JSON Response:**" in content:
148
+ content = content.split("**JSON Response:**")[-1]
149
+ return AIMessage(content=content, reasoning_content=reasoning_content)
150
+
151
+
152
+ def get_llm_model(provider: str, **kwargs):
+ """
+ Construct a LangChain chat model for the given provider.
+ :param provider: provider key, e.g. "openai", "anthropic", "ollama".
+ :param kwargs: model_name, temperature, base_url, api_key, num_ctx, etc.
+ :return: a configured chat model instance.
+ """
159
+ if provider not in ["ollama", "bedrock"]:
160
+ env_var = f"{provider.upper()}_API_KEY"
161
+ api_key = kwargs.get("api_key", "") or os.getenv(env_var, "")
162
+ if not api_key:
163
+ provider_display = config.PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
164
+ error_msg = f"💥 {provider_display} API key not found! 🔑 Please set the `{env_var}` environment variable or provide it in the UI."
165
+ raise ValueError(error_msg)
166
+ kwargs["api_key"] = api_key
167
+
168
+ if provider == "anthropic":
169
+ if not kwargs.get("base_url", ""):
170
+ base_url = "https://api.anthropic.com"
171
+ else:
172
+ base_url = kwargs.get("base_url")
173
+
174
+ return ChatAnthropic(
175
+ model=kwargs.get("model_name", "claude-3-5-sonnet-20241022"),
176
+ temperature=kwargs.get("temperature", 0.0),
177
+ base_url=base_url,
178
+ api_key=api_key,
179
+ )
180
+ elif provider == 'mistral':
181
+ if not kwargs.get("base_url", ""):
182
+ base_url = os.getenv("MISTRAL_ENDPOINT", "https://api.mistral.ai/v1")
183
+ else:
184
+ base_url = kwargs.get("base_url")
185
+ if not kwargs.get("api_key", ""):
186
+ api_key = os.getenv("MISTRAL_API_KEY", "")
187
+ else:
188
+ api_key = kwargs.get("api_key")
189
+
190
+ return ChatMistralAI(
191
+ model=kwargs.get("model_name", "mistral-large-latest"),
192
+ temperature=kwargs.get("temperature", 0.0),
193
+ base_url=base_url,
194
+ api_key=api_key,
195
+ )
196
+ elif provider == "openai":
197
+ if not kwargs.get("base_url", ""):
198
+ base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
199
+ else:
200
+ base_url = kwargs.get("base_url")
201
+
202
+ return ChatOpenAI(
203
+ model=kwargs.get("model_name", "gpt-4o"),
204
+ temperature=kwargs.get("temperature", 0.0),
205
+ base_url=base_url,
206
+ api_key=api_key,
207
+ )
208
+ elif provider == "grok":
209
+ if not kwargs.get("base_url", ""):
210
+ base_url = os.getenv("GROK_ENDPOINT", "https://api.x.ai/v1")
211
+ else:
212
+ base_url = kwargs.get("base_url")
213
+
214
+ return ChatOpenAI(
215
+ model=kwargs.get("model_name", "grok-3"),
216
+ temperature=kwargs.get("temperature", 0.0),
217
+ base_url=base_url,
218
+ api_key=api_key,
219
+ )
220
+ elif provider == "deepseek":
221
+ if not kwargs.get("base_url", ""):
222
+ base_url = os.getenv("DEEPSEEK_ENDPOINT", "")
223
+ else:
224
+ base_url = kwargs.get("base_url")
225
+
226
+ if kwargs.get("model_name", "deepseek-chat") == "deepseek-reasoner":
227
+ return DeepSeekR1ChatOpenAI(
228
+ model=kwargs.get("model_name", "deepseek-reasoner"),
229
+ temperature=kwargs.get("temperature", 0.0),
230
+ base_url=base_url,
231
+ api_key=api_key,
232
+ )
233
+ else:
234
+ return ChatOpenAI(
235
+ model=kwargs.get("model_name", "deepseek-chat"),
236
+ temperature=kwargs.get("temperature", 0.0),
237
+ base_url=base_url,
238
+ api_key=api_key,
239
+ )
240
+ elif provider == "google":
241
+ return ChatGoogleGenerativeAI(
242
+ model=kwargs.get("model_name", "gemini-2.0-flash-exp"),
243
+ temperature=kwargs.get("temperature", 0.0),
244
+ api_key=api_key,
245
+ )
246
+ elif provider == "ollama":
247
+ if not kwargs.get("base_url", ""):
248
+ base_url = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
249
+ else:
250
+ base_url = kwargs.get("base_url")
251
+
252
+ if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"):
253
+ return DeepSeekR1ChatOllama(
254
+ model=kwargs.get("model_name", "deepseek-r1:14b"),
255
+ temperature=kwargs.get("temperature", 0.0),
256
+ num_ctx=kwargs.get("num_ctx", 32000),
257
+ base_url=base_url,
258
+ )
259
+ else:
260
+ return ChatOllama(
261
+ model=kwargs.get("model_name", "qwen2.5:7b"),
262
+ temperature=kwargs.get("temperature", 0.0),
263
+ num_ctx=kwargs.get("num_ctx", 32000),
264
+ num_predict=kwargs.get("num_predict", 1024),
265
+ base_url=base_url,
266
+ )
267
+ elif provider == "azure_openai":
268
+ if not kwargs.get("base_url", ""):
269
+ base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
270
+ else:
271
+ base_url = kwargs.get("base_url")
272
+ api_version = kwargs.get("api_version", "") or os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")
273
+ return AzureChatOpenAI(
274
+ model=kwargs.get("model_name", "gpt-4o"),
275
+ temperature=kwargs.get("temperature", 0.0),
276
+ api_version=api_version,
277
+ azure_endpoint=base_url,
278
+ api_key=api_key,
279
+ )
280
+ elif provider == "alibaba":
281
+ if not kwargs.get("base_url", ""):
282
+ base_url = os.getenv("ALIBABA_ENDPOINT", "https://dashscope.aliyuncs.com/compatible-mode/v1")
283
+ else:
284
+ base_url = kwargs.get("base_url")
285
+
286
+ return ChatOpenAI(
287
+ model=kwargs.get("model_name", "qwen-plus"),
288
+ temperature=kwargs.get("temperature", 0.0),
289
+ base_url=base_url,
290
+ api_key=api_key,
291
+ )
292
+ elif provider == "ibm":
293
+ parameters = {
294
+ "temperature": kwargs.get("temperature", 0.0),
295
+ "max_tokens": kwargs.get("num_ctx", 32000)
296
+ }
297
+ if not kwargs.get("base_url", ""):
298
+ base_url = os.getenv("IBM_ENDPOINT", "https://us-south.ml.cloud.ibm.com")
299
+ else:
300
+ base_url = kwargs.get("base_url")
301
+
302
+ return ChatWatsonx(
303
+ model_id=kwargs.get("model_name", "ibm/granite-vision-3.1-2b-preview"),
304
+ url=base_url,
305
+ project_id=os.getenv("IBM_PROJECT_ID"),
306
+ apikey=os.getenv("IBM_API_KEY"),
307
+ params=parameters
308
+ )
309
+ elif provider == "moonshot":
310
+ return ChatOpenAI(
311
+ model=kwargs.get("model_name", "moonshot-v1-32k-vision-preview"),
312
+ temperature=kwargs.get("temperature", 0.0),
313
+ base_url=os.getenv("MOONSHOT_ENDPOINT"),
314
+ api_key=os.getenv("MOONSHOT_API_KEY"),
315
+ )
316
+ elif provider == "unbound":
317
+ return ChatOpenAI(
318
+ model=kwargs.get("model_name", "gpt-4o-mini"),
319
+ temperature=kwargs.get("temperature", 0.0),
320
+ base_url=os.getenv("UNBOUND_ENDPOINT", "https://api.getunbound.ai"),
321
+ api_key=api_key,
322
+ )
323
+ elif provider == "siliconflow":
324
+ if not kwargs.get("api_key", ""):
325
+ api_key = os.getenv("SiliconFLOW_API_KEY", "")
326
+ else:
327
+ api_key = kwargs.get("api_key")
328
+ if not kwargs.get("base_url", ""):
329
+ base_url = os.getenv("SiliconFLOW_ENDPOINT", "")
330
+ else:
331
+ base_url = kwargs.get("base_url")
332
+ return ChatOpenAI(
333
+ api_key=api_key,
334
+ base_url=base_url,
335
+ model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
336
+ temperature=kwargs.get("temperature", 0.0),
337
+ )
338
+ elif provider == "modelscope":
339
+ if not kwargs.get("api_key", ""):
340
+ api_key = os.getenv("MODELSCOPE_API_KEY", "")
341
+ else:
342
+ api_key = kwargs.get("api_key")
343
+ if not kwargs.get("base_url", ""):
344
+ base_url = os.getenv("MODELSCOPE_ENDPOINT", "")
345
+ else:
346
+ base_url = kwargs.get("base_url")
347
+ return ChatOpenAI(
348
+ api_key=api_key,
349
+ base_url=base_url,
350
+ model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
351
+ temperature=kwargs.get("temperature", 0.0),
352
+ )
353
+ else:
354
+ raise ValueError(f"Unsupported provider: {provider}")
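
For reference, a minimal usage sketch of the factory above (the `get_llm_model` name matches the call site in browser_use_agent_tab.py below; the specific model name and a locally running Ollama server are assumptions):

from src.utils import llm_provider

# Sketch only: pick any branch above; "ollama" is assumed to be running locally.
llm = llm_provider.get_llm_model(
    provider="ollama",
    model_name="qwen2.5:7b",   # per-provider default is used if omitted
    temperature=0.0,
    num_ctx=32000,             # only the ollama branches read num_ctx
)
print(llm.invoke("Say hi in one word.").content)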
src/utils/mcp_client.py ADDED
@@ -0,0 +1,254 @@
1
+ import inspect
2
+ import logging
3
+ import uuid
4
+ from datetime import date, datetime, time
5
+ from enum import Enum
6
+ from typing import Any, Dict, List, Optional, Set, Type, Union, get_type_hints
7
+
8
+ from browser_use.controller.registry.views import ActionModel
9
+ from langchain.tools import BaseTool
10
+ from langchain_mcp_adapters.client import MultiServerMCPClient
11
+ from pydantic import BaseModel, Field, create_model
12
+ from pydantic.v1 import BaseModel as BaseModelV1, Field as FieldV1  # aliased so they do not shadow the v2 imports above
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ async def setup_mcp_client_and_tools(mcp_server_config: Dict[str, Any]) -> Optional[MultiServerMCPClient]:
18
+ """
19
+ Initializes the MultiServerMCPClient and connects to the configured servers.
20
+
21
+ Returns:
22
+ MultiServerMCPClient | None: The initialized and started client
23
+ instance, or None on failure.
26
+ """
27
+
28
+ logger.info("Initializing MultiServerMCPClient...")
29
+
30
+ if not mcp_server_config:
31
+ logger.error("No MCP server configuration provided.")
32
+ return None
33
+
34
+ try:
35
+ if "mcpServers" in mcp_server_config:
36
+ mcp_server_config = mcp_server_config["mcpServers"]
37
+ client = MultiServerMCPClient(mcp_server_config)
38
+ await client.__aenter__()
39
+ return client
40
+
41
+ except Exception as e:
42
+ logger.error(f"Failed to setup MCP client or fetch tools: {e}", exc_info=True)
43
+ return None
44
+
45
+
46
+ def create_tool_param_model(tool: BaseTool) -> Type[BaseModel]:
47
+ """Creates a Pydantic model from a LangChain tool's schema"""
48
+
49
+ # Get tool schema information
50
+ json_schema = tool.args_schema
51
+ tool_name = tool.name
52
+
53
+ # If the tool already has a schema defined, convert it to a new param_model
54
+ if json_schema is not None:
55
+
56
+ # Create new parameter model
57
+ params = {}
58
+
59
+ # Process properties if they exist
60
+ if 'properties' in json_schema:
61
+ # Find required fields
62
+ required_fields: Set[str] = set(json_schema.get('required', []))
63
+
64
+ for prop_name, prop_details in json_schema['properties'].items():
65
+ field_type = resolve_type(prop_details, f"{tool_name}_{prop_name}")
66
+
67
+ # Check if parameter is required
68
+ is_required = prop_name in required_fields
69
+
70
+ # Get default value and description
71
+ default_value = prop_details.get('default', ... if is_required else None)
72
+ description = prop_details.get('description', '')
73
+
74
+ # Add field constraints
75
+ field_kwargs = {'default': default_value}
76
+ if description:
77
+ field_kwargs['description'] = description
78
+
79
+ # Add additional constraints if present
80
+ if 'minimum' in prop_details:
81
+ field_kwargs['ge'] = prop_details['minimum']
82
+ if 'maximum' in prop_details:
83
+ field_kwargs['le'] = prop_details['maximum']
84
+ if 'minLength' in prop_details:
85
+ field_kwargs['min_length'] = prop_details['minLength']
86
+ if 'maxLength' in prop_details:
87
+ field_kwargs['max_length'] = prop_details['maxLength']
88
+ if 'pattern' in prop_details:
89
+ field_kwargs['pattern'] = prop_details['pattern']
90
+
91
+ # Add to parameters dictionary
92
+ params[prop_name] = (field_type, Field(**field_kwargs))
93
+
94
+ return create_model(
95
+ f'{tool_name}_parameters',
96
+ __base__=ActionModel,
97
+ **params, # type: ignore
98
+ )
99
+
100
+ # If no schema is defined, extract parameters from the _run method
101
+ run_method = tool._run
102
+ sig = inspect.signature(run_method)
103
+
104
+ # Get type hints for better type information
105
+ try:
106
+ type_hints = get_type_hints(run_method)
107
+ except Exception:
108
+ type_hints = {}
109
+
110
+ params = {}
111
+ for name, param in sig.parameters.items():
112
+ # Skip 'self' parameter and any other parameters you want to exclude
113
+ if name == 'self':
114
+ continue
115
+
116
+ # Get annotation from type hints if available, otherwise from signature
117
+ annotation = type_hints.get(name, param.annotation)
118
+ if annotation == inspect.Parameter.empty:
119
+ annotation = Any
120
+
121
+ # Use default value if available, otherwise make it required
122
+ if param.default != param.empty:
123
+ params[name] = (annotation, param.default)
124
+ else:
125
+ params[name] = (annotation, ...)
126
+
127
+ return create_model(
128
+ f'{tool_name}_parameters',
129
+ __base__=ActionModel,
130
+ **params, # type: ignore
131
+ )
132
+
133
+
134
+ def resolve_type(prop_details: Dict[str, Any], prefix: str = "") -> Any:
135
+ """Recursively resolves JSON schema type to Python/Pydantic type"""
136
+
137
+ # Handle reference types
138
+ if '$ref' in prop_details:
139
+ # In a real application, reference resolution would be needed
140
+ return Any
141
+
142
+ # Basic type mapping
143
+ type_mapping = {
144
+ 'string': str,
145
+ 'integer': int,
146
+ 'number': float,
147
+ 'boolean': bool,
148
+ 'array': List,
149
+ 'object': Dict,
150
+ 'null': type(None),
151
+ }
152
+
153
+ # Handle formatted strings
154
+ if prop_details.get('type') == 'string' and 'format' in prop_details:
155
+ format_mapping = {
156
+ 'date-time': datetime,
157
+ 'date': date,
158
+ 'time': time,
159
+ 'email': str,
160
+ 'uri': str,
161
+ 'url': str,
162
+ 'uuid': uuid.UUID,
163
+ 'binary': bytes,
164
+ }
165
+ return format_mapping.get(prop_details['format'], str)
166
+
167
+ # Handle enum types
168
+ if 'enum' in prop_details:
169
+ enum_values = prop_details['enum']
170
+ # Create dynamic enum class with safe names
171
+ enum_dict = {}
172
+ for i, v in enumerate(enum_values):
173
+ # Ensure enum names are valid Python identifiers
174
+ if isinstance(v, str):
175
+ key = v.upper().replace(' ', '_').replace('-', '_')
176
+ if not key.isidentifier():
177
+ key = f"VALUE_{i}"
178
+ else:
179
+ key = f"VALUE_{i}"
180
+ enum_dict[key] = v
181
+
182
+ # Only create enum if we have values
183
+ if enum_dict:
184
+ return Enum(f"{prefix}_Enum", enum_dict)
185
+ return str # Fallback
186
+
187
+ # Handle array types
188
+ if prop_details.get('type') == 'array' and 'items' in prop_details:
189
+ item_type = resolve_type(prop_details['items'], f"{prefix}_item")
190
+ return List[item_type] # type: ignore
191
+
192
+ # Handle object types with properties
193
+ if prop_details.get('type') == 'object' and 'properties' in prop_details:
194
+ nested_params = {}
195
+ for nested_name, nested_details in prop_details['properties'].items():
196
+ nested_type = resolve_type(nested_details, f"{prefix}_{nested_name}")
197
+ # Get required field info
198
+ required_fields = prop_details.get('required', [])
199
+ is_required = nested_name in required_fields
200
+ default_value = nested_details.get('default', ... if is_required else None)
201
+ description = nested_details.get('description', '')
202
+
203
+ field_kwargs = {'default': default_value}
204
+ if description:
205
+ field_kwargs['description'] = description
206
+
207
+ nested_params[nested_name] = (nested_type, Field(**field_kwargs))
208
+
209
+ # Create nested model
210
+ nested_model = create_model(f"{prefix}_Model", **nested_params)
211
+ return nested_model
212
+
213
+ # Handle union types (oneOf, anyOf)
214
+ if 'oneOf' in prop_details or 'anyOf' in prop_details:
215
+ union_schema = prop_details.get('oneOf') or prop_details.get('anyOf')
216
+ union_types = []
217
+ for i, t in enumerate(union_schema):
218
+ union_types.append(resolve_type(t, f"{prefix}_{i}"))
219
+
220
+ if union_types:
221
+ return Union.__getitem__(tuple(union_types)) # type: ignore
222
+ return Any
223
+
224
+ # Handle allOf (intersection types)
225
+ if 'allOf' in prop_details:
226
+ nested_params = {}
227
+ for i, schema_part in enumerate(prop_details['allOf']):
228
+ if 'properties' in schema_part:
229
+ for nested_name, nested_details in schema_part['properties'].items():
230
+ nested_type = resolve_type(nested_details, f"{prefix}_allOf_{i}_{nested_name}")
231
+ # Check if required
232
+ required_fields = schema_part.get('required', [])
233
+ is_required = nested_name in required_fields
234
+ nested_params[nested_name] = (nested_type, ... if is_required else None)
235
+
236
+ # Create composite model
237
+ if nested_params:
238
+ composite_model = create_model(f"{prefix}_CompositeModel", **nested_params)
239
+ return composite_model
240
+ return Dict
241
+
242
+ # Default to basic types
243
+ schema_type = prop_details.get('type', 'string')
244
+ if isinstance(schema_type, list):
245
+ # Handle multiple types (e.g., ["string", "null"])
246
+ non_null_types = [t for t in schema_type if t != 'null']
247
+ if non_null_types:
248
+ primary_type = type_mapping.get(non_null_types[0], Any)
249
+ if 'null' in schema_type:
250
+ return Optional[primary_type] # type: ignore
251
+ return primary_type
252
+ return Any
253
+
254
+ return type_mapping.get(schema_type, Any)
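
A short sketch of `create_tool_param_model` on a dict-style schema (the tool, its fields, and the duck-typed stand-in are all hypothetical, used only to illustrate the mapping above):

from types import SimpleNamespace

from src.utils.mcp_client import create_tool_param_model

# Hypothetical tool whose args_schema is a plain JSON-schema dict, as MCP
# adapters produce; SimpleNamespace duck-types the BaseTool attributes used.
echo_tool = SimpleNamespace(
    name="echo",
    args_schema={
        "properties": {
            "message": {"type": "string", "minLength": 1, "description": "Text to echo"},
            "repeat": {"type": "integer", "minimum": 1, "default": 1},
        },
        "required": ["message"],
    },
)

EchoParams = create_tool_param_model(echo_tool)  # type: ignore[arg-type]
print(EchoParams(message="hi").model_dump())     # {'message': 'hi', 'repeat': 1}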
src/utils/utils.py ADDED
@@ -0,0 +1,39 @@
1
+ import base64
2
+ import os
3
+ import time
4
+ from pathlib import Path
5
+ from typing import Dict, Optional
6
+ import requests
7
+ import json
8
+ import gradio as gr
9
+ import uuid
10
+
11
+
12
+ def encode_image(img_path):
13
+ if not img_path:
14
+ return None
15
+ with open(img_path, "rb") as fin:
16
+ image_data = base64.b64encode(fin.read()).decode("utf-8")
17
+ return image_data
18
+
19
+
20
+ def get_latest_files(directory: str, file_types: tuple = ('.webm', '.zip')) -> Dict[str, Optional[str]]:
21
+ """Get the latest recording and trace files"""
22
+ latest_files: Dict[str, Optional[str]] = {ext: None for ext in file_types}
23
+
24
+ if not os.path.exists(directory):
25
+ os.makedirs(directory, exist_ok=True)
26
+ return latest_files
27
+
28
+ for file_type in file_types:
29
+ try:
30
+ matches = list(Path(directory).rglob(f"*{file_type}"))
31
+ if matches:
32
+ latest = max(matches, key=lambda p: p.stat().st_mtime)
33
+ # Only return files that are complete (not being written)
34
+ if time.time() - latest.stat().st_mtime > 1.0:
35
+ latest_files[file_type] = str(latest)
36
+ except Exception as e:
37
+ print(f"Error getting latest {file_type} file: {e}")
38
+
39
+ return latest_files
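
Usage sketch: the 1.0-second mtime check above is a settling heuristic, so a file still being written is skipped until a later poll.

from src.utils.utils import get_latest_files

latest = get_latest_files("./tmp")        # defaults to ('.webm', '.zip')
if latest.get(".webm"):
    print(f"Newest finished recording: {latest['.webm']}")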
src/webui/__init__.py ADDED
File without changes
src/webui/components/__init__.py ADDED
File without changes
src/webui/components/agent_settings_tab.py ADDED
@@ -0,0 +1,269 @@
1
+ import json
2
+ import os
3
+
4
+ import gradio as gr
5
+ from gradio.components import Component
6
+ from typing import Any, Dict, Optional
7
+ from src.webui.webui_manager import WebuiManager
8
+ from src.utils import config
9
+ import logging
10
+ from functools import partial
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ def update_model_dropdown(llm_provider):
16
+ """
17
+ Update the model name dropdown with predefined models for the selected provider.
18
+ """
19
+ # Use predefined models for the selected provider
20
+ if llm_provider in config.model_names:
21
+ return gr.Dropdown(choices=config.model_names[llm_provider], value=config.model_names[llm_provider][0],
22
+ interactive=True)
23
+ else:
24
+ return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)
25
+
26
+
27
+ async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
28
+ """
29
+ Update the MCP server.
30
+ """
31
+ if hasattr(webui_manager, "bu_controller") and webui_manager.bu_controller:
32
+ logger.warning("⚠️ Close controller because mcp file has changed!")
33
+ await webui_manager.bu_controller.close_mcp_client()
34
+ webui_manager.bu_controller = None
35
+
36
+ if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
37
+ logger.warning(f"{mcp_file} is not a valid MCP file.")
38
+ return None, gr.update(visible=False)
39
+
40
+ with open(mcp_file, 'r') as f:
41
+ mcp_server = json.load(f)
42
+
43
+ return json.dumps(mcp_server, indent=2), gr.update(visible=True)
44
+
45
+
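# The uploaded file is expected to carry the usual "mcpServers" layout that
# setup_mcp_client_and_tools unwraps; an illustrative config (server name,
# command, and args are placeholders, not part of the commit):
example_mcp_config = {
    "mcpServers": {
        "filesystem": {
            "command": "npx",
            "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
        }
    }
}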
46
+ def create_agent_settings_tab(webui_manager: WebuiManager):
47
+ """
48
+ Creates an agent settings tab.
49
+ """
50
+ input_components = set(webui_manager.get_components())
51
+ tab_components = {}
52
+
53
+ with gr.Group():
54
+ with gr.Column():
55
+ override_system_prompt = gr.Textbox(label="Override system prompt", lines=4, interactive=True)
56
+ extend_system_prompt = gr.Textbox(label="Extend system prompt", lines=4, interactive=True)
57
+
58
+ with gr.Group():
59
+ mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
60
+ mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)
61
+
62
+ with gr.Group():
63
+ with gr.Row():
64
+ llm_provider = gr.Dropdown(
65
+ choices=[provider for provider, model in config.model_names.items()],
66
+ label="LLM Provider",
67
+ value=os.getenv("DEFAULT_LLM", "openai"),
68
+ info="Select LLM provider for LLM",
69
+ interactive=True
70
+ )
71
+ llm_model_name = gr.Dropdown(
72
+ label="LLM Model Name",
73
+ choices=config.model_names[os.getenv("DEFAULT_LLM", "openai")],
74
+ value=config.model_names[os.getenv("DEFAULT_LLM", "openai")][0],
75
+ interactive=True,
76
+ allow_custom_value=True,
77
+ info="Select a model in the dropdown options or directly type a custom model name"
78
+ )
79
+ with gr.Row():
80
+ llm_temperature = gr.Slider(
81
+ minimum=0.0,
82
+ maximum=2.0,
83
+ value=0.6,
84
+ step=0.1,
85
+ label="LLM Temperature",
86
+ info="Controls randomness in model outputs",
87
+ interactive=True
88
+ )
89
+
90
+ use_vision = gr.Checkbox(
91
+ label="Use Vision",
92
+ value=True,
93
+ info="Enable Vision(Input highlighted screenshot into LLM)",
94
+ interactive=True
95
+ )
96
+
97
+ ollama_num_ctx = gr.Slider(
98
+ minimum=2 ** 8,
99
+ maximum=2 ** 16,
100
+ value=16000,
101
+ step=1,
102
+ label="Ollama Context Length",
103
+ info="Controls max context length model needs to handle (less = faster)",
104
+ visible=False,
105
+ interactive=True
106
+ )
107
+
108
+ with gr.Row():
109
+ llm_base_url = gr.Textbox(
110
+ label="Base URL",
111
+ value="",
112
+ info="API endpoint URL (if required)"
113
+ )
114
+ llm_api_key = gr.Textbox(
115
+ label="API Key",
116
+ type="password",
117
+ value="",
118
+ info="Your API key (leave blank to use .env)"
119
+ )
120
+
121
+ with gr.Group():
122
+ with gr.Row():
123
+ planner_llm_provider = gr.Dropdown(
124
+ choices=[provider for provider, model in config.model_names.items()],
125
+ label="Planner LLM Provider",
126
+ info="Select LLM provider for LLM",
127
+ value=None,
128
+ interactive=True
129
+ )
130
+ planner_llm_model_name = gr.Dropdown(
131
+ label="Planner LLM Model Name",
132
+ interactive=True,
133
+ allow_custom_value=True,
134
+ info="Select a model in the dropdown options or directly type a custom model name"
135
+ )
136
+ with gr.Row():
137
+ planner_llm_temperature = gr.Slider(
138
+ minimum=0.0,
139
+ maximum=2.0,
140
+ value=0.6,
141
+ step=0.1,
142
+ label="Planner LLM Temperature",
143
+ info="Controls randomness in model outputs",
144
+ interactive=True
145
+ )
146
+
147
+ planner_use_vision = gr.Checkbox(
148
+ label="Use Vision(Planner LLM)",
149
+ value=False,
150
+ info="Enable Vision(Input highlighted screenshot into LLM)",
151
+ interactive=True
152
+ )
153
+
154
+ planner_ollama_num_ctx = gr.Slider(
155
+ minimum=2 ** 8,
156
+ maximum=2 ** 16,
157
+ value=16000,
158
+ step=1,
159
+ label="Ollama Context Length",
160
+ info="Controls max context length model needs to handle (less = faster)",
161
+ visible=False,
162
+ interactive=True
163
+ )
164
+
165
+ with gr.Row():
166
+ planner_llm_base_url = gr.Textbox(
167
+ label="Base URL",
168
+ value="",
169
+ info="API endpoint URL (if required)"
170
+ )
171
+ planner_llm_api_key = gr.Textbox(
172
+ label="API Key",
173
+ type="password",
174
+ value="",
175
+ info="Your API key (leave blank to use .env)"
176
+ )
177
+
178
+ with gr.Row():
179
+ max_steps = gr.Slider(
180
+ minimum=1,
181
+ maximum=1000,
182
+ value=100,
183
+ step=1,
184
+ label="Max Run Steps",
185
+ info="Maximum number of steps the agent will take",
186
+ interactive=True
187
+ )
188
+ max_actions = gr.Slider(
189
+ minimum=1,
190
+ maximum=100,
191
+ value=10,
192
+ step=1,
193
+ label="Max Number of Actions",
194
+ info="Maximum number of actions the agent will take per step",
195
+ interactive=True
196
+ )
197
+
198
+ with gr.Row():
199
+ max_input_tokens = gr.Number(
200
+ label="Max Input Tokens",
201
+ value=128000,
202
+ precision=0,
203
+ interactive=True
204
+ )
205
+ tool_calling_method = gr.Dropdown(
206
+ label="Tool Calling Method",
207
+ value="auto",
208
+ interactive=True,
209
+ allow_custom_value=True,
210
+ choices=['function_calling', 'json_mode', 'raw', 'auto', 'tools', "None"],
211
+ visible=True
212
+ )
213
+ tab_components.update(dict(
214
+ override_system_prompt=override_system_prompt,
215
+ extend_system_prompt=extend_system_prompt,
216
+ llm_provider=llm_provider,
217
+ llm_model_name=llm_model_name,
218
+ llm_temperature=llm_temperature,
219
+ use_vision=use_vision,
220
+ ollama_num_ctx=ollama_num_ctx,
221
+ llm_base_url=llm_base_url,
222
+ llm_api_key=llm_api_key,
223
+ planner_llm_provider=planner_llm_provider,
224
+ planner_llm_model_name=planner_llm_model_name,
225
+ planner_llm_temperature=planner_llm_temperature,
226
+ planner_use_vision=planner_use_vision,
227
+ planner_ollama_num_ctx=planner_ollama_num_ctx,
228
+ planner_llm_base_url=planner_llm_base_url,
229
+ planner_llm_api_key=planner_llm_api_key,
230
+ max_steps=max_steps,
231
+ max_actions=max_actions,
232
+ max_input_tokens=max_input_tokens,
233
+ tool_calling_method=tool_calling_method,
234
+ mcp_json_file=mcp_json_file,
235
+ mcp_server_config=mcp_server_config,
236
+ ))
237
+ webui_manager.add_components("agent_settings", tab_components)
238
+
239
+ llm_provider.change(
240
+ fn=lambda x: gr.update(visible=x == "ollama"),
241
+ inputs=llm_provider,
242
+ outputs=ollama_num_ctx
243
+ )
244
+ llm_provider.change(
245
+ lambda provider: update_model_dropdown(provider),
246
+ inputs=[llm_provider],
247
+ outputs=[llm_model_name]
248
+ )
249
+ planner_llm_provider.change(
250
+ fn=lambda x: gr.update(visible=x == "ollama"),
251
+ inputs=[planner_llm_provider],
252
+ outputs=[planner_ollama_num_ctx]
253
+ )
254
+ planner_llm_provider.change(
255
+ lambda provider: update_model_dropdown(provider),
256
+ inputs=[planner_llm_provider],
257
+ outputs=[planner_llm_model_name]
258
+ )
259
+
260
+ async def update_wrapper(mcp_file):
261
+ """Wrapper for handle_pause_resume."""
262
+ update_dict = await update_mcp_server(mcp_file, webui_manager)
263
+ yield update_dict
264
+
265
+ mcp_json_file.change(
266
+ update_wrapper,
267
+ inputs=[mcp_json_file],
268
+ outputs=[mcp_server_config, mcp_server_config]
269
+ )
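
A sketch of mounting this tab in a standalone Blocks app (assumes WebuiManager can be constructed with no arguments; the actual entry point may differ):

import gradio as gr

from src.webui.webui_manager import WebuiManager
from src.webui.components.agent_settings_tab import create_agent_settings_tab

manager = WebuiManager()  # assumption: no-arg construction
with gr.Blocks() as demo:
    with gr.Tab("Agent Settings"):
        create_agent_settings_tab(manager)
demo.launch()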
src/webui/components/browser_settings_tab.py ADDED
@@ -0,0 +1,161 @@
1
+ import os
2
+ from distutils.util import strtobool
3
+ import gradio as gr
4
+ import logging
5
+ from gradio.components import Component
6
+
7
+ from src.webui.webui_manager import WebuiManager
8
+ from src.utils import config
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ async def close_browser(webui_manager: WebuiManager):
13
+ """
14
+ Close browser
15
+ """
16
+ if webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
17
+ webui_manager.bu_current_task.cancel()
18
+ webui_manager.bu_current_task = None
19
+
20
+ if webui_manager.bu_browser_context:
21
+ logger.info("⚠️ Closing browser context when changing browser config.")
22
+ await webui_manager.bu_browser_context.close()
23
+ webui_manager.bu_browser_context = None
24
+
25
+ if webui_manager.bu_browser:
26
+ logger.info("⚠️ Closing browser when changing browser config.")
27
+ await webui_manager.bu_browser.close()
28
+ webui_manager.bu_browser = None
29
+
30
+ def create_browser_settings_tab(webui_manager: WebuiManager):
31
+ """
32
+ Creates a browser settings tab.
33
+ """
34
+ input_components = set(webui_manager.get_components())
35
+ tab_components = {}
36
+
37
+ with gr.Group():
38
+ with gr.Row():
39
+ browser_binary_path = gr.Textbox(
40
+ label="Browser Binary Path",
41
+ lines=1,
42
+ interactive=True,
43
+ placeholder="e.g. '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome'"
44
+ )
45
+ browser_user_data_dir = gr.Textbox(
46
+ label="Browser User Data Dir",
47
+ lines=1,
48
+ interactive=True,
49
+ placeholder="Leave it empty if you use your default user data",
50
+ )
51
+ with gr.Group():
52
+ with gr.Row():
53
+ use_own_browser = gr.Checkbox(
54
+ label="Use Own Browser",
55
+ value=bool(strtobool(os.getenv("USE_OWN_BROWSER", "false"))),
56
+ info="Use your existing browser instance",
57
+ interactive=True
58
+ )
59
+ keep_browser_open = gr.Checkbox(
60
+ label="Keep Browser Open",
61
+ value=bool(strtobool(os.getenv("KEEP_BROWSER_OPEN", "true"))),
62
+ info="Keep Browser Open between Tasks",
63
+ interactive=True
64
+ )
65
+ headless = gr.Checkbox(
66
+ label="Headless Mode",
67
+ value=False,
68
+ info="Run browser without GUI",
69
+ interactive=True
70
+ )
71
+ disable_security = gr.Checkbox(
72
+ label="Disable Security",
73
+ value=False,
74
+ info="Disable browser security",
75
+ interactive=True
76
+ )
77
+
78
+ with gr.Group():
79
+ with gr.Row():
80
+ window_w = gr.Number(
81
+ label="Window Width",
82
+ value=1280,
83
+ info="Browser window width",
84
+ interactive=True
85
+ )
86
+ window_h = gr.Number(
87
+ label="Window Height",
88
+ value=1100,
89
+ info="Browser window height",
90
+ interactive=True
91
+ )
92
+ with gr.Group():
93
+ with gr.Row():
94
+ cdp_url = gr.Textbox(
95
+ label="CDP URL",
96
+ value=os.getenv("BROWSER_CDP", None),
97
+ info="CDP URL for browser remote debugging",
98
+ interactive=True,
99
+ )
100
+ wss_url = gr.Textbox(
101
+ label="WSS URL",
102
+ info="WSS URL for browser remote debugging",
103
+ interactive=True,
104
+ )
105
+ with gr.Group():
106
+ with gr.Row():
107
+ save_recording_path = gr.Textbox(
108
+ label="Recording Path",
109
+ placeholder="e.g. ./tmp/record_videos",
110
+ info="Path to save browser recordings",
111
+ interactive=True,
112
+ )
113
+
114
+ save_trace_path = gr.Textbox(
115
+ label="Trace Path",
116
+ placeholder="e.g. ./tmp/traces",
117
+ info="Path to save Agent traces",
118
+ interactive=True,
119
+ )
120
+
121
+ with gr.Row():
122
+ save_agent_history_path = gr.Textbox(
123
+ label="Agent History Save Path",
124
+ value="./tmp/agent_history",
125
+ info="Specify the directory where agent history should be saved.",
126
+ interactive=True,
127
+ )
128
+ save_download_path = gr.Textbox(
129
+ label="Save Directory for browser downloads",
130
+ value="./tmp/downloads",
131
+ info="Specify the directory where downloaded files should be saved.",
132
+ interactive=True,
133
+ )
134
+ tab_components.update(
135
+ dict(
136
+ browser_binary_path=browser_binary_path,
137
+ browser_user_data_dir=browser_user_data_dir,
138
+ use_own_browser=use_own_browser,
139
+ keep_browser_open=keep_browser_open,
140
+ headless=headless,
141
+ disable_security=disable_security,
142
+ save_recording_path=save_recording_path,
143
+ save_trace_path=save_trace_path,
144
+ save_agent_history_path=save_agent_history_path,
145
+ save_download_path=save_download_path,
146
+ cdp_url=cdp_url,
147
+ wss_url=wss_url,
148
+ window_h=window_h,
149
+ window_w=window_w,
150
+ )
151
+ )
152
+ webui_manager.add_components("browser_settings", tab_components)
153
+
154
+ async def close_wrapper():
155
+ """Wrapper for handle_clear."""
156
+ await close_browser(webui_manager)
157
+
158
+ headless.change(close_wrapper)
159
+ keep_browser_open.change(close_wrapper)
160
+ disable_security.change(close_wrapper)
161
+ use_own_browser.change(close_wrapper)
src/webui/components/browser_use_agent_tab.py ADDED
@@ -0,0 +1,1083 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import uuid
6
+ from typing import Any, AsyncGenerator, Dict, Optional
7
+
8
+ import gradio as gr
9
+
10
+ # from browser_use.agent.service import Agent
11
+ from browser_use.agent.views import (
12
+ AgentHistoryList,
13
+ AgentOutput,
14
+ )
15
+ from browser_use.browser.browser import BrowserConfig
16
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
17
+ from browser_use.browser.views import BrowserState
18
+ from gradio.components import Component
19
+ from langchain_core.language_models.chat_models import BaseChatModel
20
+
21
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
22
+ from src.browser.custom_browser import CustomBrowser
23
+ from src.controller.custom_controller import CustomController
24
+ from src.utils import llm_provider
25
+ from src.webui.webui_manager import WebuiManager
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ # --- Helper Functions --- (Defined at module level)
31
+
32
+
33
+ async def _initialize_llm(
34
+ provider: Optional[str],
35
+ model_name: Optional[str],
36
+ temperature: float,
37
+ base_url: Optional[str],
38
+ api_key: Optional[str],
39
+ num_ctx: Optional[int] = None,
40
+ ) -> Optional[BaseChatModel]:
41
+ """Initializes the LLM based on settings. Returns None if provider/model is missing."""
42
+ if not provider or not model_name:
43
+ logger.info("LLM Provider or Model Name not specified, LLM will be None.")
44
+ return None
45
+ try:
46
+ # Use your actual LLM provider logic here
47
+ logger.info(
48
+ f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}"
49
+ )
50
+ # Example using a placeholder function
51
+ llm = llm_provider.get_llm_model(
52
+ provider=provider,
53
+ model_name=model_name,
54
+ temperature=temperature,
55
+ base_url=base_url or None,
56
+ api_key=api_key or None,
57
+ # Add other relevant params like num_ctx for ollama
58
+ num_ctx=num_ctx if provider == "ollama" else None,
59
+ )
60
+ return llm
61
+ except Exception as e:
62
+ logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
63
+ gr.Warning(
64
+ f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}"
65
+ )
66
+ return None
67
+
68
+
69
+ def _get_config_value(
70
+ webui_manager: WebuiManager,
71
+ comp_dict: Dict[gr.components.Component, Any],
72
+ comp_id_suffix: str,
73
+ default: Any = None,
74
+ ) -> Any:
75
+ """Safely get value from component dictionary using its ID suffix relative to the tab."""
76
+ # Assumes component ID format is "tab_name.comp_name"
77
+ tab_name = "browser_use_agent" # Hardcode or derive if needed
78
+ comp_id = f"{tab_name}.{comp_id_suffix}"
79
+ # Need to find the component object first using the ID from the manager
80
+ try:
81
+ comp = webui_manager.get_component_by_id(comp_id)
82
+ return comp_dict.get(comp, default)
83
+ except KeyError:
84
+ # Try accessing settings tabs as well
85
+ for prefix in ["agent_settings", "browser_settings"]:
86
+ try:
87
+ comp_id = f"{prefix}.{comp_id_suffix}"
88
+ comp = webui_manager.get_component_by_id(comp_id)
89
+ return comp_dict.get(comp, default)
90
+ except KeyError:
91
+ continue
92
+ logger.warning(
93
+ f"Component with suffix '{comp_id_suffix}' not found in manager for value lookup."
94
+ )
95
+ return default
96
+
97
+
98
+ def _format_agent_output(model_output: AgentOutput) -> str:
99
+ """Formats AgentOutput for display in the chatbot using JSON."""
100
+ content = ""
101
+ if model_output:
102
+ try:
103
+ # Directly use model_dump if actions and current_state are Pydantic models
104
+ action_dump = [
105
+ action.model_dump(exclude_none=True) for action in model_output.action
106
+ ]
107
+
108
+ state_dump = model_output.current_state.model_dump(exclude_none=True)
109
+ model_output_dump = {
110
+ "current_state": state_dump,
111
+ "action": action_dump,
112
+ }
113
+ # Dump to JSON string with indentation
114
+ json_string = json.dumps(model_output_dump, indent=4, ensure_ascii=False)
115
+ # Wrap in <pre><code> for proper display in HTML
116
+ content = f"<pre><code class='language-json'>{json_string}</code></pre>"
117
+
118
+ except AttributeError as ae:
119
+ logger.error(
120
+ f"AttributeError during model dump: {ae}. Check if 'action' or 'current_state' or their items support 'model_dump'."
121
+ )
122
+ content = f"<pre><code>Error: Could not format agent output (AttributeError: {ae}).\nRaw output: {str(model_output)}</code></pre>"
123
+ except Exception as e:
124
+ logger.error(f"Error formatting agent output: {e}", exc_info=True)
125
+ # Fallback to simple string representation on error
126
+ content = f"<pre><code>Error formatting agent output.\nRaw output:\n{str(model_output)}</code></pre>"
127
+
128
+ return content.strip()
129
+
130
+
131
+ # --- Updated Callback Implementation ---
132
+
133
+
134
+ async def _handle_new_step(
135
+ webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
136
+ ):
137
+ """Callback for each step taken by the agent, including screenshot display."""
138
+
139
+ # Use the correct chat history attribute name from the user's code
140
+ if not hasattr(webui_manager, "bu_chat_history"):
141
+ logger.error(
142
+ "Attribute 'bu_chat_history' not found in webui_manager! Cannot add chat message."
143
+ )
144
+ # Initialize it maybe? Or raise an error? For now, log and potentially skip chat update.
145
+ webui_manager.bu_chat_history = [] # Initialize if missing (consider if this is the right place)
146
+ # return # Or stop if this is critical
147
+ step_num -= 1
148
+ logger.info(f"Step {step_num} completed.")
149
+
150
+ # --- Screenshot Handling ---
151
+ screenshot_html = ""
152
+ # Ensure state.screenshot exists and is not empty before proceeding
153
+ # Use getattr for safer access
154
+ screenshot_data = getattr(state, "screenshot", None)
155
+ if screenshot_data:
156
+ try:
157
+ # Basic validation: check if it looks like base64
158
+ if (
159
+ isinstance(screenshot_data, str) and len(screenshot_data) > 100
160
+ ): # Arbitrary length check
161
+ # *** UPDATED STYLE: Removed centering, adjusted width ***
162
+ img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
163
+ screenshot_html = (
164
+ img_tag + "<br/>"
165
+ ) # Use <br/> for line break after inline-block image
166
+ else:
167
+ logger.warning(
168
+ f"Screenshot for step {step_num} seems invalid (type: {type(screenshot_data)}, len: {len(screenshot_data) if isinstance(screenshot_data, str) else 'N/A'})."
169
+ )
170
+ screenshot_html = "**[Invalid screenshot data]**<br/>"
171
+
172
+ except Exception as e:
173
+ logger.error(
174
+ f"Error processing or formatting screenshot for step {step_num}: {e}",
175
+ exc_info=True,
176
+ )
177
+ screenshot_html = "**[Error displaying screenshot]**<br/>"
178
+ else:
179
+ logger.debug(f"No screenshot available for step {step_num}.")
180
+
181
+ # --- Format Agent Output ---
182
+ formatted_output = _format_agent_output(output) # Use the updated function
183
+
184
+ # --- Combine and Append to Chat ---
185
+ step_header = f"--- **Step {step_num}** ---"
186
+ # Combine header, image (with line break), and JSON block
187
+ final_content = step_header + "<br/>" + screenshot_html + formatted_output
188
+
189
+ chat_message = {
190
+ "role": "assistant",
191
+ "content": final_content.strip(), # Remove leading/trailing whitespace
192
+ }
193
+
194
+ # Append to the correct chat history list
195
+ webui_manager.bu_chat_history.append(chat_message)
196
+
197
+ await asyncio.sleep(0.05)
198
+
199
+
200
+ def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
201
+ """Callback when the agent finishes the task (success or failure)."""
202
+ logger.info(
203
+ f"Agent task finished. Duration: {history.total_duration_seconds():.2f}s, Tokens: {history.total_input_tokens()}"
204
+ )
205
+ final_summary = "**Task Completed**\n"
206
+ final_summary += f"- Duration: {history.total_duration_seconds():.2f} seconds\n"
207
+ final_summary += f"- Total Input Tokens: {history.total_input_tokens()}\n" # Or total tokens if available
208
+
209
+ final_result = history.final_result()
210
+ if final_result:
211
+ final_summary += f"- Final Result: {final_result}\n"
212
+
213
+ errors = history.errors()
214
+ if errors and any(errors):
215
+ final_summary += f"- **Errors:**\n```\n{errors}\n```\n"
216
+ else:
217
+ final_summary += "- Status: Success\n"
218
+
219
+ webui_manager.bu_chat_history.append(
220
+ {"role": "assistant", "content": final_summary}
221
+ )
222
+
223
+
224
+ async def _ask_assistant_callback(
225
+ webui_manager: WebuiManager, query: str, browser_context: BrowserContext
226
+ ) -> Dict[str, Any]:
227
+ """Callback triggered by the agent's ask_for_assistant action."""
228
+ logger.info("Agent requires assistance. Waiting for user input.")
229
+
230
+ if not hasattr(webui_manager, "_chat_history"):
231
+ logger.error("Chat history not found in webui_manager during ask_assistant!")
232
+ return {"response": "Internal Error: Cannot display help request."}
233
+
234
+ webui_manager.bu_chat_history.append(
235
+ {
236
+ "role": "assistant",
237
+ "content": f"**Need Help:** {query}\nPlease provide information or perform the required action in the browser, then type your response/confirmation below and click 'Submit Response'.",
238
+ }
239
+ )
240
+
241
+ # Use state stored in webui_manager
242
+ webui_manager.bu_response_event = asyncio.Event()
243
+ webui_manager.bu_user_help_response = None # Reset previous response
244
+
245
+ try:
246
+ logger.info("Waiting for user response event...")
247
+ await asyncio.wait_for(
248
+ webui_manager.bu_response_event.wait(), timeout=3600.0
249
+ ) # Long timeout
250
+ logger.info("User response event received.")
251
+ except asyncio.TimeoutError:
252
+ logger.warning("Timeout waiting for user assistance.")
253
+ webui_manager.bu_chat_history.append(
254
+ {
255
+ "role": "assistant",
256
+ "content": "**Timeout:** No response received. Trying to proceed.",
257
+ }
258
+ )
259
+ webui_manager.bu_response_event = None # Clear the event
260
+ return {"response": "Timeout: User did not respond."} # Inform the agent
261
+
262
+ response = webui_manager.bu_user_help_response
263
+ webui_manager.bu_chat_history.append(
264
+ {"role": "user", "content": response}
265
+ ) # Show user response in chat
266
+ webui_manager.bu_response_event = (
267
+ None # Clear the event for the next potential request
268
+ )
269
+ return {"response": response}
270
+
271
+
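
Stripped of the UI plumbing, the ask-for-assistance handoff above is a plain asyncio.Event rendezvous; a self-contained sketch of the same pattern (names are illustrative):

import asyncio

async def demo_handoff() -> None:
    event = asyncio.Event()
    state = {"response": None}

    async def user_replies_later() -> None:
        await asyncio.sleep(0.1)           # stand-in for the user typing a reply
        state["response"] = "done, continue"
        event.set()                        # mirrors handle_submit setting bu_response_event

    asyncio.create_task(user_replies_later())
    await asyncio.wait_for(event.wait(), timeout=3600.0)
    print(state["response"])

asyncio.run(demo_handoff())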
272
+ # --- Core Agent Execution Logic --- (Needs access to webui_manager)
273
+
274
+
275
+ async def run_agent_task(
276
+ webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
277
+ ) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
278
+ """Handles the entire lifecycle of initializing and running the agent."""
279
+
280
+ # --- Get Components ---
281
+ # Need handles to specific UI components to update them
282
+ user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
283
+ run_button_comp = webui_manager.get_component_by_id("browser_use_agent.run_button")
284
+ stop_button_comp = webui_manager.get_component_by_id(
285
+ "browser_use_agent.stop_button"
286
+ )
287
+ pause_resume_button_comp = webui_manager.get_component_by_id(
288
+ "browser_use_agent.pause_resume_button"
289
+ )
290
+ clear_button_comp = webui_manager.get_component_by_id(
291
+ "browser_use_agent.clear_button"
292
+ )
293
+ chatbot_comp = webui_manager.get_component_by_id("browser_use_agent.chatbot")
294
+ history_file_comp = webui_manager.get_component_by_id(
295
+ "browser_use_agent.agent_history_file"
296
+ )
297
+ gif_comp = webui_manager.get_component_by_id("browser_use_agent.recording_gif")
298
+ browser_view_comp = webui_manager.get_component_by_id(
299
+ "browser_use_agent.browser_view"
300
+ )
301
+
302
+ # --- 1. Get Task and Initial UI Update ---
303
+ task = components.get(user_input_comp, "").strip()
304
+ if not task:
305
+ gr.Warning("Please enter a task.")
306
+ yield {run_button_comp: gr.update(interactive=True)}
307
+ return
308
+
309
+ # Set running state indirectly via _current_task
310
+ webui_manager.bu_chat_history.append({"role": "user", "content": task})
311
+
312
+ yield {
313
+ user_input_comp: gr.Textbox(
314
+ value="", interactive=False, placeholder="Agent is running..."
315
+ ),
316
+ run_button_comp: gr.Button(value="⏳ Running...", interactive=False),
317
+ stop_button_comp: gr.Button(interactive=True),
318
+ pause_resume_button_comp: gr.Button(value="⏸️ Pause", interactive=True),
319
+ clear_button_comp: gr.Button(interactive=False),
320
+ chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
321
+ history_file_comp: gr.update(value=None),
322
+ gif_comp: gr.update(value=None),
323
+ }
324
+
325
+ # --- Agent Settings ---
326
+ # Access settings values via components dict, getting IDs from webui_manager
327
+ def get_setting(key, default=None):
328
+ comp = webui_manager.id_to_component.get(f"agent_settings.{key}")
329
+ return components.get(comp, default) if comp else default
330
+
331
+ override_system_prompt = get_setting("override_system_prompt") or None
332
+ extend_system_prompt = get_setting("extend_system_prompt") or None
333
+ llm_provider_name = get_setting(
334
+ "llm_provider", None
335
+ ) # Default to None if not found
336
+ llm_model_name = get_setting("llm_model_name", None)
337
+ llm_temperature = get_setting("llm_temperature", 0.6)
338
+ use_vision = get_setting("use_vision", True)
339
+ ollama_num_ctx = get_setting("ollama_num_ctx", 16000)
340
+ llm_base_url = get_setting("llm_base_url") or None
341
+ llm_api_key = get_setting("llm_api_key") or None
342
+ max_steps = get_setting("max_steps", 100)
343
+ max_actions = get_setting("max_actions", 10)
344
+ max_input_tokens = get_setting("max_input_tokens", 128000)
345
+ tool_calling_str = get_setting("tool_calling_method", "auto")
346
+ tool_calling_method = tool_calling_str if tool_calling_str != "None" else None
347
+ mcp_server_config_comp = webui_manager.id_to_component.get(
348
+ "agent_settings.mcp_server_config"
349
+ )
350
+ mcp_server_config_str = (
351
+ components.get(mcp_server_config_comp) if mcp_server_config_comp else None
352
+ )
353
+ mcp_server_config = (
354
+ json.loads(mcp_server_config_str) if mcp_server_config_str else None
355
+ )
356
+
357
+ # Planner LLM Settings (Optional)
358
+ planner_llm_provider_name = get_setting("planner_llm_provider") or None
359
+ planner_llm = None
360
+ planner_use_vision = False
361
+ if planner_llm_provider_name:
362
+ planner_llm_model_name = get_setting("planner_llm_model_name")
363
+ planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
364
+ planner_ollama_num_ctx = get_setting("planner_ollama_num_ctx", 16000)
365
+ planner_llm_base_url = get_setting("planner_llm_base_url") or None
366
+ planner_llm_api_key = get_setting("planner_llm_api_key") or None
367
+ planner_use_vision = get_setting("planner_use_vision", False)
368
+
369
+ planner_llm = await _initialize_llm(
370
+ planner_llm_provider_name,
371
+ planner_llm_model_name,
372
+ planner_llm_temperature,
373
+ planner_llm_base_url,
374
+ planner_llm_api_key,
375
+ planner_ollama_num_ctx if planner_llm_provider_name == "ollama" else None,
376
+ )
377
+
378
+ # --- Browser Settings ---
379
+ def get_browser_setting(key, default=None):
380
+ comp = webui_manager.id_to_component.get(f"browser_settings.{key}")
381
+ return components.get(comp, default) if comp else default
382
+
383
+ browser_binary_path = get_browser_setting("browser_binary_path") or None
384
+ browser_user_data_dir = get_browser_setting("browser_user_data_dir") or None
385
+ use_own_browser = get_browser_setting(
386
+ "use_own_browser", False
387
+ ) # Logic handled by CDP/WSS presence
388
+ keep_browser_open = get_browser_setting("keep_browser_open", False)
389
+ headless = get_browser_setting("headless", False)
390
+ disable_security = get_browser_setting("disable_security", False)
391
+ window_w = int(get_browser_setting("window_w", 1280))
392
+ window_h = int(get_browser_setting("window_h", 1100))
393
+ cdp_url = get_browser_setting("cdp_url") or None
394
+ wss_url = get_browser_setting("wss_url") or None
395
+ save_recording_path = get_browser_setting("save_recording_path") or None
396
+ save_trace_path = get_browser_setting("save_trace_path") or None
397
+ save_agent_history_path = get_browser_setting(
398
+ "save_agent_history_path", "./tmp/agent_history"
399
+ )
400
+ save_download_path = get_browser_setting("save_download_path", "./tmp/downloads")
401
+
402
+ stream_vw = 70
403
+ stream_vh = int(70 * window_h // window_w)
404
+
405
+ os.makedirs(save_agent_history_path, exist_ok=True)
406
+ if save_recording_path:
407
+ os.makedirs(save_recording_path, exist_ok=True)
408
+ if save_trace_path:
409
+ os.makedirs(save_trace_path, exist_ok=True)
410
+ if save_download_path:
411
+ os.makedirs(save_download_path, exist_ok=True)
412
+
413
+ # --- 2. Initialize LLM ---
414
+ main_llm = await _initialize_llm(
415
+ llm_provider_name,
416
+ llm_model_name,
417
+ llm_temperature,
418
+ llm_base_url,
419
+ llm_api_key,
420
+ ollama_num_ctx if llm_provider_name == "ollama" else None,
421
+ )
422
+
423
+ # Pass the webui_manager instance to the callback when wrapping it
424
+ async def ask_callback_wrapper(
425
+ query: str, browser_context: BrowserContext
426
+ ) -> Dict[str, Any]:
427
+ return await _ask_assistant_callback(webui_manager, query, browser_context)
428
+
429
+ if not webui_manager.bu_controller:
430
+ webui_manager.bu_controller = CustomController(
431
+ ask_assistant_callback=ask_callback_wrapper
432
+ )
433
+ await webui_manager.bu_controller.setup_mcp_client(mcp_server_config)
434
+
435
+ # --- 4. Initialize Browser and Context ---
436
+ should_close_browser_on_finish = not keep_browser_open
437
+
438
+ try:
439
+ # Close existing resources if not keeping open
440
+ if not keep_browser_open:
441
+ if webui_manager.bu_browser_context:
442
+ logger.info("Closing previous browser context.")
443
+ await webui_manager.bu_browser_context.close()
444
+ webui_manager.bu_browser_context = None
445
+ if webui_manager.bu_browser:
446
+ logger.info("Closing previous browser.")
447
+ await webui_manager.bu_browser.close()
448
+ webui_manager.bu_browser = None
449
+
450
+ # Create Browser if needed
451
+ if not webui_manager.bu_browser:
452
+ logger.info("Launching new browser instance.")
453
+ extra_args = []
454
+ if use_own_browser:
455
+ browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
456
+ if browser_binary_path == "":
457
+ browser_binary_path = None
458
+ browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
459
+ if browser_user_data:
460
+ extra_args += [f"--user-data-dir={browser_user_data}"]
461
+ else:
462
+ browser_binary_path = None
463
+
464
+ webui_manager.bu_browser = CustomBrowser(
465
+ config=BrowserConfig(
466
+ headless=headless,
467
+ disable_security=disable_security,
468
+ browser_binary_path=browser_binary_path,
469
+ extra_browser_args=extra_args,
470
+ wss_url=wss_url,
471
+ cdp_url=cdp_url,
472
+ new_context_config=BrowserContextConfig(
473
+ window_width=window_w,
474
+ window_height=window_h,
475
+ )
476
+ )
477
+ )
478
+
479
+ # Create Context if needed
480
+ if not webui_manager.bu_browser_context:
481
+ logger.info("Creating new browser context.")
482
+ context_config = BrowserContextConfig(
483
+ trace_path=save_trace_path if save_trace_path else None,
484
+ save_recording_path=save_recording_path
485
+ if save_recording_path
486
+ else None,
487
+ save_downloads_path=save_download_path if save_download_path else None,
488
+ window_height=window_h,
489
+ window_width=window_w,
490
+ )
491
+ if not webui_manager.bu_browser:
492
+ raise ValueError("Browser not initialized, cannot create context.")
493
+ webui_manager.bu_browser_context = (
494
+ await webui_manager.bu_browser.new_context(config=context_config)
495
+ )
496
+
497
+ # --- 5. Initialize or Update Agent ---
498
+ webui_manager.bu_agent_task_id = str(uuid.uuid4()) # New ID for this task run
499
+ os.makedirs(
500
+ os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id),
501
+ exist_ok=True,
502
+ )
503
+ history_file = os.path.join(
504
+ save_agent_history_path,
505
+ webui_manager.bu_agent_task_id,
506
+ f"{webui_manager.bu_agent_task_id}.json",
507
+ )
508
+ gif_path = os.path.join(
509
+ save_agent_history_path,
510
+ webui_manager.bu_agent_task_id,
511
+ f"{webui_manager.bu_agent_task_id}.gif",
512
+ )
513
+
514
+ # Pass the webui_manager to callbacks when wrapping them
515
+ async def step_callback_wrapper(
516
+ state: BrowserState, output: AgentOutput, step_num: int
517
+ ):
518
+ await _handle_new_step(webui_manager, state, output, step_num)
519
+
520
+ def done_callback_wrapper(history: AgentHistoryList):
521
+ _handle_done(webui_manager, history)
522
+
523
+ if not webui_manager.bu_agent:
524
+ logger.info(f"Initializing new agent for task: {task}")
525
+ if not webui_manager.bu_browser or not webui_manager.bu_browser_context:
526
+ raise ValueError(
527
+ "Browser or Context not initialized, cannot create agent."
528
+ )
529
+ webui_manager.bu_agent = BrowserUseAgent(
530
+ task=task,
531
+ llm=main_llm,
532
+ browser=webui_manager.bu_browser,
533
+ browser_context=webui_manager.bu_browser_context,
534
+ controller=webui_manager.bu_controller,
535
+ register_new_step_callback=step_callback_wrapper,
536
+ register_done_callback=done_callback_wrapper,
537
+ use_vision=use_vision,
538
+ override_system_message=override_system_prompt,
539
+ extend_system_message=extend_system_prompt,
540
+ max_input_tokens=max_input_tokens,
541
+ max_actions_per_step=max_actions,
542
+ tool_calling_method=tool_calling_method,
543
+ planner_llm=planner_llm,
544
+ use_vision_for_planner=planner_use_vision if planner_llm else False,
545
+ source="webui",
546
+ )
547
+ webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
548
+ webui_manager.bu_agent.settings.generate_gif = gif_path
549
+ else:
550
+ webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
551
+ webui_manager.bu_agent.add_new_task(task)
552
+ webui_manager.bu_agent.settings.generate_gif = gif_path
553
+ webui_manager.bu_agent.browser = webui_manager.bu_browser
554
+ webui_manager.bu_agent.browser_context = webui_manager.bu_browser_context
555
+ webui_manager.bu_agent.controller = webui_manager.bu_controller
556
+
557
+ # --- 6. Run Agent Task and Stream Updates ---
558
+ agent_run_coro = webui_manager.bu_agent.run(max_steps=max_steps)
559
+ agent_task = asyncio.create_task(agent_run_coro)
560
+ webui_manager.bu_current_task = agent_task # Store the task
561
+
562
+ last_chat_len = len(webui_manager.bu_chat_history)
563
+ while not agent_task.done():
564
+ is_paused = webui_manager.bu_agent.state.paused
565
+ is_stopped = webui_manager.bu_agent.state.stopped
566
+
567
+ # Check for pause state
568
+ if is_paused:
569
+ yield {
570
+ pause_resume_button_comp: gr.update(
571
+ value="▶️ Resume", interactive=True
572
+ ),
573
+ stop_button_comp: gr.update(interactive=True),
574
+ }
575
+ # Wait until pause is released or task is stopped/done
576
+ while is_paused and not agent_task.done():
577
+ # Re-check agent state in loop
578
+ is_paused = webui_manager.bu_agent.state.paused
579
+ is_stopped = webui_manager.bu_agent.state.stopped
580
+ if is_stopped: # Stop signal received while paused
581
+ break
582
+ await asyncio.sleep(0.2)
583
+
584
+ if (
585
+ agent_task.done() or is_stopped
586
+ ): # If stopped or task finished while paused
587
+ break
588
+
589
+ # If resumed, yield UI update
590
+ yield {
591
+ pause_resume_button_comp: gr.update(
592
+ value="⏸️ Pause", interactive=True
593
+ ),
594
+ run_button_comp: gr.update(
595
+ value="⏳ Running...", interactive=False
596
+ ),
597
+ }
598
+
599
+ # Check if agent stopped itself or stop button was pressed (which sets agent.state.stopped)
600
+ if is_stopped:
601
+ logger.info("Agent has stopped (internally or via stop button).")
602
+ if not agent_task.done():
603
+ # Ensure the task coroutine finishes if agent just set flag
604
+ try:
605
+ await asyncio.wait_for(
606
+ agent_task, timeout=1.0
607
+ ) # Give it a moment to exit run()
608
+ except asyncio.TimeoutError:
609
+ logger.warning(
610
+ "Agent task did not finish quickly after stop signal, cancelling."
611
+ )
612
+ agent_task.cancel()
613
+ except Exception: # Catch task exceptions if it errors on stop
614
+ pass
615
+ break # Exit the streaming loop
616
+
617
+ # Check if agent is asking for help (via response_event)
618
+ update_dict = {}
619
+ if webui_manager.bu_response_event is not None:
620
+ update_dict = {
621
+ user_input_comp: gr.update(
622
+ placeholder="Agent needs help. Enter response and submit.",
623
+ interactive=True,
624
+ ),
625
+ run_button_comp: gr.update(
626
+ value="✔️ Submit Response", interactive=True
627
+ ),
628
+ pause_resume_button_comp: gr.update(interactive=False),
629
+ stop_button_comp: gr.update(interactive=False),
630
+ chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
631
+ }
632
+ last_chat_len = len(webui_manager.bu_chat_history)
633
+ yield update_dict
634
+ # Wait until response is submitted or task finishes
635
+ while (
636
+ webui_manager.bu_response_event is not None
637
+ and not agent_task.done()
638
+ ):
639
+ await asyncio.sleep(0.2)
640
+ # Restore UI after response submitted or if task ended unexpectedly
641
+ if not agent_task.done():
642
+ yield {
643
+ user_input_comp: gr.update(
644
+ placeholder="Agent is running...", interactive=False
645
+ ),
646
+ run_button_comp: gr.update(
647
+ value="⏳ Running...", interactive=False
648
+ ),
649
+ pause_resume_button_comp: gr.update(interactive=True),
650
+ stop_button_comp: gr.update(interactive=True),
651
+ }
652
+ else:
653
+ break # Task finished while waiting for response
654
+
655
+ # Update Chatbot if new messages arrived via callbacks
656
+ if len(webui_manager.bu_chat_history) > last_chat_len:
657
+ update_dict[chatbot_comp] = gr.update(
658
+ value=webui_manager.bu_chat_history
659
+ )
660
+ last_chat_len = len(webui_manager.bu_chat_history)
661
+
662
+ # Update Browser View
663
+ if headless and webui_manager.bu_browser_context:
664
+ try:
665
+ screenshot_b64 = (
666
+ await webui_manager.bu_browser_context.take_screenshot()
667
+ )
668
+ if screenshot_b64:
669
+ html_content = f'<img src="data:image/jpeg;base64,{screenshot_b64}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
670
+ update_dict[browser_view_comp] = gr.update(
671
+ value=html_content, visible=True
672
+ )
673
+ else:
674
+ html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
675
+ update_dict[browser_view_comp] = gr.update(
676
+ value=html_content, visible=True
677
+ )
678
+ except Exception as e:
679
+ logger.debug(f"Failed to capture screenshot: {e}")
680
+ update_dict[browser_view_comp] = gr.update(
681
+ value="<div style='...'>Error loading view...</div>",
682
+ visible=True,
683
+ )
684
+ else:
685
+ update_dict[browser_view_comp] = gr.update(visible=False)
686
+
687
+ # Yield accumulated updates
688
+ if update_dict:
689
+ yield update_dict
690
+
691
+ await asyncio.sleep(0.1) # Polling interval
692
+
693
+ # --- 7. Task Finalization ---
694
+ webui_manager.bu_agent.state.paused = False
695
+ webui_manager.bu_agent.state.stopped = False
696
+ final_update = {}
697
+ try:
698
+ logger.info("Agent task completing...")
699
+ # Await the task ensure completion and catch exceptions if not already caught
700
+ if not agent_task.done():
701
+ await agent_task # Retrieve result/exception
702
+ elif agent_task.exception(): # Check if task finished with exception
703
+ agent_task.result() # Raise the exception to be caught below
704
+ logger.info("Agent task completed processing.")
705
+
706
+ logger.info(f"Explicitly saving agent history to: {history_file}")
707
+ webui_manager.bu_agent.save_history(history_file)
708
+
709
+ if os.path.exists(history_file):
710
+ final_update[history_file_comp] = gr.File(value=history_file)
711
+
712
+ if gif_path and os.path.exists(gif_path):
713
+ logger.info(f"GIF found at: {gif_path}")
714
+ final_update[gif_comp] = gr.Image(value=gif_path)
715
+
716
+ except asyncio.CancelledError:
717
+ logger.info("Agent task was cancelled.")
718
+ if not any(
719
+ "Cancelled" in msg.get("content", "")
720
+ for msg in webui_manager.bu_chat_history
721
+ if msg.get("role") == "assistant"
722
+ ):
723
+ webui_manager.bu_chat_history.append(
724
+ {"role": "assistant", "content": "**Task Cancelled**."}
725
+ )
726
+ final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
727
+ except Exception as e:
728
+ logger.error(f"Error during agent execution: {e}", exc_info=True)
729
+ error_message = (
730
+ f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
731
+ )
732
+ if not any(
733
+ error_message in msg.get("content", "")
734
+ for msg in webui_manager.bu_chat_history
735
+ if msg.get("role") == "assistant"
736
+ ):
737
+ webui_manager.bu_chat_history.append(
738
+ {"role": "assistant", "content": error_message}
739
+ )
740
+ final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
741
+ gr.Error(f"Agent execution failed: {e}")
742
+
743
+ finally:
744
+ webui_manager.bu_current_task = None # Clear the task reference
745
+
746
+ # Close browser/context if requested
747
+ if should_close_browser_on_finish:
748
+ if webui_manager.bu_browser_context:
749
+ logger.info("Closing browser context after task.")
750
+ await webui_manager.bu_browser_context.close()
751
+ webui_manager.bu_browser_context = None
752
+ if webui_manager.bu_browser:
753
+ logger.info("Closing browser after task.")
754
+ await webui_manager.bu_browser.close()
755
+ webui_manager.bu_browser = None
756
+
757
+ # --- 8. Final UI Update ---
758
+ final_update.update(
759
+ {
760
+ user_input_comp: gr.update(
761
+ value="",
762
+ interactive=True,
763
+ placeholder="Enter your next task...",
764
+ ),
765
+ run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
766
+ stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
767
+ pause_resume_button_comp: gr.update(
768
+ value="⏸️ Pause", interactive=False
769
+ ),
770
+ clear_button_comp: gr.update(interactive=True),
771
+ # Ensure final chat history is shown
772
+ chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
773
+ }
774
+ )
775
+ yield final_update
776
+
777
+ except Exception as e:
778
+ # Catch errors during setup (before agent run starts)
779
+ logger.error(f"Error setting up agent task: {e}", exc_info=True)
780
+ webui_manager.bu_current_task = None # Ensure state is reset
781
+ yield {
782
+ user_input_comp: gr.update(
783
+ interactive=True, placeholder="Error during setup. Enter task..."
784
+ ),
785
+ run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
786
+ stop_button_comp: gr.update(value="⏹️ Stop", interactive=False),
787
+ pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=False),
788
+ clear_button_comp: gr.update(interactive=True),
789
+ chatbot_comp: gr.update(
790
+ value=webui_manager.bu_chat_history
791
+ + [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
792
+ ),
793
+ }
794
+
795
+
796
+ # --- Button Click Handlers --- (Need access to webui_manager)
797
+
798
+
799
+ async def handle_submit(
800
+ webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
801
+ ):
802
+ """Handles clicks on the main 'Submit' button."""
803
+ user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
804
+ user_input_value = components.get(user_input_comp, "").strip()
805
+
806
+ # Check if waiting for user assistance
807
+ if webui_manager.bu_response_event and not webui_manager.bu_response_event.is_set():
808
+ logger.info(f"User submitted assistance: {user_input_value}")
809
+ webui_manager.bu_user_help_response = (
810
+ user_input_value if user_input_value else "User provided no text response."
811
+ )
812
+ webui_manager.bu_response_event.set()
813
+ # UI updates handled by the main loop reacting to the event being set
814
+ yield {
815
+ user_input_comp: gr.update(
816
+ value="",
817
+ interactive=False,
818
+ placeholder="Waiting for agent to continue...",
819
+ ),
820
+ webui_manager.get_component_by_id(
821
+ "browser_use_agent.run_button"
822
+ ): gr.update(value="⏳ Running...", interactive=False),
823
+ }
824
+ # Check if a task is currently running (using _current_task)
825
+ elif webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
826
+ logger.warning(
827
+ "Submit button clicked while agent is already running and not asking for help."
828
+ )
829
+ gr.Info("Agent is currently running. Please wait or use Stop/Pause.")
830
+ yield {} # No change
831
+ else:
832
+ # Handle submission for a new task
833
+ logger.info("Submit button clicked for new task.")
834
+ # Use async generator to stream updates from run_agent_task
835
+ async for update in run_agent_task(webui_manager, components):
836
+ yield update
837
+
838
+
839
+ async def handle_stop(webui_manager: WebuiManager):
840
+ """Handles clicks on the 'Stop' button."""
841
+ logger.info("Stop button clicked.")
842
+ agent = webui_manager.bu_agent
843
+ task = webui_manager.bu_current_task
844
+
845
+ if agent and task and not task.done():
846
+ # Signal the agent to stop by setting its internal flag
847
+ agent.state.stopped = True
848
+ agent.state.paused = False # Ensure not paused if stopped
849
+ return {
850
+ webui_manager.get_component_by_id(
851
+ "browser_use_agent.stop_button"
852
+ ): gr.update(interactive=False, value="⏹️ Stopping..."),
853
+ webui_manager.get_component_by_id(
854
+ "browser_use_agent.pause_resume_button"
855
+ ): gr.update(interactive=False),
856
+ webui_manager.get_component_by_id(
857
+ "browser_use_agent.run_button"
858
+ ): gr.update(interactive=False),
859
+ }
860
+ else:
861
+ logger.warning("Stop clicked but agent is not running or task is already done.")
862
+ # Reset UI just in case it's stuck
863
+ return {
864
+ webui_manager.get_component_by_id(
865
+ "browser_use_agent.run_button"
866
+ ): gr.update(interactive=True),
867
+ webui_manager.get_component_by_id(
868
+ "browser_use_agent.stop_button"
869
+ ): gr.update(interactive=False),
870
+ webui_manager.get_component_by_id(
871
+ "browser_use_agent.pause_resume_button"
872
+ ): gr.update(interactive=False),
873
+ webui_manager.get_component_by_id(
874
+ "browser_use_agent.clear_button"
875
+ ): gr.update(interactive=True),
876
+ }
877
+
878
+
879
+ async def handle_pause_resume(webui_manager: WebuiManager):
880
+ """Handles clicks on the 'Pause/Resume' button."""
881
+ agent = webui_manager.bu_agent
882
+ task = webui_manager.bu_current_task
883
+
884
+ if agent and task and not task.done():
885
+ if agent.state.paused:
886
+ logger.info("Resume button clicked.")
887
+ agent.resume()
888
+ # UI update happens in main loop
889
+ return {
890
+ webui_manager.get_component_by_id(
891
+ "browser_use_agent.pause_resume_button"
892
+ ): gr.update(value="⏸️ Pause", interactive=True)
893
+ } # Optimistic update
894
+ else:
895
+ logger.info("Pause button clicked.")
896
+ agent.pause()
897
+ return {
898
+ webui_manager.get_component_by_id(
899
+ "browser_use_agent.pause_resume_button"
900
+ ): gr.update(value="▶️ Resume", interactive=True)
901
+ } # Optimistic update
902
+ else:
903
+ logger.warning(
904
+ "Pause/Resume clicked but agent is not running or doesn't support state."
905
+ )
906
+ return {} # No change
907
+
908
+
909
+ async def handle_clear(webui_manager: WebuiManager):
910
+ """Handles clicks on the 'Clear' button."""
911
+ logger.info("Clear button clicked.")
912
+
913
+ # Stop any running task first
914
+ task = webui_manager.bu_current_task
915
+ if task and not task.done():
916
+ logger.info("Clearing requires stopping the current task.")
917
+ webui_manager.bu_agent.stop()
918
+ task.cancel()
919
+ try:
920
+ await asyncio.wait_for(task, timeout=2.0) # Wait briefly
921
+ except (asyncio.CancelledError, asyncio.TimeoutError):
922
+ pass
923
+ except Exception as e:
924
+ logger.warning(f"Error stopping task on clear: {e}")
925
+ webui_manager.bu_current_task = None
926
+
927
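+ # Release the MCP client before dropping the controller and agent references.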
+ if webui_manager.bu_controller:
928
+ await webui_manager.bu_controller.close_mcp_client()
929
+ webui_manager.bu_controller = None
930
+ webui_manager.bu_agent = None
931
+
932
+ # Reset state stored in manager
933
+ webui_manager.bu_chat_history = []
934
+ webui_manager.bu_response_event = None
935
+ webui_manager.bu_user_help_response = None
936
+ webui_manager.bu_agent_task_id = None
937
+
938
+ logger.info("Agent state and browser resources cleared.")
939
+
940
+ # Reset UI components
941
+ return {
942
+ webui_manager.get_component_by_id("browser_use_agent.chatbot"): gr.update(
943
+ value=[]
944
+ ),
945
+ webui_manager.get_component_by_id("browser_use_agent.user_input"): gr.update(
946
+ value="", placeholder="Enter your task here..."
947
+ ),
948
+ webui_manager.get_component_by_id(
949
+ "browser_use_agent.agent_history_file"
950
+ ): gr.update(value=None),
951
+ webui_manager.get_component_by_id("browser_use_agent.recording_gif"): gr.update(
952
+ value=None
953
+ ),
954
+ webui_manager.get_component_by_id("browser_use_agent.browser_view"): gr.update(
955
+ value="<div style='...'>Browser Cleared</div>"
956
+ ),
957
+ webui_manager.get_component_by_id("browser_use_agent.run_button"): gr.update(
958
+ value="▶️ Submit Task", interactive=True
959
+ ),
960
+ webui_manager.get_component_by_id("browser_use_agent.stop_button"): gr.update(
961
+ interactive=False
962
+ ),
963
+ webui_manager.get_component_by_id(
964
+ "browser_use_agent.pause_resume_button"
965
+ ): gr.update(value="⏸️ Pause", interactive=False),
966
+ webui_manager.get_component_by_id("browser_use_agent.clear_button"): gr.update(
967
+ interactive=True
968
+ ),
969
+ }
970
+
971
+
972
+ # --- Tab Creation Function ---
973
+
974
+
975
+ def create_browser_use_agent_tab(webui_manager: WebuiManager):
976
+ """
977
+ Create the run agent tab, defining UI, state, and handlers.
978
+ """
979
+ webui_manager.init_browser_use_agent()
980
+
981
+ # --- Define UI Components ---
982
+ tab_components = {}
983
+ with gr.Column():
984
+ chatbot = gr.Chatbot(
985
+ lambda: webui_manager.bu_chat_history, # Load history dynamically
986
+ elem_id="browser_use_chatbot",
987
+ label="Agent Interaction",
988
+ type="messages",
989
+ height=600,
990
+ show_copy_button=True,
991
+ )
992
+ user_input = gr.Textbox(
993
+ label="Your Task or Response",
994
+ placeholder="Enter your task here or provide assistance when asked.",
995
+ lines=3,
996
+ interactive=True,
997
+ elem_id="user_input",
998
+ )
999
+ with gr.Row():
1000
+ stop_button = gr.Button(
1001
+ "⏹️ Stop", interactive=False, variant="stop", scale=2
1002
+ )
1003
+ pause_resume_button = gr.Button(
1004
+ "⏸️ Pause", interactive=False, variant="secondary", scale=2, visible=True
1005
+ )
1006
+ clear_button = gr.Button(
1007
+ "🗑️ Clear", interactive=True, variant="secondary", scale=2
1008
+ )
1009
+ run_button = gr.Button("▶️ Submit Task", variant="primary", scale=3)
1010
+
1011
+ browser_view = gr.HTML(
1012
+ value="<div style='width:100%; height:50vh; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>Browser View (Requires Headless=True)</p></div>",
1013
+ label="Browser Live View",
1014
+ elem_id="browser_view",
1015
+ visible=False,
1016
+ )
1017
+ with gr.Column():
1018
+ gr.Markdown("### Task Outputs")
1019
+ agent_history_file = gr.File(label="Agent History JSON", interactive=False)
1020
+ recording_gif = gr.Image(
1021
+ label="Task Recording GIF",
1022
+ format="gif",
1023
+ interactive=False,
1024
+ type="filepath",
1025
+ )
1026
+
1027
+ # --- Store Components in Manager ---
1028
+ tab_components.update(
1029
+ dict(
1030
+ chatbot=chatbot,
1031
+ user_input=user_input,
1032
+ clear_button=clear_button,
1033
+ run_button=run_button,
1034
+ stop_button=stop_button,
1035
+ pause_resume_button=pause_resume_button,
1036
+ agent_history_file=agent_history_file,
1037
+ recording_gif=recording_gif,
1038
+ browser_view=browser_view,
1039
+ )
1040
+ )
1041
+ webui_manager.add_components(
1042
+ "browser_use_agent", tab_components
1043
+ ) # Use "browser_use_agent" as tab_name prefix
1044
+
1045
+ all_managed_components = set(
1046
+ webui_manager.get_components()
1047
+ ) # Get all components known to manager
1048
+ run_tab_outputs = list(tab_components.values())
1049
+
1050
+ async def submit_wrapper(
1051
+ components_dict: Dict[Component, Any],
1052
+ ) -> AsyncGenerator[Dict[Component, Any], None]:
1053
+ """Wrapper for handle_submit that yields its results."""
1054
+ async for update in handle_submit(webui_manager, components_dict):
1055
+ yield update
1056
+
1057
+ async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
1058
+ """Wrapper for handle_stop."""
1059
+ update_dict = await handle_stop(webui_manager)
1060
+ yield update_dict
1061
+
1062
+ async def pause_resume_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
1063
+ """Wrapper for handle_pause_resume."""
1064
+ update_dict = await handle_pause_resume(webui_manager)
1065
+ yield update_dict
1066
+
1067
+ async def clear_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
1068
+ """Wrapper for handle_clear."""
1069
+ update_dict = await handle_clear(webui_manager)
1070
+ yield update_dict
1071
+
1072
+ # --- Connect Event Handlers using the Wrappers ---
1073
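+ # inputs=all_managed_components lets the submit handler read settings registered by other tabs.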
+ run_button.click(
1074
+ fn=submit_wrapper, inputs=all_managed_components, outputs=run_tab_outputs
1075
+ )
1076
+ user_input.submit(
1077
+ fn=submit_wrapper, inputs=all_managed_components, outputs=run_tab_outputs
1078
+ )
1079
+ stop_button.click(fn=stop_wrapper, inputs=None, outputs=run_tab_outputs)
1080
+ pause_resume_button.click(
1081
+ fn=pause_resume_wrapper, inputs=None, outputs=run_tab_outputs
1082
+ )
1083
+ clear_button.click(fn=clear_wrapper, inputs=None, outputs=run_tab_outputs)
src/webui/components/deep_research_agent_tab.py ADDED
@@ -0,0 +1,457 @@
1
+ import gradio as gr
2
+ from gradio.components import Component
3
+ from functools import partial
4
+
5
+ from src.webui.webui_manager import WebuiManager
6
+ from src.utils import config
7
+ import logging
8
+ import os
9
+ from typing import Any, Dict, AsyncGenerator, Optional, Tuple, Union
10
+ import asyncio
11
+ import json
12
+ from src.agent.deep_research.deep_research_agent import DeepResearchAgent
13
+ from src.utils import llm_provider
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ async def _initialize_llm(provider: Optional[str], model_name: Optional[str], temperature: float,
19
+ base_url: Optional[str], api_key: Optional[str], num_ctx: Optional[int] = None):
20
+ """Initializes the LLM based on settings. Returns None if provider/model is missing."""
21
+ if not provider or not model_name:
22
+ logger.info("LLM Provider or Model Name not specified, LLM will be None.")
23
+ return None
24
+ try:
25
+ logger.info(f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}")
26
+ # Use your actual LLM provider logic here
27
+ llm = llm_provider.get_llm_model(
28
+ provider=provider,
29
+ model_name=model_name,
30
+ temperature=temperature,
31
+ base_url=base_url or None,
32
+ api_key=api_key or None,
33
+ num_ctx=num_ctx if provider == "ollama" else None
34
+ )
35
+ return llm
36
+ except Exception as e:
37
+ logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
38
+ gr.Warning(
39
+ f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}")
40
+ return None
41
+
42
+
43
+ def _read_file_safe(file_path: str) -> Optional[str]:
44
+ """Safely read a file, returning None if it doesn't exist or on error."""
45
+ if not os.path.exists(file_path):
46
+ return None
47
+ try:
48
+ with open(file_path, 'r', encoding='utf-8') as f:
49
+ return f.read()
50
+ except Exception as e:
51
+ logger.error(f"Error reading file {file_path}: {e}")
52
+ return None
53
+
54
+
55
+ # --- Deep Research Agent Specific Logic ---
56
+
57
+ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Component, Any]) -> AsyncGenerator[
58
+ Dict[Component, Any], None]:
59
+ """Handles initializing and running the DeepResearchAgent."""
60
+
61
+ # --- Get Components ---
62
+ research_task_comp = webui_manager.get_component_by_id("deep_research_agent.research_task")
63
+ resume_task_id_comp = webui_manager.get_component_by_id("deep_research_agent.resume_task_id")
64
+ parallel_num_comp = webui_manager.get_component_by_id("deep_research_agent.parallel_num")
65
+ save_dir_comp = webui_manager.get_component_by_id(
66
+ "deep_research_agent.max_query") # Note: component ID seems misnamed in original code
67
+ start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
68
+ stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
69
+ markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
70
+ markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")
71
+ mcp_server_config_comp = webui_manager.get_component_by_id("deep_research_agent.mcp_server_config")
72
+
73
+ # --- 1. Get Task and Settings ---
74
+ task_topic = components.get(research_task_comp, "").strip()
75
+ task_id_to_resume = components.get(resume_task_id_comp, "").strip() or None
76
+ max_parallel_agents = int(components.get(parallel_num_comp, 1))
77
+ base_save_dir = components.get(save_dir_comp, "./tmp/deep_research").strip()
78
+ safe_root_dir = "./tmp/deep_research"
79
+ normalized_base_save_dir = os.path.abspath(os.path.normpath(base_save_dir))
80
+ if os.path.commonpath([normalized_base_save_dir, os.path.abspath(safe_root_dir)]) != os.path.abspath(safe_root_dir):
81
+ logger.warning(f"Unsafe base_save_dir detected: {base_save_dir}. Using default directory.")
82
+ normalized_base_save_dir = os.path.abspath(safe_root_dir)
83
+ base_save_dir = normalized_base_save_dir
84
+ mcp_server_config_str = components.get(mcp_server_config_comp)
85
+ mcp_config = json.loads(mcp_server_config_str) if mcp_server_config_str else None
86
+
87
+ if not task_topic:
88
+ gr.Warning("Please enter a research task.")
89
+ yield {start_button_comp: gr.update(interactive=True)} # Re-enable start button
90
+ return
91
+
92
+ # Store base save dir for stop handler
93
+ webui_manager.dr_save_dir = base_save_dir
94
+ os.makedirs(base_save_dir, exist_ok=True)
95
+
96
+ # --- 2. Initial UI Update ---
97
+ yield {
98
+ start_button_comp: gr.update(value="⏳ Running...", interactive=False),
99
+ stop_button_comp: gr.update(interactive=True),
100
+ research_task_comp: gr.update(interactive=False),
101
+ resume_task_id_comp: gr.update(interactive=False),
102
+ parallel_num_comp: gr.update(interactive=False),
103
+ save_dir_comp: gr.update(interactive=False),
104
+ markdown_display_comp: gr.update(value="Starting research..."),
105
+ markdown_download_comp: gr.update(value=None, interactive=False)
106
+ }
107
+
108
+ agent_task = None
109
+ running_task_id = None
110
+ plan_file_path = None
111
+ report_file_path = None
112
+ last_plan_content = None
113
+ last_plan_mtime = 0
114
+
115
+ try:
116
+ # --- 3. Get LLM and Browser Config from other tabs ---
117
+ # Access settings values via components dict, getting IDs from webui_manager
118
+ def get_setting(tab: str, key: str, default: Any = None):
119
+ comp = webui_manager.id_to_component.get(f"{tab}.{key}")
120
+ return components.get(comp, default) if comp else default
121
+
122
+ # LLM Config (from agent_settings tab)
123
+ llm_provider_name = get_setting("agent_settings", "llm_provider")
124
+ llm_model_name = get_setting("agent_settings", "llm_model_name")
125
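+ # Enforce a minimum temperature of 0.5 for deep research runs.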
+ llm_temperature = max(get_setting("agent_settings", "llm_temperature", 0.5), 0.5)
126
+ llm_base_url = get_setting("agent_settings", "llm_base_url")
127
+ llm_api_key = get_setting("agent_settings", "llm_api_key")
128
+ ollama_num_ctx = get_setting("agent_settings", "ollama_num_ctx")
129
+
130
+ llm = await _initialize_llm(
131
+ llm_provider_name, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
132
+ ollama_num_ctx if llm_provider_name == "ollama" else None
133
+ )
134
+ if not llm:
135
+ raise ValueError("LLM Initialization failed. Please check Agent Settings.")
136
+
137
+ # Browser Config (from browser_settings tab)
138
+ # Note: DeepResearchAgent constructor takes a dict, not full Browser/Context objects
139
+ browser_config_dict = {
140
+ "headless": get_setting("browser_settings", "headless", False),
141
+ "disable_security": get_setting("browser_settings", "disable_security", False),
142
+ "browser_binary_path": get_setting("browser_settings", "browser_binary_path"),
143
+ "user_data_dir": get_setting("browser_settings", "browser_user_data_dir"),
144
+ "window_width": int(get_setting("browser_settings", "window_w", 1280)),
145
+ "window_height": int(get_setting("browser_settings", "window_h", 1100)),
146
+ # Add other relevant fields if DeepResearchAgent accepts them
147
+ }
148
+
149
+ # --- 4. Initialize or Get Agent ---
150
+ if not webui_manager.dr_agent:
151
+ webui_manager.dr_agent = DeepResearchAgent(
152
+ llm=llm,
153
+ browser_config=browser_config_dict,
154
+ mcp_server_config=mcp_config
155
+ )
156
+ logger.info("DeepResearchAgent initialized.")
157
+
158
+ # --- 5. Start Agent Run ---
159
+ agent_run_coro = webui_manager.dr_agent.run(
160
+ topic=task_topic,
161
+ task_id=task_id_to_resume,
162
+ save_dir=base_save_dir,
163
+ max_parallel_browsers=max_parallel_agents
164
+ )
165
+ agent_task = asyncio.create_task(agent_run_coro)
166
+ webui_manager.dr_current_task = agent_task
167
+
168
+ # Wait briefly for the agent to start and potentially create the task ID/folder
169
+ await asyncio.sleep(1.0)
170
+
171
+ # Determine the actual task ID being used (agent sets this)
172
+ running_task_id = webui_manager.dr_agent.current_task_id
173
+ if not running_task_id:
174
+ # Agent might not have set it yet, try to get from result later? Risky.
175
+ # Or derive from resume_task_id if provided?
176
+ running_task_id = task_id_to_resume
177
+ if not running_task_id:
178
+ logger.warning("Could not determine running task ID immediately.")
179
+ # We can still monitor, but might miss initial plan if ID needed for path
180
+ else:
181
+ logger.info(f"Assuming task ID based on resume ID: {running_task_id}")
182
+ else:
183
+ logger.info(f"Agent started with Task ID: {running_task_id}")
184
+
185
+ webui_manager.dr_task_id = running_task_id # Store for stop handler
186
+
187
+ # --- 6. Monitor Progress via research_plan.md ---
188
+ if running_task_id:
189
+ task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
190
+ plan_file_path = os.path.join(task_specific_dir, "research_plan.md")
191
+ report_file_path = os.path.join(task_specific_dir, "report.md")
192
+ logger.info(f"Monitoring plan file: {plan_file_path}")
193
+ else:
194
+ logger.warning("Cannot monitor plan file: Task ID unknown.")
195
+ plan_file_path = None
196
+ last_plan_content = None
197
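+ # Poll for plan-file changes while the agent task runs in the background.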
+ while not agent_task.done():
198
+ update_dict = {}
199
+ update_dict[resume_task_id_comp] = gr.update(value=running_task_id)
200
+ agent_stopped = getattr(webui_manager.dr_agent, 'stopped', False)
201
+ if agent_stopped:
202
+ logger.info("Stop signal detected from agent state.")
203
+ break # Exit monitoring loop
204
+
205
+ # Check and update research plan display
206
+ if plan_file_path:
207
+ try:
208
+ current_mtime = os.path.getmtime(plan_file_path) if os.path.exists(plan_file_path) else 0
209
+ if current_mtime > last_plan_mtime:
210
+ logger.info(f"Detected change in {plan_file_path}")
211
+ plan_content = _read_file_safe(plan_file_path)
212
+ if last_plan_content is None or (
213
+ plan_content is not None and plan_content != last_plan_content):
214
+ update_dict[markdown_display_comp] = gr.update(value=plan_content)
215
+ last_plan_content = plan_content
216
+ last_plan_mtime = current_mtime
217
+ elif plan_content is None:
218
+ # File might have been deleted or became unreadable
219
+ last_plan_mtime = 0 # Reset to force re-read attempt later
220
+ except Exception as e:
221
+ logger.warning(f"Error checking/reading plan file {plan_file_path}: {e}")
222
+ # Avoid continuous logging for the same error
223
+ await asyncio.sleep(2.0)
224
+
225
+ # Yield updates if any
226
+ if update_dict:
227
+ yield update_dict
228
+
229
+ await asyncio.sleep(1.0) # Check file changes every second
230
+
231
+ # --- 7. Task Finalization ---
232
+ logger.info("Agent task processing finished. Awaiting final result...")
233
+ final_result_dict = await agent_task # Get result or raise exception
234
+ logger.info(f"Agent run completed. Result keys: {final_result_dict.keys() if final_result_dict else 'None'}")
235
+
236
+ # Try to get task ID from result if not known before
237
+ if not running_task_id and final_result_dict and 'task_id' in final_result_dict:
238
+ running_task_id = final_result_dict['task_id']
239
+ webui_manager.dr_task_id = running_task_id
240
+ task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
241
+ report_file_path = os.path.join(task_specific_dir, "report.md")
242
+ logger.info(f"Task ID confirmed from result: {running_task_id}")
243
+
244
+ final_ui_update = {}
245
+ if report_file_path and os.path.exists(report_file_path):
246
+ logger.info(f"Loading final report from: {report_file_path}")
247
+ report_content = _read_file_safe(report_file_path)
248
+ if report_content:
249
+ final_ui_update[markdown_display_comp] = gr.update(value=report_content)
250
+ final_ui_update[markdown_download_comp] = gr.File(value=report_file_path,
251
+ label=f"Report ({running_task_id}.md)",
252
+ interactive=True)
253
+ else:
254
+ final_ui_update[markdown_display_comp] = gr.update(
255
+ value="# Research Complete\n\n*Error reading final report file.*")
256
+ elif final_result_dict and 'report' in final_result_dict:
257
+ logger.info("Using report content directly from agent result.")
258
+ # If agent directly returns report content
259
+ final_ui_update[markdown_display_comp] = gr.update(value=final_result_dict['report'])
260
+ # Cannot offer download if only content is available
261
+ final_ui_update[markdown_download_comp] = gr.update(value=None, label="Download Research Report",
262
+ interactive=False)
263
+ else:
264
+ logger.warning("Final report file not found and not in result dict.")
265
+ final_ui_update[markdown_display_comp] = gr.update(value="# Research Complete\n\n*Final report not found.*")
266
+
267
+ yield final_ui_update
268
+
269
+
270
+ except Exception as e:
271
+ logger.error(f"Error during Deep Research Agent execution: {e}", exc_info=True)
272
+ gr.Error(f"Research failed: {e}")
273
+ yield {markdown_display_comp: gr.update(value=f"# Research Failed\n\n**Error:**\n```\n{e}\n```")}
274
+
275
+ finally:
276
+ # --- 8. Final UI Reset ---
277
+ webui_manager.dr_current_task = None # Clear task reference
278
+ webui_manager.dr_task_id = None # Clear running task ID
279
+
280
+ yield {
281
+ start_button_comp: gr.update(value="▶️ Run", interactive=True),
282
+ stop_button_comp: gr.update(interactive=False),
283
+ research_task_comp: gr.update(interactive=True),
284
+ resume_task_id_comp: gr.update(value="", interactive=True),
285
+ parallel_num_comp: gr.update(interactive=True),
286
+ save_dir_comp: gr.update(interactive=True),
287
+ # Keep download button enabled if file exists
288
+ markdown_download_comp: gr.update() if report_file_path and os.path.exists(report_file_path) else gr.update(
289
+ interactive=False)
290
+ }
291
+
292
+
293
+ async def stop_deep_research(webui_manager: WebuiManager) -> Dict[Component, Any]:
294
+ """Handles the Stop button click."""
295
+ logger.info("Stop button clicked for Deep Research.")
296
+ agent = webui_manager.dr_agent
297
+ task = webui_manager.dr_current_task
298
+ task_id = webui_manager.dr_task_id
299
+ base_save_dir = webui_manager.dr_save_dir
300
+
301
+ stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
302
+ start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
303
+ markdown_display_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_display")
304
+ markdown_download_comp = webui_manager.get_component_by_id("deep_research_agent.markdown_download")
305
+
306
+ final_update = {
307
+ stop_button_comp: gr.update(interactive=False, value="⏹️ Stopping...")
308
+ }
309
+
310
+ if agent and task and not task.done():
311
+ logger.info("Signalling DeepResearchAgent to stop.")
312
+ try:
313
+ # Assuming stop is synchronous or sets a flag quickly
314
+ await agent.stop()
315
+ except Exception as e:
316
+ logger.error(f"Error calling agent.stop(): {e}")
317
+
318
+ # The run_deep_research loop should detect the stop and exit.
319
+ # We return an intermediate "Stopping..." state; the final reset is done by run_deep_research.
320
+
321
+ # Try to show the final report if available after stopping
322
+ await asyncio.sleep(1.5) # Give agent a moment to write final files potentially
323
+ report_file_path = None
324
+ if task_id and base_save_dir:
325
+ report_file_path = os.path.join(base_save_dir, str(task_id), "report.md")
326
+
327
+ if report_file_path and os.path.exists(report_file_path):
328
+ report_content = _read_file_safe(report_file_path)
329
+ if report_content:
330
+ final_update[markdown_display_comp] = gr.update(
331
+ value=report_content + "\n\n---\n*Research stopped by user.*")
332
+ final_update[markdown_download_comp] = gr.File(value=report_file_path, label=f"Report ({task_id}.md)",
333
+ interactive=True)
334
+ else:
335
+ final_update[markdown_display_comp] = gr.update(
336
+ value="# Research Stopped\n\n*Error reading final report file after stop.*")
337
+ else:
338
+ final_update[markdown_display_comp] = gr.update(value="# Research Stopped by User")
339
+
340
+ # Keep start button disabled, run_deep_research finally block will re-enable it.
341
+ final_update[start_button_comp] = gr.update(interactive=False)
342
+
343
+ else:
344
+ logger.warning("Stop clicked but no active research task found.")
345
+ # Reset UI state just in case
346
+ final_update = {
347
+ start_button_comp: gr.update(interactive=True),
348
+ stop_button_comp: gr.update(interactive=False),
349
+ webui_manager.get_component_by_id("deep_research_agent.research_task"): gr.update(interactive=True),
350
+ webui_manager.get_component_by_id("deep_research_agent.resume_task_id"): gr.update(interactive=True),
351
+ webui_manager.get_component_by_id("deep_research_agent.max_iteration"): gr.update(interactive=True),
352
+ webui_manager.get_component_by_id("deep_research_agent.max_query"): gr.update(interactive=True),
353
+ }
354
+
355
+ return final_update
356
+
357
+
358
+ async def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
359
+ """
360
+ Update the MCP server.
361
+ """
362
+ if hasattr(webui_manager, "dr_agent") and webui_manager.dr_agent:
363
+ logger.warning("⚠️ Close controller because mcp file has changed!")
364
+ await webui_manager.dr_agent.close_mcp_client()
365
+
366
+ if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
367
+ logger.warning(f"{mcp_file} is not a valid MCP file.")
368
+ return None, gr.update(visible=False)
369
+
370
+ with open(mcp_file, 'r') as f:
371
+ mcp_server = json.load(f)
372
+
373
+ return json.dumps(mcp_server, indent=2), gr.update(visible=True)
374
+
375
+
376
+ def create_deep_research_agent_tab(webui_manager: WebuiManager):
377
+ """
378
+ Creates a deep research agent tab
379
+ """
380
+ input_components = set(webui_manager.get_components())
381
+ tab_components = {}
382
+
383
+ with gr.Group():
384
+ with gr.Row():
385
+ mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
386
+ mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)
387
+
388
+ with gr.Group():
389
+ research_task = gr.Textbox(label="Research Task", lines=5,
390
+ value="Give me a detailed travel plan to Switzerland from June 1st to 10th.",
391
+ interactive=True)
392
+ with gr.Row():
393
+ resume_task_id = gr.Textbox(label="Resume Task ID", value="",
394
+ interactive=True)
395
+ parallel_num = gr.Number(label="Parallel Agent Num", value=1,
396
+ precision=0,
397
+ interactive=True)
398
+ max_query = gr.Textbox(label="Research Save Dir", value="./tmp/deep_research",
399
+ interactive=True)
400
+ with gr.Row():
401
+ stop_button = gr.Button("⏹️ Stop", variant="stop", scale=2)
402
+ start_button = gr.Button("▶️ Run", variant="primary", scale=3)
403
+ with gr.Group():
404
+ markdown_display = gr.Markdown(label="Research Report")
405
+ markdown_download = gr.File(label="Download Research Report", interactive=False)
406
+ tab_components.update(
407
+ dict(
408
+ research_task=research_task,
409
+ parallel_num=parallel_num,
410
+ max_query=max_query,
411
+ start_button=start_button,
412
+ stop_button=stop_button,
413
+ markdown_display=markdown_display,
414
+ markdown_download=markdown_download,
415
+ resume_task_id=resume_task_id,
416
+ mcp_json_file=mcp_json_file,
417
+ mcp_server_config=mcp_server_config,
418
+ )
419
+ )
420
+ webui_manager.add_components("deep_research_agent", tab_components)
421
+ webui_manager.init_deep_research_agent()
422
+
423
+ async def update_wrapper(mcp_file):
424
+ """Wrapper for handle_pause_resume."""
425
+ update_dict = await update_mcp_server(mcp_file, webui_manager)
426
+ yield update_dict
427
+
428
+ mcp_json_file.change(
429
+ update_wrapper,
430
+ inputs=[mcp_json_file],
431
+ outputs=[mcp_server_config]
432
+ )
433
+
434
+ dr_tab_outputs = list(tab_components.values())
435
+ all_managed_inputs = set(webui_manager.get_components())
436
+
437
+ # --- Define Event Handler Wrappers ---
438
+ async def start_wrapper(comps: Dict[Component, Any]) -> AsyncGenerator[Dict[Component, Any], None]:
439
+ async for update in run_deep_research(webui_manager, comps):
440
+ yield update
441
+
442
+ async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
443
+ update_dict = await stop_deep_research(webui_manager)
444
+ yield update_dict
445
+
446
+ # --- Connect Handlers ---
447
+ start_button.click(
448
+ fn=start_wrapper,
449
+ inputs=all_managed_inputs,
450
+ outputs=dr_tab_outputs
451
+ )
452
+
453
+ stop_button.click(
454
+ fn=stop_wrapper,
455
+ inputs=None,
456
+ outputs=dr_tab_outputs
457
+ )
src/webui/components/load_save_config_tab.py ADDED
@@ -0,0 +1,50 @@
1
+ import gradio as gr
2
+ from gradio.components import Component
3
+
4
+ from src.webui.webui_manager import WebuiManager
5
+ from src.utils import config
6
+
7
+
8
+ def create_load_save_config_tab(webui_manager: WebuiManager):
9
+ """
10
+ Creates a load and save config tab.
11
+ """
12
+ input_components = set(webui_manager.get_components())
13
+ tab_components = {}
14
+
15
+ config_file = gr.File(
16
+ label="Load UI Settings from json",
17
+ file_types=[".json"],
18
+ interactive=True
19
+ )
20
+ with gr.Row():
21
+ load_config_button = gr.Button("Load Config", variant="primary")
22
+ save_config_button = gr.Button("Save UI Settings", variant="primary")
23
+
24
+ config_status = gr.Textbox(
25
+ label="Status",
26
+ lines=2,
27
+ interactive=False
28
+ )
29
+
30
+ tab_components.update(dict(
31
+ load_config_button=load_config_button,
32
+ save_config_button=save_config_button,
33
+ config_status=config_status,
34
+ config_file=config_file,
35
+ ))
36
+
37
+ webui_manager.add_components("load_save_config", tab_components)
38
+
39
+ save_config_button.click(
40
+ fn=webui_manager.save_config,
41
+ inputs=set(webui_manager.get_components()),
42
+ outputs=[config_status]
43
+ )
44
+
45
+ load_config_button.click(
46
+ fn=webui_manager.load_config,
47
+ inputs=[config_file],
48
+ outputs=webui_manager.get_components(),
49
+ )
50
+
src/webui/interface.py ADDED
@@ -0,0 +1,95 @@
1
+ import gradio as gr
2
+
3
+ from src.webui.webui_manager import WebuiManager
4
+ from src.webui.components.agent_settings_tab import create_agent_settings_tab
5
+ from src.webui.components.browser_settings_tab import create_browser_settings_tab
6
+ from src.webui.components.browser_use_agent_tab import create_browser_use_agent_tab
7
+ from src.webui.components.deep_research_agent_tab import create_deep_research_agent_tab
8
+ from src.webui.components.load_save_config_tab import create_load_save_config_tab
9
+
10
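+ # Selectable Gradio themes; create_ui defaults to "Ocean".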
+ theme_map = {
11
+ "Default": gr.themes.Default(),
12
+ "Soft": gr.themes.Soft(),
13
+ "Monochrome": gr.themes.Monochrome(),
14
+ "Glass": gr.themes.Glass(),
15
+ "Origin": gr.themes.Origin(),
16
+ "Citrus": gr.themes.Citrus(),
17
+ "Ocean": gr.themes.Ocean(),
18
+ "Base": gr.themes.Base()
19
+ }
20
+
21
+
22
+ def create_ui(theme_name="Ocean"):
23
+ css = """
24
+ .gradio-container {
25
+ width: 70vw !important;
26
+ max-width: 70% !important;
27
+ margin-left: auto !important;
28
+ margin-right: auto !important;
29
+ padding-top: 10px !important;
30
+ }
31
+ .header-text {
32
+ text-align: center;
33
+ margin-bottom: 20px;
34
+ }
35
+ .tab-header-text {
36
+ text-align: center;
37
+ }
38
+ .theme-section {
39
+ margin-bottom: 10px;
40
+ padding: 15px;
41
+ border-radius: 10px;
42
+ }
43
+ """
44
+
45
+ # force dark mode by default
46
+ js_func = """
47
+ function refresh() {
48
+ const url = new URL(window.location);
49
+
50
+ if (url.searchParams.get('__theme') !== 'dark') {
51
+ url.searchParams.set('__theme', 'dark');
52
+ window.location.href = url.href;
53
+ }
54
+ }
55
+ """
56
+
57
+ ui_manager = WebuiManager()
58
+
59
+ with gr.Blocks(
60
+ title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js_func,
61
+ ) as demo:
62
+ with gr.Row():
63
+ gr.Markdown(
64
+ """
65
+ # 🌐 Browser Use WebUI
66
+ ### Control your browser with AI assistance
67
+ """,
68
+ elem_classes=["header-text"],
69
+ )
70
+
71
+ with gr.Tabs() as tabs:
72
+ with gr.TabItem("⚙️ Agent Settings"):
73
+ create_agent_settings_tab(ui_manager)
74
+
75
+ with gr.TabItem("🌐 Browser Settings"):
76
+ create_browser_settings_tab(ui_manager)
77
+
78
+ with gr.TabItem("🤖 Run Agent"):
79
+ create_browser_use_agent_tab(ui_manager)
80
+
81
+ with gr.TabItem("🎁 Agent Marketplace"):
82
+ gr.Markdown(
83
+ """
84
+ ### Agents built on Browser-Use
85
+ """,
86
+ elem_classes=["tab-header-text"],
87
+ )
88
+ with gr.Tabs():
89
+ with gr.TabItem("Deep Research"):
90
+ create_deep_research_agent_tab(ui_manager)
91
+
92
+ with gr.TabItem("📁 Load & Save Config"):
93
+ create_load_save_config_tab(ui_manager)
94
+
95
+ return demo
src/webui/webui_manager.py ADDED
@@ -0,0 +1,122 @@
1
+ import json
2
+ from collections.abc import Generator
3
+ from typing import TYPE_CHECKING
4
+ import os
5
+ import gradio as gr
6
+ from datetime import datetime
7
+ from typing import Optional, Dict, List
8
+ import uuid
9
+ import asyncio
10
+ import time
11
+
12
+ from gradio.components import Component
13
+ from browser_use.browser.browser import Browser
14
+ from browser_use.browser.context import BrowserContext
15
+ from browser_use.agent.service import Agent
16
+ from src.browser.custom_browser import CustomBrowser
17
+ from src.browser.custom_context import CustomBrowserContext
18
+ from src.controller.custom_controller import CustomController
19
+ from src.agent.deep_research.deep_research_agent import DeepResearchAgent
20
+
21
+
22
+ class WebuiManager:
23
+ def __init__(self, settings_save_dir: str = "./tmp/webui_settings"):
24
+ self.id_to_component: dict[str, Component] = {}
25
+ self.component_to_id: dict[Component, str] = {}
26
+
27
+ self.settings_save_dir = settings_save_dir
28
+ os.makedirs(self.settings_save_dir, exist_ok=True)
29
+
30
+ def init_browser_use_agent(self) -> None:
31
+ """
32
+ init browser use agent
33
+ """
34
+ self.bu_agent: Optional[Agent] = None
35
+ self.bu_browser: Optional[CustomBrowser] = None
36
+ self.bu_browser_context: Optional[CustomBrowserContext] = None
37
+ self.bu_controller: Optional[CustomController] = None
38
+ self.bu_chat_history: List[Dict[str, Optional[str]]] = []
39
+ self.bu_response_event: Optional[asyncio.Event] = None
40
+ self.bu_user_help_response: Optional[str] = None
41
+ self.bu_current_task: Optional[asyncio.Task] = None
42
+ self.bu_agent_task_id: Optional[str] = None
43
+
44
+ def init_deep_research_agent(self) -> None:
45
+ """
46
+ init deep research agent
47
+ """
48
+ self.dr_agent: Optional[DeepResearchAgent] = None
49
+ self.dr_current_task = None
50
+ self.dr_agent_task_id: Optional[str] = None
51
+ self.dr_save_dir: Optional[str] = None
52
+
53
+ def add_components(self, tab_name: str, components_dict: dict[str, "Component"]) -> None:
54
+ """
55
+ Add tab components
56
+ """
57
+ for comp_name, component in components_dict.items():
58
+ comp_id = f"{tab_name}.{comp_name}"
59
+ self.id_to_component[comp_id] = component
60
+ self.component_to_id[component] = comp_id
61
+
62
+ def get_components(self) -> list["Component"]:
63
+ """
64
+ Get all components
65
+ """
66
+ return list(self.id_to_component.values())
67
+
68
+ def get_component_by_id(self, comp_id: str) -> "Component":
69
+ """
70
+ Get component by id
71
+ """
72
+ return self.id_to_component[comp_id]
73
+
74
+ def get_id_by_component(self, comp: "Component") -> str:
75
+ """
76
+ Get id by component
77
+ """
78
+ return self.component_to_id[comp]
79
+
80
+ def save_config(self, components: Dict["Component", str]) -> str:
81
+ """
82
+ Save config
83
+ """
84
+ cur_settings = {}
85
+ for comp in components:
86
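+ # Persist only user-editable values: skip buttons, file pickers, and non-interactive components.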
+ if not isinstance(comp, gr.Button) and not isinstance(comp, gr.File) and str(
87
+ getattr(comp, "interactive", True)).lower() != "false":
88
+ comp_id = self.get_id_by_component(comp)
89
+ cur_settings[comp_id] = components[comp]
90
+
91
+ config_name = datetime.now().strftime("%Y%m%d-%H%M%S")
92
+ with open(os.path.join(self.settings_save_dir, f"{config_name}.json"), "w") as fw:
93
+ json.dump(cur_settings, fw, indent=4)
94
+
95
+ return os.path.join(self.settings_save_dir, f"{config_name}.json")
96
+
97
+ def load_config(self, config_path: str):
98
+ """
99
+ Load config
100
+ """
101
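+ # load_config is a generator: it yields partial updates so dependent dropdowns can refresh first.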
+ with open(config_path, "r") as fr:
102
+ ui_settings = json.load(fr)
103
+
104
+ update_components = {}
105
+ for comp_id, comp_val in ui_settings.items():
106
+ if comp_id in self.id_to_component:
107
+ comp = self.id_to_component[comp_id]
108
+ if comp.__class__.__name__ == "Chatbot":
109
+ update_components[comp] = comp.__class__(value=comp_val, type="messages")
110
+ else:
111
+ update_components[comp] = comp.__class__(value=comp_val)
112
+ if comp_id == "agent_settings.planner_llm_provider":
113
+ yield update_components # yield provider, let callback run
114
+ time.sleep(0.1) # wait for Gradio UI callback
115
+
116
+ config_status = self.id_to_component["load_save_config.config_status"]
117
+ update_components.update(
118
+ {
119
+ config_status: config_status.__class__(value=f"Successfully loaded config: {config_path}")
120
+ }
121
+ )
122
+ yield update_components
supervisord.conf ADDED
@@ -0,0 +1,80 @@
1
+ [supervisord]
2
+ user=root
3
+ nodaemon=true
4
+ logfile=/dev/stdout
5
+ logfile_maxbytes=0
6
+ loglevel=error
7
+
8
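+ ; Programs start in priority order: xvfb (100), vnc_setup (150), x11vnc (200), x11vnc_log (250), novnc (300), webui (400).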
+ [program:xvfb]
9
+ command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
10
+ autorestart=true
11
+ stdout_logfile=/dev/stdout
12
+ stdout_logfile_maxbytes=0
13
+ stderr_logfile=/dev/stderr
14
+ stderr_logfile_maxbytes=0
15
+ priority=100
16
+ startsecs=3
17
+ stopsignal=TERM
18
+ stopwaitsecs=10
19
+
20
+ [program:vnc_setup]
21
+ command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
22
+ autorestart=false
23
+ startsecs=0
24
+ priority=150
25
+ stdout_logfile=/dev/stdout
26
+ stdout_logfile_maxbytes=0
27
+ stderr_logfile=/dev/stderr
28
+ stderr_logfile_maxbytes=0
29
+
30
+ [program:x11vnc]
31
+ command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
32
+ autorestart=true
33
+ stdout_logfile=/dev/stdout
34
+ stdout_logfile_maxbytes=0
35
+ stderr_logfile=/dev/stderr
36
+ stderr_logfile_maxbytes=0
37
+ priority=200
38
+ startretries=10
39
+ startsecs=10
40
+ stopsignal=TERM
41
+ stopwaitsecs=10
42
+ depends_on=vnc_setup,xvfb
43
+
44
+ [program:x11vnc_log]
45
+ command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
46
+ autorestart=true
47
+ stdout_logfile=/dev/stdout
48
+ stdout_logfile_maxbytes=0
49
+ stderr_logfile=/dev/stderr
50
+ stderr_logfile_maxbytes=0
51
+ priority=250
52
+ stopsignal=TERM
53
+ stopwaitsecs=5
54
+ depends_on=x11vnc
55
+
56
+ [program:novnc]
57
+ command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc"
58
+ autorestart=true
59
+ stdout_logfile=/dev/stdout
60
+ stdout_logfile_maxbytes=0
61
+ stderr_logfile=/dev/stderr
62
+ stderr_logfile_maxbytes=0
63
+ priority=300
64
+ startretries=5
65
+ startsecs=3
66
+ depends_on=x11vnc
67
+
68
+ [program:webui]
69
+ command=python webui.py --ip 0.0.0.0 --port 7788
70
+ directory=/app
71
+ autorestart=true
72
+ stdout_logfile=/dev/stdout
73
+ stdout_logfile_maxbytes=0
74
+ stderr_logfile=/dev/stderr
75
+ stderr_logfile_maxbytes=0
76
+ priority=400
77
+ startretries=3
78
+ startsecs=3
79
+ stopsignal=TERM
80
+ stopwaitsecs=10
tests/test_agents.py ADDED
@@ -0,0 +1,400 @@
1
+ import pdb
2
+
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+ import sys
7
+
8
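+ # Make the repo root importable when running this test file directly.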
+ sys.path.append(".")
9
+ import asyncio
10
+ import os
11
+ import sys
12
+ from pprint import pprint
13
+
14
+ from browser_use import Agent
15
+ from browser_use.agent.views import AgentHistoryList
16
+
17
+ from src.utils import utils
18
+
19
+
20
+ async def test_browser_use_agent():
21
+ from browser_use.browser.browser import Browser, BrowserConfig
22
+ from browser_use.browser.context import (
23
+ BrowserContextConfig
24
+ )
25
+ from browser_use.agent.service import Agent
26
+
27
+ from src.browser.custom_browser import CustomBrowser
28
+ from src.controller.custom_controller import CustomController
29
+ from src.utils import llm_provider
30
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
31
+
32
+ llm = llm_provider.get_llm_model(
33
+ provider="openai",
34
+ model_name="gpt-4o",
35
+ temperature=0.8,
36
+ )
37
+
38
+ # llm = llm_provider.get_llm_model(
39
+ # provider="google",
40
+ # model_name="gemini-2.0-flash",
41
+ # temperature=0.6,
42
+ # api_key=os.getenv("GOOGLE_API_KEY", "")
43
+ # )
44
+
45
+ # llm = utils.get_llm_model(
46
+ # provider="deepseek",
47
+ # model_name="deepseek-reasoner",
48
+ # temperature=0.8
49
+ # )
50
+
51
+ # llm = utils.get_llm_model(
52
+ # provider="deepseek",
53
+ # model_name="deepseek-chat",
54
+ # temperature=0.8
55
+ # )
56
+
57
+ # llm = utils.get_llm_model(
58
+ # provider="ollama", model_name="qwen2.5:7b", temperature=0.5
59
+ # )
60
+
61
+ # llm = utils.get_llm_model(
62
+ # provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
63
+ # )
64
+
65
+ window_w, window_h = 1280, 1100
66
+
67
+ # llm = llm_provider.get_llm_model(
68
+ # provider="azure_openai",
69
+ # model_name="gpt-4o",
70
+ # temperature=0.5,
71
+ # base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
72
+ # api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
73
+ # )
74
+
75
+ mcp_server_config = {
76
+ "mcpServers": {
77
+ # "markitdown": {
78
+ # "command": "docker",
79
+ # "args": [
80
+ # "run",
81
+ # "--rm",
82
+ # "-i",
83
+ # "markitdown-mcp:latest"
84
+ # ]
85
+ # },
86
+ "desktop-commander": {
87
+ "command": "npx",
88
+ "args": [
89
+ "-y",
90
+ "@wonderwhy-er/desktop-commander"
91
+ ]
92
+ },
93
+ }
94
+ }
95
+ controller = CustomController()
96
+ await controller.setup_mcp_client(mcp_server_config)
97
+ use_own_browser = True
98
+ use_vision = True # Set to False when using DeepSeek
99
+
100
+ max_actions_per_step = 10
101
+ browser = None
102
+ browser_context = None
103
+
104
+ try:
105
+ extra_browser_args = []
106
+ if use_own_browser:
107
+ browser_binary_path = os.getenv("BROWSER_PATH", None)
108
+ if browser_binary_path == "":
109
+ browser_binary_path = None
110
+ browser_user_data = os.getenv("BROWSER_USER_DATA", None)
111
+ if browser_user_data:
112
+ extra_browser_args += [f"--user-data-dir={browser_user_data}"]
113
+ else:
114
+ browser_binary_path = None
115
+ browser = CustomBrowser(
116
+ config=BrowserConfig(
117
+ headless=False,
118
+ browser_binary_path=browser_binary_path,
119
+ extra_browser_args=extra_browser_args,
120
+ new_context_config=BrowserContextConfig(
121
+ window_width=window_w,
122
+ window_height=window_h,
123
+ )
124
+ )
125
+ )
126
+ browser_context = await browser.new_context(
127
+ config=BrowserContextConfig(
128
+ trace_path=None,
129
+ save_recording_path=None,
130
+ save_downloads_path="./tmp/downloads",
131
+ window_height=window_h,
132
+ window_width=window_w,
133
+ )
134
+ )
135
+ agent = BrowserUseAgent(
136
+ # task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
137
+ task="give me nvidia stock price",
138
+ llm=llm,
139
+ browser=browser,
140
+ browser_context=browser_context,
141
+ controller=controller,
142
+ use_vision=use_vision,
143
+ max_actions_per_step=max_actions_per_step,
144
+ generate_gif=True
145
+ )
146
+ history: AgentHistoryList = await agent.run(max_steps=100)
147
+
148
+ print("Final Result:")
149
+ pprint(history.final_result(), indent=4)
150
+
151
+ print("\nErrors:")
152
+ pprint(history.errors(), indent=4)
153
+
154
+ except Exception:
155
+ import traceback
156
+ traceback.print_exc()
157
+ finally:
158
+ if browser_context:
159
+ await browser_context.close()
160
+ if browser:
161
+ await browser.close()
162
+ if controller:
163
+ await controller.close_mcp_client()
164
+
165
+
166
+ async def test_browser_use_parallel():
167
+ from browser_use.browser.browser import Browser, BrowserConfig
168
+ from browser_use.browser.context import (
169
+ BrowserContextConfig,
170
+ )
171
+ from browser_use.agent.service import Agent
172
+
173
+ from src.browser.custom_browser import CustomBrowser
174
+ from src.controller.custom_controller import CustomController
175
+ from src.utils import llm_provider
176
+ from src.agent.browser_use.browser_use_agent import BrowserUseAgent
177
+
178
+ # llm = utils.get_llm_model(
179
+ # provider="openai",
180
+ # model_name="gpt-4o",
181
+ # temperature=0.8,
182
+ # base_url=os.getenv("OPENAI_ENDPOINT", ""),
183
+ # api_key=os.getenv("OPENAI_API_KEY", ""),
184
+ # )
185
+
186
+ # llm = utils.get_llm_model(
187
+ # provider="google",
188
+ # model_name="gemini-2.0-flash",
189
+ # temperature=0.6,
190
+ # api_key=os.getenv("GOOGLE_API_KEY", "")
191
+ # )
192
+
193
+ # llm = utils.get_llm_model(
194
+ # provider="deepseek",
195
+ # model_name="deepseek-reasoner",
196
+ # temperature=0.8
197
+ # )
198
+
199
+ # llm = utils.get_llm_model(
200
+ # provider="deepseek",
201
+ # model_name="deepseek-chat",
202
+ # temperature=0.8
203
+ # )
204
+
205
+ # llm = utils.get_llm_model(
206
+ # provider="ollama", model_name="qwen2.5:7b", temperature=0.5
207
+ # )
208
+
209
+ # llm = utils.get_llm_model(
210
+ # provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
211
+ # )
212
+
213
+ window_w, window_h = 1280, 1100
214
+
215
+ llm = llm_provider.get_llm_model(
216
+ provider="azure_openai",
217
+ model_name="gpt-4o",
218
+ temperature=0.5,
219
+ base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
220
+ api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
221
+ )
222
+
223
+ mcp_server_config = {
224
+ "mcpServers": {
225
+ # "markitdown": {
226
+ # "command": "docker",
227
+ # "args": [
228
+ # "run",
229
+ # "--rm",
230
+ # "-i",
231
+ # "markitdown-mcp:latest"
232
+ # ]
233
+ # },
234
+ "desktop-commander": {
235
+ "command": "npx",
236
+ "args": [
237
+ "-y",
238
+ "@wonderwhy-er/desktop-commander"
239
+ ]
240
+ },
241
+ # "filesystem": {
242
+ # "command": "npx",
243
+ # "args": [
244
+ # "-y",
245
+ # "@modelcontextprotocol/server-filesystem",
246
+ # "/Users/xxx/ai_workspace",
247
+ # ]
248
+ # },
249
+ }
250
+ }
251
+ controller = CustomController()
252
+ await controller.setup_mcp_client(mcp_server_config)
253
+ use_own_browser = True
254
+ use_vision = True # Set to False when using DeepSeek
255
+
256
+ max_actions_per_step = 10
257
+ browser = None
258
+ browser_context = None
259
+
260
+ try:
261
+ extra_browser_args = []
262
+ if use_own_browser:
263
+ browser_binary_path = os.getenv("BROWSER_PATH", None)
264
+ if browser_binary_path == "":
265
+ browser_binary_path = None
266
+ browser_user_data = os.getenv("BROWSER_USER_DATA", None)
267
+ if browser_user_data:
268
+ extra_browser_args += [f"--user-data-dir={browser_user_data}"]
269
+ else:
270
+ browser_binary_path = None
271
+ browser = CustomBrowser(
272
+ config=BrowserConfig(
273
+ headless=False,
274
+ browser_binary_path=browser_binary_path,
275
+ extra_browser_args=extra_browser_args,
276
+ new_context_config=BrowserContextConfig(
277
+ window_width=window_w,
278
+ window_height=window_h,
279
+ )
280
+ )
281
+ )
282
+ browser_context = await browser.new_context(
283
+ config=BrowserContextConfig(
284
+ trace_path=None,
285
+ save_recording_path=None,
286
+ save_downloads_path="./tmp/downloads",
287
+ window_height=window_h,
288
+ window_width=window_w,
289
+ force_new_context=True
290
+ )
291
+ )
292
+ agents = [
293
+ BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
294
+ for task in [
295
+ 'Search Google for weather in Tokyo',
296
+ # 'Check Reddit front page title',
297
+ # 'Find NASA image of the day',
298
+ # 'Check top story on CNN',
299
+ # 'Search latest SpaceX launch date',
300
+ # 'Look up population of Paris',
301
+ 'Find current time in Sydney',
302
+ 'Check who won last Super Bowl',
303
+ # 'Search trending topics on Twitter',
304
+ ]
305
+ ]
306
+
307
+ history = await asyncio.gather(*[agent.run() for agent in agents])
308
+ print("Final Result:")
309
+ pprint(history.final_result(), indent=4)
310
+
311
+ print("\nErrors:")
312
+ pprint(history.errors(), indent=4)
313
+
314
+ pdb.set_trace()
315
+
316
+ except Exception:
317
+ import traceback
318
+
319
+ traceback.print_exc()
320
+ finally:
321
+ if browser_context:
322
+ await browser_context.close()
323
+ if browser:
324
+ await browser.close()
325
+ if controller:
326
+ await controller.close_mcp_client()
327
+
328
+
329
+ async def test_deep_research_agent():
330
+ from src.agent.deep_research.deep_research_agent import DeepResearchAgent, PLAN_FILENAME, REPORT_FILENAME
331
+ from src.utils import llm_provider
332
+
333
+ llm = llm_provider.get_llm_model(
334
+ provider="openai",
335
+ model_name="gpt-4o",
336
+ temperature=0.5
337
+ )
338
+
339
+ # llm = llm_provider.get_llm_model(
340
+ # provider="bedrock",
341
+ # )
342
+
343
+     mcp_server_config = {
+         "mcpServers": {
+             "desktop-commander": {
+                 "command": "npx",
+                 "args": [
+                     "-y",
+                     "@wonderwhy-er/desktop-commander"
+                 ]
+             },
+         }
+     }
+
+     browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
+     agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
+     research_topic = "Give me investment advice on Nvidia and Tesla."
+     task_id_to_resume = ""  # Set this to resume a previous task ID
+
+     print(f"Starting research on: {research_topic}")
+
+     try:
+         # Call run and wait for the final result dictionary
+         result = await agent.run(
+             research_topic,
+             task_id=task_id_to_resume,
+             save_dir="./tmp/deep_research",
+             max_parallel_browsers=1,
+         )
+
+         print("\n--- Research Process Ended ---")
+         print(f"Status: {result.get('status')}")
+         print(f"Message: {result.get('message')}")
+         print(f"Task ID: {result.get('task_id')}")
+
+         # Check the final state for the report
+         final_state = result.get('final_state', {})
+         if final_state:
+             print("\n--- Final State Summary ---")
+             completed_steps = sum(
+                 1 for item in final_state.get('research_plan', [])
+                 if item.get('status') == 'completed'
+             )
+             print(f"  Plan Steps Completed: {completed_steps}")
+             print(f"  Total Search Results Logged: {len(final_state.get('search_results', []))}")
+             if final_state.get("final_report"):
+                 print("  Final Report: Generated (content omitted). You can find it in the output directory.")
+                 # print("\n--- Final Report ---")  # Optionally print report
+                 # print(final_state["final_report"])
+             else:
+                 print("  Final Report: Not generated.")
+         else:
+             print("Final state information not available.")
+
+     except Exception as e:
+         print("\n--- An unhandled error occurred outside the agent run ---")
+         print(e)
+
+
+ if __name__ == "__main__":
+     asyncio.run(test_browser_use_agent())
+     # asyncio.run(test_browser_use_parallel())
+     # asyncio.run(test_deep_research_agent())
tests/test_controller.py ADDED
@@ -0,0 +1,131 @@
+ import asyncio
+ import pdb
+ import sys
+
+ sys.path.append(".")
+
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+
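+ # Starts the configured MCP servers and prints each discovered tool's name,
+ # description, and generated parameter schema.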
+ async def test_mcp_client():
+     from src.utils.mcp_client import setup_mcp_client_and_tools, create_tool_param_model
+
+     test_server_config = {
+         "mcpServers": {
+             # "markitdown": {
+             #     "command": "docker",
+             #     "args": [
+             #         "run",
+             #         "--rm",
+             #         "-i",
+             #         "markitdown-mcp:latest"
+             #     ]
+             # },
+             "desktop-commander": {
+                 "command": "npx",
+                 "args": [
+                     "-y",
+                     "@wonderwhy-er/desktop-commander"
+                 ]
+             },
+             # "filesystem": {
+             #     "command": "npx",
+             #     "args": [
+             #         "-y",
+             #         "@modelcontextprotocol/server-filesystem",
+             #         "/Users/xxx/ai_workspace",
+             #     ]
+             # },
+         }
+     }
+
+     mcp_tools, mcp_client = await setup_mcp_client_and_tools(test_server_config)
+
+     for tool in mcp_tools:
+         tool_param_model = create_tool_param_model(tool)
+         print(tool.name)
+         print(tool.description)
+         print(tool_param_model.model_json_schema())
+     pdb.set_trace()
+
+
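+ # Drives an MCP tool through CustomController: executes a shell command via
+ # desktop-commander, then polls read_output until the command finishes.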
+ async def test_controller_with_mcp():
+     from src.controller.custom_controller import CustomController
+
+     mcp_server_config = {
+         "mcpServers": {
+             # "markitdown": {
+             #     "command": "docker",
+             #     "args": [
+             #         "run",
+             #         "--rm",
+             #         "-i",
+             #         "markitdown-mcp:latest"
+             #     ]
+             # },
+             "desktop-commander": {
+                 "command": "npx",
+                 "args": [
+                     "-y",
+                     "@wonderwhy-er/desktop-commander"
+                 ]
+             },
+             # "filesystem": {
+             #     "command": "npx",
+             #     "args": [
+             #         "-y",
+             #         "@modelcontextprotocol/server-filesystem",
+             #         "/Users/xxx/ai_workspace",
+             #     ]
+             # },
+         }
+     }
+
+     controller = CustomController()
+     await controller.setup_mcp_client(mcp_server_config)
+     action_name = "mcp.desktop-commander.execute_command"
+     action_info = controller.registry.registry.actions[action_name]
+     param_model = action_info.param_model
+     print(param_model.model_json_schema())
+     params = {"command": "python ./tmp/test.py"}
+     validated_params = param_model(**params)
+     ActionModel_ = controller.registry.create_action_model()
+     # Create ActionModel instance with the validated parameters
+     action_model = ActionModel_(**{action_name: validated_params})
+     result = await controller.act(action_model)
+     result = result.extracted_content
+     print(result)
+     if result and "Command is still running. Use read_output to get more output." in result \
+             and "PID" in result.split("\n")[0]:
+         pid = int(result.split("\n")[0].split("PID")[-1].strip())
+         action_name = "mcp.desktop-commander.read_output"
+         action_info = controller.registry.registry.actions[action_name]
+         param_model = action_info.param_model
+         print(param_model.model_json_schema())
+         params = {"pid": pid}
+         validated_params = param_model(**params)
+         action_model = ActionModel_(**{action_name: validated_params})
+         output_result = ""
+         while True:
+             # Non-blocking sleep so the event loop keeps servicing the MCP client.
+             await asyncio.sleep(1)
+             result = await controller.act(action_model)
+             result = result.extracted_content
+             if result:
+                 pdb.set_trace()
+                 output_result = result
+                 break
+         print(output_result)
+         pdb.set_trace()
+     await controller.close_mcp_client()
+     pdb.set_trace()
+
+
+ if __name__ == '__main__':
+     # asyncio.run(test_mcp_client())
+     asyncio.run(test_controller_with_mcp())
tests/test_llm_api.py ADDED
@@ -0,0 +1,159 @@
+ import os
+ import pdb
+ from dataclasses import dataclass
+ from typing import Optional
+
+ from dotenv import load_dotenv
+ from langchain_core.messages import HumanMessage, SystemMessage
+ from langchain_ollama import ChatOllama
+
+ load_dotenv()
+
+ import sys
+
+ sys.path.append(".")
+
+
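+ # Provider-agnostic test config; base_url and api_key fall back to the
+ # provider's environment variables (see get_env_value) when left as None.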
+ @dataclass
+ class LLMConfig:
+     provider: str
+     model_name: str
+     temperature: float = 0.8
+     base_url: Optional[str] = None
+     api_key: Optional[str] = None
+
+
+ def create_message_content(text, image_path=None):
+     content = [{"type": "text", "text": text}]
+     image_format = "png" if image_path and image_path.endswith(".png") else "jpeg"
+     if image_path:
+         from src.utils import utils
+         image_data = utils.encode_image(image_path)
+         content.append({
+             "type": "image_url",
+             "image_url": {"url": f"data:image/{image_format};base64,{image_data}"}
+         })
+     return content
+
+
+ def get_env_value(key, provider):
+     env_mappings = {
+         "openai": {"api_key": "OPENAI_API_KEY", "base_url": "OPENAI_ENDPOINT"},
+         "azure_openai": {"api_key": "AZURE_OPENAI_API_KEY", "base_url": "AZURE_OPENAI_ENDPOINT"},
+         "google": {"api_key": "GOOGLE_API_KEY"},
+         "deepseek": {"api_key": "DEEPSEEK_API_KEY", "base_url": "DEEPSEEK_ENDPOINT"},
+         "mistral": {"api_key": "MISTRAL_API_KEY", "base_url": "MISTRAL_ENDPOINT"},
+         "alibaba": {"api_key": "ALIBABA_API_KEY", "base_url": "ALIBABA_ENDPOINT"},
+         "moonshot": {"api_key": "MOONSHOT_API_KEY", "base_url": "MOONSHOT_ENDPOINT"},
+         "ibm": {"api_key": "IBM_API_KEY", "base_url": "IBM_ENDPOINT"}
+     }
+
+     if provider in env_mappings and key in env_mappings[provider]:
+         return os.getenv(env_mappings[provider][key], "")
+     return ""
+
+
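+ # Sends a single (optionally multimodal) query to the configured provider and
+ # prints the reply; Ollama models take a plain-string shortcut path.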
+ def test_llm(config, query, image_path=None, system_message=None):
+     from src.utils import llm_provider
+
+     # Special handling for Ollama-based models
+     if config.provider == "ollama":
+         if "deepseek-r1" in config.model_name:
+             from src.utils.llm_provider import DeepSeekR1ChatOllama
+             llm = DeepSeekR1ChatOllama(model=config.model_name)
+         else:
+             llm = ChatOllama(model=config.model_name)
+
+         ai_msg = llm.invoke(query)
+         print(ai_msg.content)
+         if "deepseek-r1" in config.model_name:
+             pdb.set_trace()
+         return
+
+     # For other providers, use the standard configuration
+     llm = llm_provider.get_llm_model(
+         provider=config.provider,
+         model_name=config.model_name,
+         temperature=config.temperature,
+         base_url=config.base_url or get_env_value("base_url", config.provider),
+         api_key=config.api_key or get_env_value("api_key", config.provider)
+     )
+
+     # Prepare messages for non-Ollama models
+     messages = []
+     if system_message:
+         messages.append(SystemMessage(content=create_message_content(system_message)))
+     messages.append(HumanMessage(content=create_message_content(query, image_path)))
+     ai_msg = llm.invoke(messages)
+
+     # Handle different response types
+     if hasattr(ai_msg, "reasoning_content"):
+         print(ai_msg.reasoning_content)
+     print(ai_msg.content)
+
+
+ def test_openai_model():
+     config = LLMConfig(provider="openai", model_name="gpt-4o")
+     test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+ def test_google_model():
+     # Enable your API key first if you haven't: https://ai.google.dev/palm_docs/oauth_quickstart
+     config = LLMConfig(provider="google", model_name="gemini-2.0-flash-exp")
+     test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+ def test_azure_openai_model():
+     config = LLMConfig(provider="azure_openai", model_name="gpt-4o")
+     test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+ def test_deepseek_model():
+     config = LLMConfig(provider="deepseek", model_name="deepseek-chat")
+     test_llm(config, "Who are you?")
+
+
+ def test_deepseek_r1_model():
+     config = LLMConfig(provider="deepseek", model_name="deepseek-reasoner")
+     test_llm(config, "Which is greater, 9.11 or 9.8?", system_message="You are a helpful AI assistant.")
+
+
+ def test_ollama_model():
+     config = LLMConfig(provider="ollama", model_name="qwen2.5:7b")
+     test_llm(config, "Sing a ballad of LangChain.")
+
+
+ def test_deepseek_r1_ollama_model():
+     config = LLMConfig(provider="ollama", model_name="deepseek-r1:14b")
+     test_llm(config, "How many 'r's are in the word 'strawberry'?")
+
+
+ def test_mistral_model():
+     config = LLMConfig(provider="mistral", model_name="pixtral-large-latest")
+     test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+ def test_moonshot_model():
+     config = LLMConfig(provider="moonshot", model_name="moonshot-v1-32k-vision-preview")
+     test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+ def test_ibm_model():
+     config = LLMConfig(provider="ibm", model_name="meta-llama/llama-4-maverick-17b-128e-instruct-fp8")
+     test_llm(config, "Describe this image", "assets/examples/test.png")
+
+
+ def test_qwen_model():
+     config = LLMConfig(provider="alibaba", model_name="qwen-vl-max")
+     test_llm(config, "How many 'r's are in the word 'strawberry'?")
+
+
+ if __name__ == "__main__":
+     # test_openai_model()
+     # test_google_model()
+     test_azure_openai_model()
+     # test_deepseek_model()
+     # test_ollama_model()
+     # test_deepseek_r1_model()
+     # test_deepseek_r1_ollama_model()
+     # test_mistral_model()
+     # test_ibm_model()
+     # test_qwen_model()
tests/test_playwright.py ADDED
@@ -0,0 +1,31 @@
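+ # Manual check that Playwright can drive a locally installed browser with an
+ # existing user profile via a persistent context.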
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+
+ def test_connect_browser():
+     import os
+     from playwright.sync_api import sync_playwright
+
+     # Use the same env vars the rest of the project reads (see .env.example).
+     chrome_exe = os.getenv("BROWSER_PATH", "")
+     chrome_user_data = os.getenv("BROWSER_USER_DATA", "")
+
+     with sync_playwright() as p:
+         browser = p.chromium.launch_persistent_context(
+             user_data_dir=chrome_user_data,
+             executable_path=chrome_exe,
+             headless=False  # Keep browser window visible
+         )
+
+         page = browser.new_page()
+         page.goto("https://mail.google.com/mail/u/0/#inbox")
+         page.wait_for_load_state()
+
+         input("Press the Enter key to close the browser...")
+
+         browser.close()
+
+
+ if __name__ == '__main__':
+     test_connect_browser()
webui.py ADDED
@@ -0,0 +1,19 @@
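+ # load_dotenv() runs before importing src.webui so provider keys from .env
+ # are available when the interface module is imported.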
+ from dotenv import load_dotenv
+ load_dotenv()
+ import argparse
+ from src.webui.interface import theme_map, create_ui
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Gradio WebUI for Browser Agent")
+     parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
+     parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
+     parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
+     args = parser.parse_args()
+
+     demo = create_ui(theme_name=args.theme)
+     demo.queue().launch(server_name=args.ip, server_port=args.port)
+
+
+ if __name__ == '__main__':
+     main()