Spaces:

joaomorossini
/

Project_Management_Agency_Swarm

Runtime error

File size: 9,249 Bytes

670dd87

import base64
import time

from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import (
    frame_to_be_available_and_switch_to_it,
    presence_of_element_located,
)
from selenium.webdriver.support.wait import WebDriverWait

from agency_swarm.tools import BaseTool
from agency_swarm.util import get_openai_client

from .util import get_b64_screenshot, remove_highlight_and_labels
from .util.selenium import get_web_driver


class SolveCaptcha(BaseTool):
    """
    This tool asks a human to solve captcha on the current webpage. Make sure that captcha is visible before running it.
    """

    # NOTE(review): the class docstring above is intentionally left unchanged;
    # it is presumably surfaced by BaseTool as the tool description -- confirm
    # before rewording it. The implementation below does not involve a human:
    # it screenshots the challenge tiles and asks an OpenAI vision model which
    # ones to click.

    def run(self):
        """Try to solve the reCAPTCHA on the current page.

        Steps:
          1. Switch into the reCAPTCHA anchor iframe and click the checkbox.
          2. If the checkbox becomes checked, report success.
          3. Otherwise switch into the image-challenge iframe and repeatedly
             screenshot the tiles, ask the model which tiles match the
             instructions, click them and press the verify button.

        Returns:
            A status string: "Success", "Success. Captcha solved.",
            "Could not find captcha checkbox",
            "Could not click captcha checkbox: <error>",
            "Could not find captcha challenge" or "Could not solve captcha.".

        The driver is switched back to the top-level document before every
        return so that later page interactions are not scoped to one of the
        reCAPTCHA iframes.
        """
        # Up to five verification attempts; the separate round cap bounds the
        # number of model calls because continuous ("click until none left")
        # and "skip" rounds do not count as verification attempts.
        max_attempts = 5
        max_rounds = 20

        wd = get_web_driver()

        try:
            WebDriverWait(wd, 10).until(
                frame_to_be_available_and_switch_to_it(
                    (By.XPATH, "//iframe[@title='reCAPTCHA']")
                )
            )

            element = WebDriverWait(wd, 3).until(
                presence_of_element_located((By.ID, "recaptcha-anchor"))
            )
        except Exception:
            # The frame switch may already have happened when the anchor
            # lookup fails, so always restore the top-level document.
            wd.switch_to.default_content()
            return "Could not find captcha checkbox"

        try:
            # Scroll the element into view
            wd.execute_script("arguments[0].scrollIntoView(true);", element)
            time.sleep(1)  # Give some time for the scrolling to complete

            # Click the element using JavaScript
            wd.execute_script("arguments[0].click();", element)
        except Exception as e:
            wd.switch_to.default_content()
            return f"Could not click captcha checkbox: {str(e)}"

        try:
            # Now check if the reCAPTCHA is checked
            WebDriverWait(wd, 3).until(self._checkbox_is_checked)
        except Exception:
            # Not checked within the timeout: fall through to the image
            # challenge below.
            pass
        else:
            wd.switch_to.default_content()
            return "Success"

        wd.switch_to.default_content()

        client = get_openai_client()

        try:
            WebDriverWait(wd, 10).until(
                frame_to_be_available_and_switch_to_it(
                    (
                        By.XPATH,
                        "//iframe[@title='recaptcha challenge expires in two minutes']",
                    )
                )
            )
        except Exception:
            wd.switch_to.default_content()
            return "Could not find captcha challenge"

        time.sleep(2)

        attempts = 0
        rounds = 0
        while attempts < max_attempts and rounds < max_rounds:
            rounds += 1

            # Tiles carrying the rc-imageselect-dynamic-selected class are
            # skipped. A missing class attribute is treated as "no classes".
            tiles = [
                tile
                for tile in wd.find_elements(By.CLASS_NAME, "rc-imageselect-tile")
                if "rc-imageselect-dynamic-selected"
                not in (tile.get_attribute("class") or "").split()
            ]

            if not tiles:
                # Nothing to show the model (tiles not found in the current
                # frame). Count it as an attempt and retry instead of sending
                # an image-less request.
                attempts += 1
                time.sleep(1)
                continue

            image_content = []
            for index, tile in enumerate(tiles, start=1):
                screenshot = get_b64_screenshot(wd, tile)

                image_content.append(
                    {
                        "type": "text",
                        "text": f"Image {index}:",
                    }
                )
                image_content.append(
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{screenshot}",
                            "detail": "high",
                        },
                    },
                )

            tile_count = len(tiles)

            task_text = (
                wd.find_element(By.CLASS_NAME, "rc-imageselect-instructions")
                .text.strip()
                .replace("\n", " ")
            )

            # Must be evaluated before the instruction text is rewritten below.
            continuous_task = "once there are none left" in task_text.lower()

            # Rephrase the on-page instructions into an "output numbers" task.
            task_text = task_text.replace("Click verify", "Output 0")
            task_text = task_text.replace("click skip", "Output 0")
            task_text = task_text.replace("once", "if")
            task_text = task_text.replace("none left", "none")
            task_text = task_text.replace("all", "only")
            task_text = task_text.replace("squares", "images")

            additional_info = ""
            if tile_count > 9:
                additional_info = (
                    "Keep in mind that all images are a part of a bigger image "
                    "from left to right, and top to bottom. The grid is 4x4. "
                )

            messages = [
                {
                    "role": "system",
                    "content": f"""You are an advanced AI designed to support users with visual impairments.
                    User will provide you with {tile_count} images numbered from 1 to {tile_count}. Your task is to output
                    the numbers of the images that contain the requested object, or at least some part of the requested
                    object. {additional_info}If there are no individual images that satisfy this condition, output 0.
                    """.replace("\n", ""),
                },
                {
                    "role": "user",
                    "content": [
                        *image_content,
                        {
                            "type": "text",
                            "text": f"{task_text}. Only output numbers separated by commas and nothing else. "
                            f"Output 0 if there are none.",
                        },
                    ],
                },
            ]

            response = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                max_tokens=1024,
                temperature=0.0,
            )

            selection = self._parse_selection(
                response.choices[0].message.content, tile_count
            )

            if not selection:
                # The model found nothing to click: press verify/skip.
                verify_button_text = self._click_verify_button(wd)

                time.sleep(1)

                try:
                    if self.verify_checkbox(wd):
                        wd.switch_to.default_content()
                        return "Success. Captcha solved."
                except Exception:
                    print("Not checked")

            else:
                # Click the tiles based on the provided numbers
                for number in selection:
                    wd.execute_script("arguments[0].click();", tiles[number - 1])
                    time.sleep(0.5)

                time.sleep(3)

                if continuous_task:
                    # Re-read the grid and ask again before pressing verify.
                    continue

                verify_button_text = self._click_verify_button(wd)

                try:
                    if self.verify_checkbox(wd):
                        wd.switch_to.default_content()
                        return "Success. Captcha solved."
                except Exception:
                    # Best effort: an unchecked box just means another round.
                    pass

            # Only rounds whose button was labelled "verify" count as attempts.
            if "verify" in verify_button_text.lower():
                attempts += 1

        wd = remove_highlight_and_labels(wd)

        wd.switch_to.default_content()

        # close captcha
        try:
            element = WebDriverWait(wd, 3).until(
                presence_of_element_located((By.XPATH, "//iframe[@title='reCAPTCHA']"))
            )

            # Click just above the reCAPTCHA iframe (best effort).
            wd.execute_script(
                f"document.elementFromPoint({element.location['x']}, {element.location['y']-10}).click();"
            )
        except Exception as e:
            print(e)

        return "Could not solve captcha."

    @staticmethod
    def _checkbox_is_checked(driver):
        """Return True when the recaptcha-checkbox element has aria-checked == "true"."""
        checkbox = driver.find_element(By.CLASS_NAME, "recaptcha-checkbox")
        return checkbox.get_attribute("aria-checked") == "true"

    @staticmethod
    def _click_verify_button(wd):
        """Click the challenge button and return the label it showed before the click."""
        verify_button = wd.find_element(By.ID, "recaptcha-verify-button")
        label = verify_button.text
        wd.execute_script("arguments[0].click();", verify_button)
        return label

    @staticmethod
    def _parse_selection(message_text, tile_count):
        """Extract the tile numbers to click from the model reply.

        Every run of ASCII digits in the reply is read as a number. Numbers
        outside 1..tile_count (including the "0" that means "none") and
        repeated numbers are dropped, so the result can be used directly as
        1-based indexes into the tile list. An empty list means "nothing to
        click". A missing reply (None) is treated as empty.
        """
        spaced = "".join(
            ch if ch in "0123456789" else " " for ch in (message_text or "")
        )
        selection = []
        for token in spaced.split():
            number = int(token)
            if 1 <= number <= tile_count and number not in selection:
                selection.append(number)
        return selection

    def verify_checkbox(self, wd):
        """Check whether the reCAPTCHA checkbox is ticked after a verify click.

        Switches to the top-level document, enters the reCAPTCHA anchor iframe
        and waits up to 5 seconds for the checkbox to report
        aria-checked == "true".

        Returns:
            True when the checkbox is checked (the driver is left inside the
            anchor iframe), False otherwise (the driver is switched back into
            the image-challenge iframe so the caller can continue solving).

        Raises:
            Whatever the wait raises (e.g. a timeout) when the check failed
            and the challenge iframe cannot be re-entered within 10 seconds.
        """
        wd.switch_to.default_content()

        try:
            WebDriverWait(wd, 10).until(
                frame_to_be_available_and_switch_to_it(
                    (By.XPATH, "//iframe[@title='reCAPTCHA']")
                )
            )

            WebDriverWait(wd, 5).until(self._checkbox_is_checked)

            return True
        except Exception:
            # Not solved yet: go back into the challenge iframe.
            wd.switch_to.default_content()

            WebDriverWait(wd, 10).until(
                frame_to_be_available_and_switch_to_it(
                    (
                        By.XPATH,
                        "//iframe[@title='recaptcha challenge expires in two minutes']",
                    )
                )
            )

        return False