File size: 4,034 Bytes
de68d43
 
5bf8ac9
de68d43
 
 
 
 
 
 
 
 
5bf8ac9
de68d43
 
5bf8ac9
 
de68d43
5bf8ac9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9ef651
 
5bf8ac9
 
a9ef651
5bf8ac9
 
a9ef651
 
 
5bf8ac9
a9ef651
de68d43
 
c16aac6
 
8beead4
5bf8ac9
c16aac6
5bf8ac9
 
 
 
 
 
 
 
 
 
 
 
 
 
c16aac6
5bf8ac9
 
 
 
 
c16aac6
5bf8ac9
c16aac6
5bf8ac9
a9429bd
5bf8ac9
 
de68d43
 
 
 
 
 
5bf8ac9
 
 
de68d43
 
 
a9ef651
de68d43
 
 
 
 
 
 
 
 
 
 
 
 
 
5bf8ac9
 
de68d43
5bf8ac9
de68d43
a9ef651
5bf8ac9
a9ef651
 
 
5bf8ac9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from __future__ import annotations

from typing import AsyncIterator, Awaitable, ClassVar

from playwright.async_api import (  # noqa: F401
    Browser,
    BrowserContext,
    Page,
    TimeoutError,
    async_playwright,
)

from .models import GetContentModel, PageModel, ScreenshotModel  # noqa: TCH001


class AsyncMixin:
    """Experimental mixin that makes instances of a class awaitable.

    ``await obj`` drives the coroutine returned by ``obj.ainit()`` and evaluates
    to that coroutine's return value, so subclasses override ``ainit`` to do
    their asynchronous set-up.
    """

    async def ainit(self) -> None:
        """Asynchronous initialisation hook; the base implementation does nothing."""
        return None

    def __await__(self) -> AsyncIterator[Awaitable]:
        """Delegate ``await self`` to the coroutine produced by ``ainit``."""
        init_coro = self.ainit()
        return init_coro.__await__()


class PlaywrightInstance(AsyncMixin):
    """Keep a Playwright driver and a Firefox browser open for reuse across API requests.

    Awaiting an instance (``await PlaywrightInstance()``) runs ``ainit``, which
    starts Playwright and launches the browser the first time it is called.
    """

    # User-agent string installed through the "general.useragent.override" preference below.
    HEADERS: str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"  # noqa: E501

    # Preferences passed to ``firefox.launch`` as ``firefox_user_prefs``.
    FIREFOX_USER_PREFS: ClassVar[dict[str, bool | int | str]] = {
        "extensions.enabledScopes": 1,
        "extensions.autoDisableScopes": 1,
        "dom.webdriver.enabled": False,
        "useAutomationExtension": False,
        "general.useragent.override": HEADERS,
    }

    def __init__(self) -> None:
        """Create an idle instance; nothing is started until ``ainit`` runs."""
        # NOTE(review): annotated with the ``async_playwright`` factory, but the value
        # stored here is whatever ``async_playwright().start()`` returns.
        self.playwright: async_playwright | None = None
        self.browser: Browser | None = None

    async def ainit(self) -> PlaywrightInstance:
        """Start Playwright and launch the browser if they are not running yet.

        Returns:
            PlaywrightInstance: this instance, so ``await PlaywrightInstance()``
            evaluates to a ready-to-use object.

        Raises:
            Whatever ``async_playwright().start()`` or ``firefox.launch`` raise; if
            the launch fails the driver started here is stopped again first.
        """
        if not self.playwright:
            self.playwright = await async_playwright().start()
            try:
                self.browser = await self.playwright.firefox.launch(
                    firefox_user_prefs=self.FIREFOX_USER_PREFS,
                )
            except BaseException:
                # A started driver without a browser would make every later
                # ``ainit`` call skip the launch, so undo the start before re-raising.
                await self.playwright.stop()
                self.playwright = None
                raise
        return self

    async def new_context_page(
        self,
        browser: Browser,
        screenshot_model: GetContentModel,
        page_model: PageModel,
    ) -> tuple[BrowserContext | None, Page]:
        """Create a page, either directly on the browser or inside a new browser context.

        Parameters:
            browser (Browser):
                The Playwright Browser instance to create the page or context in.
                When ``None`` is passed, the instance's own ``self.browser`` is used.
            screenshot_model (GetContentModel):
                Request configuration; its ``new_browser`` flag selects whether a
                dedicated browser context is created.
            page_model (PageModel):
                Page configuration (colour scheme, JavaScript, viewport, proxy).

        Returns:
            tuple: ``(context, page)``. ``context`` is ``None`` when the page was
            opened directly on the browser, otherwise it is the newly created
            BrowserContext, which the caller must close.
        """
        params = {
            "color_scheme": page_model.color_scheme,
            "java_script_enabled": page_model.java_script_enabled,
            "no_viewport": page_model.no_viewport,
            "proxy": page_model.proxy.model_dump() if page_model.proxy else None,
            "viewport": page_model.viewport.model_dump() if page_model.viewport else None,
        }

        # Use the browser the caller supplied in both branches; fall back to the
        # instance's own browser so callers that passed ``None`` keep working.
        target = browser if browser is not None else self.browser

        if not screenshot_model.new_browser:
            return None, await target.new_page(**params)

        new_context = await target.new_context(**params)
        try:
            page = await new_context.new_page()
        except BaseException:
            # Nobody else holds a reference to the context yet, so close it here.
            await new_context.close()
            raise
        return new_context, page

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Open ``screenshot_model.url`` and capture a screenshot of it.

        Parameters:
            screenshot_model (ScreenshotModel):
                Supplies the ``url``, the ``ms_delay`` to wait after navigation, an
                optional ``query_selector`` restricting the capture to one locator,
                and the ``full_page`` flag used for whole-page captures.
            page_model (PageModel):
                Page configuration forwarded to ``new_context_page``.

        Returns:
            bytes: the screenshot data returned by Playwright.

        Raises:
            Whatever Playwright raises during navigation or capture; the page (and
            the context, if one was created) is closed before the error propagates.
        """
        context, page = await self.new_context_page(
            screenshot_model=screenshot_model, browser=self.browser, page_model=page_model,
        )

        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            if screenshot_model.query_selector:
                return await page.locator(screenshot_model.query_selector).screenshot()
            return await page.screenshot(full_page=screenshot_model.full_page)
        finally:
            # Always release the page, and the dedicated context when there is one,
            # even if navigation or the capture failed.
            try:
                await page.close()
            finally:
                if context:
                    await context.close()

    async def close_instance(self) -> None:
        """Close the browser and stop Playwright; does nothing if not started.

        The stored references are cleared, so a later ``ainit`` starts a fresh
        driver and browser, and calling this method twice is harmless.
        """
        if not self.playwright:
            return

        playwright, browser = self.playwright, self.browser
        self.playwright = None
        self.browser = None

        try:
            if browser is not None:
                await browser.close()
        finally:
            # Stop the driver even when closing the browser raised.
            await playwright.stop()