Commit
·
820622d
1
Parent(s):
99b81db
Building new logic
Browse files- screenshot.py +65 -62
screenshot.py
CHANGED
@@ -3,74 +3,77 @@ from pydantic import BaseModel
|
|
3 |
from playwright.async_api import async_playwright
|
4 |
import asyncio
|
5 |
import base64
|
6 |
-
import
|
7 |
-
from typing import List, Optional
|
8 |
-
from urllib.parse import urlparse
|
9 |
|
10 |
-
app = FastAPI(
|
11 |
-
logger = logging.getLogger("browser-api")
|
12 |
|
13 |
-
# Pydantic Models
|
14 |
-
class ScreenshotRequest(BaseModel):
|
15 |
-
url: str
|
16 |
-
full_page: bool = True
|
17 |
-
device: Optional[str] = "desktop" # mobile/tablet/desktop
|
18 |
-
format: str = "png" # png/jpeg/pdf
|
19 |
-
delay_ms: int = 2000 # wait after load
|
20 |
|
21 |
-
class
|
22 |
url: str
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
"desktop": {"width": 1366, "height": 768, "mobile": False}
|
31 |
-
}
|
32 |
|
33 |
-
@app.on_event("startup")
|
34 |
-
async def init_browser():
|
35 |
-
app.state.playwright = await async_playwright().start()
|
36 |
-
app.state.browser = await app.state.playwright.chromium.launch()
|
37 |
|
38 |
-
@app.
|
39 |
-
async def
|
40 |
-
"""Capture website screenshot with device emulation"""
|
41 |
-
if not valid_url(req.url):
|
42 |
-
raise HTTPException(400, "Invalid URL")
|
43 |
-
|
44 |
-
device = DEVICES.get(req.device, DEVICES["desktop"])
|
45 |
-
browser = app.state.browser
|
46 |
-
|
47 |
try:
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
except Exception as e:
|
61 |
-
logger.error(f"Screenshot failed: {str(e)}")
|
62 |
-
raise HTTPException(500, "Capture failed")
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
# Implementation similar to screenshot but:
|
68 |
-
# 1. Execute provided JS scripts
|
69 |
-
# 2. Extract DOM elements by CSS selectors
|
70 |
-
# 3. Return structured JSON data
|
71 |
-
pass
|
72 |
|
73 |
-
#
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from playwright.async_api import async_playwright

import asyncio
import base64
import time
from typing import List, Optional
from urllib.parse import urlparse
|
|
|
|
|
7 |
|
8 |
+
app = FastAPI()
|
|
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
class AnalysisResult(BaseModel):
    """Response payload returned by the ``/analyze`` endpoint.

    The nullable fields carry an explicit ``None`` default so the model
    behaves the same under Pydantic v1 (where ``Optional[X]`` is implicitly
    optional) and Pydantic v2 (where it would otherwise be required).
    """

    # URL that was analysed, echoed back as supplied by the caller.
    url: str
    # Duration of the initial navigation in seconds, rounded to 2 decimals.
    load_time: float
    # Document title; None/empty when the page has no <title>.
    title: Optional[str] = None
    # Content of <meta name="description">, or None when the tag is absent.
    meta_description: Optional[str] = None
    # Content of <meta property="og:image">, or None when the tag is absent.
    og_image: Optional[str] = None
    # Human-readable SEO findings (e.g. "Missing <title>").
    seo_flags: List[str]
    # Human-readable accessibility findings.
    accessibility_flags: List[str]
    # Full-page screenshot bytes, base64-encoded as UTF-8 text.
    screenshot_base64: str
|
|
|
|
|
20 |
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
def _is_http_url(url: str) -> bool:
    """Return True when *url* is an absolute http(s) URL with a host part."""
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return parts.scheme in ("http", "https") and bool(parts.netloc)


async def _meta_content(page, selector: str):
    """Return the ``content`` of the first element matching *selector*, or None."""
    # Single lookup: reuse the handle instead of querying the selector twice,
    # which also avoids a race if the DOM changes between the two queries.
    element = await page.query_selector(selector)
    if element is None:
        return None
    return await element.evaluate("el => el.content")


def _seo_flags(title, meta_description, has_h1: bool, og_image) -> list:
    """Build the list of SEO findings from the extracted page metadata."""
    checks = (
        (not title, "Missing <title>"),
        (not meta_description, "Missing meta description"),
        (not has_h1, "Missing <h1> tag"),
        (not og_image, "Missing Open Graph image"),
    )
    return [message for failed, message in checks if failed]


@app.get("/analyze", response_model=AnalysisResult)
async def analyze_website(url: str):
    """Load *url* in headless Chromium and report basic SEO/accessibility data.

    Args:
        url: Absolute ``http``/``https`` URL of the page to analyse.

    Returns:
        AnalysisResult with navigation time, title/meta information, SEO and
        accessibility flags, and a base64-encoded full-page screenshot.

    Raises:
        HTTPException: 400 when *url* is not an absolute http(s) URL;
            500 when the browser fails to load or inspect the page.
    """
    # The URL comes straight from the query string: reject anything that is
    # not plain http(s) (e.g. file://) before handing it to the browser.
    if not _is_http_url(url):
        raise HTTPException(status_code=400, detail="Invalid URL")

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                page = await browser.new_page()

                # Time only the navigation itself; perf_counter is monotonic,
                # so the measurement is immune to wall-clock adjustments.
                started = time.perf_counter()
                await page.goto(url, timeout=30000)
                load_time = round(time.perf_counter() - started, 2)

                # Wait for content
                await page.wait_for_load_state("networkidle")

                # Screenshot
                screenshot = await page.screenshot(full_page=True)
                screenshot_base64 = base64.b64encode(screenshot).decode("utf-8")

                # Title and meta info
                title = await page.title()
                meta_description = await _meta_content(
                    page, "meta[name='description']"
                )
                og_image = await _meta_content(page, "meta[property='og:image']")
                has_h1 = (await page.query_selector("h1")) is not None

                # Only a *missing* alt attribute is flagged: alt="" is valid
                # markup for decorative images. One selector query replaces
                # a round trip per <img>.
                missing_alt = (
                    await page.query_selector("img:not([alt])")
                ) is not None
            finally:
                # Release the browser even when navigation or inspection fails.
                await browser.close()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    accessibility_flags = []
    if missing_alt:
        accessibility_flags.append("Image without alt attribute")

    return AnalysisResult(
        url=url,
        load_time=load_time,
        title=title,
        meta_description=meta_description,
        og_image=og_image,
        seo_flags=_seo_flags(title, meta_description, has_h1, og_image),
        accessibility_flags=accessibility_flags,
        screenshot_base64=screenshot_base64,
    )
|