Commit
·
99b81db
1
Parent(s):
e736965
Building new logic
Browse files- Dockerfile +1 -1
- screenshot.py +76 -0
Dockerfile
CHANGED
@@ -53,4 +53,4 @@ RUN python -m playwright install chromium
|
|
53 |
EXPOSE 7860
|
54 |
|
55 |
# Run the FastAPI application
|
56 |
-
CMD ["python", "-m", "uvicorn", "
|
|
|
53 |
EXPOSE 7860
|
54 |
|
55 |
# Run the FastAPI application
|
56 |
+
CMD ["python", "-m", "uvicorn", "screenshot:app", "--host", "0.0.0.0", "--port", "7860"]
|
screenshot.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException, Query
|
2 |
+
from pydantic import BaseModel
|
3 |
+
from playwright.async_api import async_playwright
|
4 |
+
import asyncio
|
5 |
+
import base64
|
6 |
+
import logging
|
7 |
+
from typing import List, Optional
|
8 |
+
from urllib.parse import urlparse
|
9 |
+
|
10 |
+
# ASGI application object; the Dockerfile's uvicorn command serves it as "screenshot:app".
app = FastAPI(title="BrowserAutomation API", version="1.0")
# Module-wide logger used by the endpoint error handlers.
logger = logging.getLogger("browser-api")
|
12 |
+
|
13 |
+
# Pydantic Models
|
14 |
+
class ScreenshotRequest(BaseModel):
    """Request body for the ``/screenshot`` endpoint."""

    url: str  # page to load; the endpoint rejects it with a 400 when valid_url() fails
    full_page: bool = True  # forwarded to page.screenshot(full_page=...)
    device: Optional[str] = "desktop"  # key into DEVICES (mobile/tablet/desktop); unknown or null falls back to desktop
    format: str = "png"  # png/jpeg/pdf; "pdf" selects the PDF branch of the endpoint
    delay_ms: int = 2000  # milliseconds the endpoint sleeps after page.goto() returns
|
20 |
+
|
21 |
+
class ScrapeRequest(BaseModel):
    """Request body for the ``/scrape`` endpoint."""

    url: str  # page to scrape
    extract_scripts: List[str] = []  # JavaScript snippets to execute in the page
    css_selectors: List[str] = []  # CSS selectors of the elements to extract
|
25 |
+
|
26 |
+
# Device presets
|
27 |
+
# Each preset is looked up by its name from the request's ``device`` field;
# capture_screenshot falls back to "desktop" for names not listed here.
DEVICES = {
    "mobile": dict(width=375, height=812, mobile=True),
    "tablet": dict(width=768, height=1024, mobile=True),
    "desktop": dict(width=1366, height=768, mobile=False),
}
|
32 |
+
|
33 |
+
@app.on_event("startup")
async def init_browser():
    """Start Playwright and launch one Chromium instance at application startup.

    Both handles are stored on ``app.state`` (``playwright`` and ``browser``)
    so request handlers can reuse the same browser.
    """
    pw = await async_playwright().start()
    app.state.playwright = pw
    app.state.browser = await pw.chromium.launch()
|
37 |
+
|
38 |
+
@app.post("/screenshot")
async def capture_screenshot(req: ScreenshotRequest):
    """Capture a screenshot (or PDF) of a web page with device emulation.

    Args:
        req: Target URL plus capture options (device preset, output format,
            full-page flag and post-load delay).

    Returns:
        A raw ``application/pdf`` response when ``req.format`` is ``"pdf"``;
        otherwise a JSON object ``{"image": <base64-encoded image>}``.

    Raises:
        HTTPException: 400 for an invalid URL or an unsupported format,
            500 when loading or capturing the page fails.
    """
    # Local import: the PDF branch needs Response, which the module-level
    # fastapi import does not bring into scope.
    from fastapi import Response

    if not valid_url(req.url):
        raise HTTPException(400, "Invalid URL")
    # Reject unknown formats up front so a client mistake is reported as a
    # 400 instead of a Playwright error wrapped in a 500.
    if req.format not in ("png", "jpeg", "pdf"):
        raise HTTPException(400, "Unsupported format")

    device = DEVICES.get(req.device, DEVICES["desktop"])
    browser = app.state.browser

    context = None
    try:
        # new_context() takes the size as a ``viewport`` dict and the mobile
        # flag as ``is_mobile``; the preset's flat keys are not valid
        # keyword arguments, so translate them here.
        context = await browser.new_context(
            viewport={"width": device["width"], "height": device["height"]},
            is_mobile=device["mobile"],
        )
        page = await context.new_page()
        await page.goto(req.url)
        # Give late-loading content time to render before capturing.
        await asyncio.sleep(req.delay_ms / 1000)

        if req.format == "pdf":
            pdf = await page.pdf()
            return Response(content=pdf, media_type="application/pdf")

        screenshot = await page.screenshot(full_page=req.full_page, type=req.format)
        return {"image": base64.b64encode(screenshot).decode()}
    except Exception as e:
        # Log the details server-side; the client only gets a generic message.
        logger.exception("Screenshot failed: %s", e)
        raise HTTPException(500, "Capture failed") from e
    finally:
        # Always release the per-request context (and its pages); otherwise
        # every request leaks one context in the shared browser.
        if context is not None:
            try:
                await context.close()
            except Exception as close_err:
                # A failed cleanup must not replace the real outcome.
                logger.warning("Closing browser context failed: %s", close_err)
|
63 |
+
|
64 |
+
@app.post("/scrape")
async def scrape_page(req: ScrapeRequest):
    """Execute JS and extract page content (not implemented yet).

    Planned behaviour, similar to the screenshot endpoint:
      1. Execute the provided JS scripts.
      2. Extract DOM elements by CSS selectors.
      3. Return structured JSON data.

    Raises:
        HTTPException: always 501 until the endpoint is implemented, so that
            clients do not receive a misleading ``200`` with a ``null`` body.
    """
    raise HTTPException(501, "Scrape endpoint is not implemented")
|
72 |
+
|
73 |
+
# Helper function
|
74 |
+
def valid_url(url: str) -> bool:
    """Return True when *url* is an absolute http(s) URL with a network location.

    The URL comes from the request body and is handed to a browser, so only
    the ``http`` and ``https`` schemes are accepted; other schemes (for
    example ``file://host/...``) are rejected even if they carry a netloc.

    Args:
        url: The candidate URL string.

    Returns:
        True if the URL parses, uses http or https, and has a non-empty
        netloc; False otherwise, including for malformed input.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on malformed netlocs such as an unbalanced "[";
        # treat that as "invalid" instead of letting it escape as a 500.
        return False
    # urlparse lower-cases the scheme, so a plain membership test suffices.
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)
|