apexherbert200 committed on
Commit 820622d · 1 Parent(s): 99b81db

Building new logic

Files changed (1)
  screenshot.py +65 -62
screenshot.py CHANGED
@@ -3,74 +3,77 @@ from pydantic import BaseModel
 from playwright.async_api import async_playwright
 import asyncio
 import base64
-import logging
 from typing import List, Optional
-from urllib.parse import urlparse
+import time
 
-app = FastAPI(title="BrowserAutomation API", version="1.0")
-logger = logging.getLogger("browser-api")
+app = FastAPI()
 
-# Pydantic Models
-class ScreenshotRequest(BaseModel):
-    url: str
-    full_page: bool = True
-    device: Optional[str] = "desktop"  # mobile/tablet/desktop
-    format: str = "png"  # png/jpeg/pdf
-    delay_ms: int = 2000  # wait after load
-
-class ScrapeRequest(BaseModel):
+class AnalysisResult(BaseModel):
     url: str
-    extract_scripts: List[str] = []  # JS to execute
-    css_selectors: List[str] = []  # Elements to extract
-
-# Device presets
-DEVICES = {
-    "mobile": {"width": 375, "height": 812, "mobile": True},
-    "tablet": {"width": 768, "height": 1024, "mobile": True},
-    "desktop": {"width": 1366, "height": 768, "mobile": False}
-}
-
-@app.on_event("startup")
-async def init_browser():
-    app.state.playwright = await async_playwright().start()
-    app.state.browser = await app.state.playwright.chromium.launch()
-
-@app.post("/screenshot")
-async def capture_screenshot(req: ScreenshotRequest):
-    """Capture website screenshot with device emulation"""
-    if not valid_url(req.url):
-        raise HTTPException(400, "Invalid URL")
-
-    device = DEVICES.get(req.device, DEVICES["desktop"])
-    browser = app.state.browser
-
+    load_time: float
+    title: Optional[str]
+    meta_description: Optional[str]
+    og_image: Optional[str]
+    seo_flags: List[str]
+    accessibility_flags: List[str]
+    screenshot_base64: str
+
+
+@app.get("/analyze", response_model=AnalysisResult)
+async def analyze_website(url: str):
     try:
-        context = await browser.new_context(**device)
-        page = await context.new_page()
-        await page.goto(req.url)
-        await asyncio.sleep(req.delay_ms / 1000)
-
-        if req.format == "pdf":
-            pdf = await page.pdf()
-            return Response(content=pdf, media_type="application/pdf")
-        else:
-            screenshot = await page.screenshot(full_page=req.full_page, type=req.format)
-            return {"image": base64.b64encode(screenshot).decode()}
-
-    except Exception as e:
-        logger.error(f"Screenshot failed: {str(e)}")
-        raise HTTPException(500, "Capture failed")
-
-@app.post("/scrape")
-async def scrape_page(req: ScrapeRequest):
-    """Execute JS and extract page content"""
-    # Implementation similar to screenshot but:
-    # 1. Execute provided JS scripts
-    # 2. Extract DOM elements by CSS selectors
-    # 3. Return structured JSON data
-    pass
-
-# Helper function
-def valid_url(url: str) -> bool:
-    parsed = urlparse(url)
-    return all([parsed.scheme, parsed.netloc])
+        async with async_playwright() as p:
+            browser = await p.chromium.launch(headless=True)
+            page = await browser.new_page()
+
+            # Start timing
+            start_time = time.time()
+            response = await page.goto(url, timeout=30000)
+            load_time = round(time.time() - start_time, 2)
+
+            # Wait for content
+            await page.wait_for_load_state("networkidle")
+
+            # Screenshot
+            screenshot = await page.screenshot(full_page=True)
+            screenshot_base64 = base64.b64encode(screenshot).decode("utf-8")
+
+            # Title and meta info
+            title = await page.title()
+            meta_description = await page.eval_on_selector("meta[name='description']", "el => el.content") if await page.query_selector("meta[name='description']") else None
+            og_image = await page.eval_on_selector("meta[property='og:image']", "el => el.content") if await page.query_selector("meta[property='og:image']") else None
+
+            # SEO flags
+            seo_flags = []
+            if not title:
+                seo_flags.append("Missing <title>")
+            if not meta_description:
+                seo_flags.append("Missing meta description")
+            if not await page.query_selector("h1"):
+                seo_flags.append("Missing <h1> tag")
+            if not og_image:
+                seo_flags.append("Missing Open Graph image")
+
+            # Accessibility flags
+            accessibility_flags = []
+            images = await page.query_selector_all("img")
+            for img in images:
+                has_alt = await img.get_attribute("alt")
+                if not has_alt:
+                    accessibility_flags.append("Image without alt attribute")
+                    break
+
+            await browser.close()
+
+            return AnalysisResult(
+                url=url,
+                load_time=load_time,
+                title=title,
+                meta_description=meta_description,
+                og_image=og_image,
+                seo_flags=seo_flags,
+                accessibility_flags=accessibility_flags,
+                screenshot_base64=screenshot_base64
+            )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
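
For reference, a minimal sketch of how the new /analyze endpoint could be exercised once the app is running (for example via `uvicorn screenshot:app`). The host, port, and target URL below are illustrative assumptions, not part of this commit.

# Hypothetical client for the /analyze endpoint; adjust host/port to your deployment.
import base64
import requests

resp = requests.get(
    "http://localhost:8000/analyze",
    params={"url": "https://example.com"},
    timeout=60,
)
resp.raise_for_status()
data = resp.json()

print("Load time:", data["load_time"], "s")
print("SEO flags:", data["seo_flags"])
print("Accessibility flags:", data["accessibility_flags"])

# The screenshot is returned base64-encoded inside the JSON body; decode it to get the image bytes.
with open("capture.png", "wb") as f:
    f.write(base64.b64decode(data["screenshot_base64"]))

Note that the full-page screenshot is embedded in the JSON response as base64, so responses for long pages can be large.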