import asyncio
import base64
from typing import List, Optional

from fastapi import FastAPI, HTTPException
from playwright.async_api import async_playwright
from pydantic import BaseModel

app = FastAPI()

# In-page script that collects every <a> element as {text, href}.
# ``innerText`` only exists on HTML elements and ``href`` is only a plain
# string on HTML anchors; SVG <a> elements expose neither in that form, so
# both fields fall back to values that are always strings. For ordinary HTML
# anchors the result is the same as ``a.innerText.trim()`` / ``a.href``.
_EXTRACT_LINKS_JS = """
() => Array.from(document.querySelectorAll('a')).map(a => ({
    text: (typeof a.innerText === 'string'
        ? a.innerText
        : (a.textContent || '')).trim(),
    href: typeof a.href === 'string'
        ? a.href
        : (a.getAttribute('href') || ''),
}))
"""


class ScrapeRequest(BaseModel):
    """Request body for ``POST /scrape``."""

    # Address handed to ``page.goto``.
    url: str
    # When true, include a base64-encoded screenshot in the response.
    screenshot: bool = True
    # When true, include the list of anchors found on the page.
    get_links: bool = True
    # When true, include the page markup returned by ``page.content()``.
    get_content: bool = True


class LinkInfo(BaseModel):
    """One anchor element extracted from the scraped page."""

    # Trimmed text of the anchor.
    text: str
    # The anchor's ``href`` as reported by the browser.
    href: str


class ScrapeResponse(BaseModel):
    """Result of a scrape; each field stays ``None`` unless it was requested."""

    content: Optional[str] = None
    # Base64 (UTF-8 decoded) encoding of the screenshot bytes.
    screenshot: Optional[str] = None
    links: Optional[List[LinkInfo]] = None


@app.post("/scrape")
async def scrape_page(request: ScrapeRequest) -> ScrapeResponse:
    """Load ``request.url`` in Chromium and return the requested artefacts.

    Args:
        request: Target URL plus flags selecting which of content,
            screenshot and links to collect.

    Returns:
        A ``ScrapeResponse`` with only the requested fields populated.

    Raises:
        HTTPException: Status 500, with the underlying error message as
            ``detail``, if opening the page, navigating or extracting fails.
    """
    # NOTE(security): ``request.url`` is caller-supplied and is navigated to
    # as-is, so this endpoint can be pointed at internal hosts or non-HTTP
    # schemes. Restrict or validate it before exposing this service.
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            # Inside the try so a failure here is also reported as an
            # HTTPException and the browser is still closed.
            page = await browser.new_page()
            await page.goto(request.url, wait_until="networkidle")

            response = ScrapeResponse()

            if request.get_content:
                response.content = await page.content()

            if request.screenshot:
                screenshot_bytes = await page.screenshot()
                response.screenshot = base64.b64encode(
                    screenshot_bytes
                ).decode("utf-8")

            if request.get_links:
                links = await page.evaluate(_EXTRACT_LINKS_JS)
                response.links = [LinkInfo(**link) for link in links]

            return response
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e
        finally:
            # Single cleanup point for both the success and the error path.
            await browser.close()