# main.py
"""Web Analyzer API.

Loads a URL in headless Chromium (Playwright) and exposes inspection
endpoints: page metadata, full-page screenshot, SEO audit, performance
timings, JSON-LD structured data, and basic accessibility checks.
"""
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from typing import List, Optional
from contextlib import asynccontextmanager
from urllib.parse import urlparse
import base64
import json
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
import asyncio

app = FastAPI(title="Web Analyzer API")


class ScreenshotResponse(BaseModel):
    # Base64-encoded PNG of the full page.
    screenshot: str


class MetadataResponse(BaseModel):
    # All fields may be None/empty when the page omits the corresponding tag.
    title: Optional[str]
    description: Optional[str]
    og: dict       # "og:title" / "og:description" / "og:image" -> value or None
    twitter: dict  # "twitter:title" / "twitter:description" / "twitter:image"
    canonical: Optional[str]


@asynccontextmanager
async def open_page(url: str):
    """Yield a Chromium page navigated to *url*, cleaning up on every exit path.

    The browser and the Playwright driver are always shut down, including
    when navigation times out (the original helper raised before the caller
    ever received the handles, leaking both per timed-out request).

    Raises:
        HTTPException(504): navigation did not finish within 30 s.
    """
    pw = await async_playwright().start()
    try:
        browser = await pw.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            try:
                await page.goto(url, timeout=30000)
            except PlaywrightTimeoutError:
                raise HTTPException(status_code=504, detail="Page load timed out")
            yield page
        finally:
            await browser.close()
    finally:
        await pw.stop()


async def get_page(url):
    """Launch a browser, navigate to *url*, and return (page, browser, pw).

    Kept for backward compatibility with any external caller; the endpoints
    in this module use :func:`open_page` instead. The caller is responsible
    for ``await browser.close()`` and ``await pw.stop()``.

    Raises:
        HTTPException(504): navigation did not finish within 30 s.
    """
    pw = await async_playwright().start()
    browser = await pw.chromium.launch(headless=True)
    page = await browser.new_page()
    try:
        await page.goto(url, timeout=30000)
    except PlaywrightTimeoutError:
        # Fix: release the browser and driver before surfacing the 504
        # (the original raised here and leaked both).
        await browser.close()
        await pw.stop()
        raise HTTPException(status_code=504, detail="Page load timed out")
    return page, browser, pw


async def _attr(page, selector: str, name: str) -> Optional[str]:
    """Return attribute *name* of the first *selector* match, or None.

    ``page.get_attribute(selector, ...)`` auto-waits for the selector and
    raises a TimeoutError when the element is absent — the original
    endpoints therefore hung ~30 s on any page missing an optional tag.
    ``query_selector`` returns immediately, which is what we want here.
    """
    el = await page.query_selector(selector)
    return await el.get_attribute(name) if el else None


@app.get("/metadata", response_model=MetadataResponse)
async def get_metadata(url: str):
    """Scrape title, description, OpenGraph/Twitter tags, and canonical link."""
    async with open_page(url) as page:
        og, twitter = {}, {}
        for prop in ["title", "description", "image"]:
            og[f"og:{prop}"] = await _attr(page, f"meta[property='og:{prop}']", "content")
            twitter[f"twitter:{prop}"] = await _attr(page, f"meta[name='twitter:{prop}']", "content")
        return {
            "title": await page.title(),
            "description": await _attr(page, "meta[name='description']", "content"),
            "og": og,
            "twitter": twitter,
            "canonical": await _attr(page, "link[rel='canonical']", "href"),
        }


@app.get("/screenshot", response_model=ScreenshotResponse)
async def get_screenshot(url: str):
    """Return a full-page screenshot as a base64-encoded PNG."""
    async with open_page(url) as page:
        image_bytes = await page.screenshot(full_page=True)
        return {"screenshot": base64.b64encode(image_bytes).decode()}


@app.get("/seo")
async def seo_audit(url: str):
    """Basic on-page SEO audit: headings, image alts, link split, robots/canonical."""
    async with open_page(url) as page:
        h1_count = await page.locator("h1").count()

        # src of every image that has no (or an empty) alt attribute.
        missing_alts = []
        for img in await page.query_selector_all("img"):
            if not await img.get_attribute("alt"):
                missing_alts.append(await img.get_attribute("src"))

        # Classify absolute links by host. The original used the substring
        # test `url in href`, which miscounts (e.g. an external URL that
        # embeds the site URL in a query parameter counted as internal).
        own_host = urlparse(url).netloc.lower()
        internal = external = 0
        for anchor in await page.query_selector_all("a[href]"):
            href = await anchor.get_attribute("href")
            if href and href.startswith("http"):
                if urlparse(href).netloc.lower() == own_host:
                    internal += 1
                else:
                    external += 1

        canonical = await _attr(page, "link[rel='canonical']", "href")
        return {
            "h1_count": h1_count,
            "missing_image_alts": missing_alts,
            "internal_links": internal,
            "external_links": external,
            "robots_meta": await _attr(page, "meta[name='robots']", "content"),
            "has_canonical": bool(canonical),
        }


@app.get("/performance")
async def performance_metrics(url: str):
    """Collect navigation/paint timing metrics from the loaded page.

    NOTE(review): layout-shift and largest-contentful-paint entries are
    normally only delivered via a PerformanceObserver (``buffered: true``);
    ``getEntriesByType`` may legitimately return nothing for them, in which
    case those fields come back None / 0 — confirm whether an injected
    observer is needed for reliable CWV numbers.
    """
    async with open_page(url) as page:
        nav_entries = json.loads(await page.evaluate(
            "JSON.stringify(performance.getEntriesByType('navigation'))"))
        # Guard: the original indexed [0] unconditionally and crashed with
        # an IndexError (-> HTTP 500) when no navigation entry was recorded.
        load_ms = nav_entries[0]["duration"] if nav_entries else None

        fcp = await page.evaluate(
            "performance.getEntriesByName('first-contentful-paint')[0]?.startTime")
        # renderTime is 0 for cross-origin images lacking Timing-Allow-Origin;
        # fall back to loadTime, and take the last (final) LCP candidate.
        lcp = await page.evaluate(
            "(() => { const es = performance.getEntriesByType('largest-contentful-paint');"
            " const e = es[es.length - 1];"
            " return e ? (e.renderTime || e.loadTime) : null; })()")

        cls_entries = json.loads(await page.evaluate(
            "JSON.stringify(performance.getEntriesByType('layout-shift'))"))
        return {
            "page_load_time_ms": load_ms,
            "first_contentful_paint": fcp,
            "largest_contentful_paint": lcp,
            "cumulative_layout_shift": sum(e["value"] for e in cls_entries),
        }


@app.get("/structured-data")
async def structured_data(url: str):
    """Extract JSON-LD blocks and the @type values they declare."""
    async with open_page(url) as page:
        json_ld_list = []
        for script in await page.query_selector_all("script[type='application/ld+json']"):
            text = await script.inner_text()
            try:
                json_ld_list.append(json.loads(text))
            except ValueError:
                # Malformed JSON-LD block: skip it, keep the rest.
                continue

        types = []
        for obj in json_ld_list:
            # A single script may hold one object or a top-level list of
            # objects; the original skipped the list case entirely.
            items = obj if isinstance(obj, list) else [obj]
            for item in items:
                if isinstance(item, dict) and "@type" in item:
                    types.append(item["@type"])

        return {
            "schema_found": bool(json_ld_list),
            "types": types,
            "schema": json_ld_list,
        }


@app.get("/accessibility")
async def accessibility_check(url: str):
    """Count common accessibility problems and list landmark tags present."""
    async with open_page(url) as page:
        missing_alt = 0
        for img in await page.query_selector_all("img"):
            if not await img.get_attribute("alt"):
                missing_alt += 1

        missing_labels = 0
        for button in await page.query_selector_all("button"):
            # A button is unlabeled when it has neither aria-label nor text.
            if not await button.get_attribute("aria-label") and not await button.inner_text():
                missing_labels += 1

        landmarks = []
        for tag in ["main", "nav", "footer", "header"]:
            if await page.query_selector(tag):
                landmarks.append(tag)

        return {
            "images_missing_alt": missing_alt,
            "buttons_missing_label": missing_labels,
            "landmarks": landmarks,
        }