# main.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional
import base64
import json

from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError

app = FastAPI(title="Web Analyzer API")

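# Response models for the typed endpoints; the audit endpoints below return
# plain dicts.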
class ScreenshotResponse(BaseModel):
    screenshot: str


class MetadataResponse(BaseModel):
    title: Optional[str]
    description: Optional[str]
    og: dict
    twitter: dict
    canonical: Optional[str]

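# One fresh headless Chromium instance per request keeps endpoints isolated,
# at the cost of browser startup time on every call.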
async def get_page(url: str):
    pw = await async_playwright().start()
    browser = await pw.chromium.launch(headless=True)
    page = await browser.new_page()
    try:
        await page.goto(url, timeout=30000)
    except PlaywrightTimeoutError:
        # Clean up before raising, otherwise the browser process leaks
        await browser.close()
        await pw.stop()
        raise HTTPException(status_code=504, detail="Page load timed out")
    return page, browser, pw

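# Helper for optional elements: query_selector returns None immediately when
# the element is absent, whereas page.get_attribute() auto-waits for the
# selector and raises a timeout error on pages that simply lack the tag.
async def get_attr(page, selector: str, name: str) -> Optional[str]:
    el = await page.query_selector(selector)
    return await el.get_attribute(name) if el else None
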
@app.get("/metadata", response_model=MetadataResponse)
async def get_metadata(url: str):
page, browser, pw = await get_page(url)
try:
title = await page.title()
desc = await page.get_attribute("meta[name='description']", "content")
og = {}
twitter = {}
for prop in ["title", "description", "image"]:
og[f"og:{prop}"] = await page.get_attribute(f"meta[property='og:{prop}']", "content")
twitter[f"twitter:{prop}"] = await page.get_attribute(f"meta[name='twitter:{prop}']", "content")
canonical = await page.get_attribute("link[rel='canonical']", "href")
return {
"title": title,
"description": desc,
"og": og,
"twitter": twitter,
"canonical": canonical
}
finally:
await browser.close()
await pw.stop()
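# Full-page screenshot, base64-encoded so it fits inside the JSON response.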
@app.get("/screenshot", response_model=ScreenshotResponse)
async def get_screenshot(url: str):
page, browser, pw = await get_page(url)
try:
image_bytes = await page.screenshot(full_page=True)
image_base64 = base64.b64encode(image_bytes).decode()
return {"screenshot": image_base64}
finally:
await browser.close()
await pw.stop()
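# Heuristic SEO audit: h1 count, images missing alt text, internal/external
# link split, robots meta and canonical link.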
@app.get("/seo")
async def seo_audit(url: str):
page, browser, pw = await get_page(url)
try:
h1_count = await page.locator("h1").count()
imgs = await page.query_selector_all("img")
missing_alts = [await img.get_attribute("src") for img in imgs if not await img.get_attribute("alt")]
anchors = await page.query_selector_all("a[href]")
internal, external = 0, 0
for a in anchors:
href = await a.get_attribute("href")
if href and href.startswith("http"):
if url in href:
internal += 1
else:
external += 1
robots = await page.get_attribute("meta[name='robots']", "content")
canonical = await page.get_attribute("link[rel='canonical']", "href")
return {
"h1_count": h1_count,
"missing_image_alts": missing_alts,
"internal_links": internal,
"external_links": external,
"robots_meta": robots,
"has_canonical": bool(canonical)
}
finally:
await browser.close()
await pw.stop()
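# Performance metrics pulled from the browser's performance timeline after
# the load event.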
@app.get("/performance")
async def performance_metrics(url: str):
page, browser, pw = await get_page(url)
try:
nav_timing = await page.evaluate("JSON.stringify(performance.getEntriesByType('navigation'))")
timing = json.loads(nav_timing)[0]
fcp = await page.evaluate("performance.getEntriesByName('first-contentful-paint')[0]?.startTime")
lcp = await page.evaluate("performance.getEntriesByType('largest-contentful-paint')[0]?.renderTime")
cls_entries = await page.evaluate("JSON.stringify(performance.getEntriesByType('layout-shift'))")
cls = sum(e['value'] for e in json.loads(cls_entries))
return {
"page_load_time_ms": timing['duration'],
"first_contentful_paint": fcp,
"largest_contentful_paint": lcp,
"cumulative_layout_shift": cls
}
finally:
await browser.close()
await pw.stop()
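# Extracts JSON-LD blocks and reports their top-level @type values.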
@app.get("/structured-data")
async def structured_data(url: str):
page, browser, pw = await get_page(url)
try:
scripts = await page.query_selector_all("script[type='application/ld+json']")
json_ld_list = []
for s in scripts:
text = await s.inner_text()
try:
data = json.loads(text)
json_ld_list.append(data)
except Exception:
continue
types = []
for obj in json_ld_list:
if isinstance(obj, dict) and "@type" in obj:
types.append(obj["@type"])
return {
"schema_found": bool(json_ld_list),
"types": types,
"schema": json_ld_list
}
finally:
await browser.close()
await pw.stop()
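# Lightweight accessibility heuristics: alt text, button labels, and landmark
# elements; a quick signal rather than a full audit.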
@app.get("/accessibility")
async def accessibility_check(url: str):
page, browser, pw = await get_page(url)
try:
imgs = await page.query_selector_all("img")
missing_alt = len([img for img in imgs if not await img.get_attribute("alt")])
buttons = await page.query_selector_all("button")
missing_labels = len([b for b in buttons if not await b.get_attribute("aria-label") and not await b.inner_text()])
landmarks = []
for tag in ["main", "nav", "footer", "header"]:
if await page.query_selector(tag):
landmarks.append(tag)
return {
"images_missing_alt": missing_alt,
"buttons_missing_label": missing_labels,
"landmarks": landmarks
}
finally:
await browser.close()
await pw.stop()
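
# Run locally with: uvicorn main:app --reload
# (first install the browser binary: python -m playwright install chromium)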