"""FastAPI service that analyzes websites for SEO, accessibility, compliance
and technical quality using a Playwright-driven headless Chromium browser."""

import asyncio
import base64
import logging
from typing import List, Optional
from urllib.parse import urlparse

from fastapi import FastAPI, HTTPException, Query
from playwright.async_api import async_playwright
from pydantic import BaseModel

app = FastAPI(
    title="Website Quality & Compliance Analyzer",
    description="API that analyzes websites for SEO, accessibility, compliance and technical quality",
    version="1.0.0",
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class SEOResult(BaseModel):
    title: Optional[str] = None
    meta_description: Optional[str] = None
    h1_tags: List[str] = []
    canonical_url: Optional[str] = None
    robots_txt_present: bool = False
    sitemap_present: bool = False


class AccessibilityResult(BaseModel):
    missing_alt_tags: int = 0
    images_without_alt: List[str] = []
    aria_roles: List[str] = []
    contrast_issues: List[str] = []


class ComplianceResult(BaseModel):
    has_cookie_banner: bool = False
    gdpr_compliant: Optional[bool] = None
    has_privacy_policy: bool = False
    has_terms_of_service: bool = False


class TechnicalResult(BaseModel):
    tech_stack: List[str] = []
    viewport_meta: Optional[str] = None
    doctype: Optional[str] = None
    is_https: bool = False
    has_analytics: bool = False


class BrokenLink(BaseModel):
    url: str
    status: Optional[int] = None
    text: Optional[str] = None


class AnalysisRequest(BaseModel):
    url: str
    screenshot: bool = False
    mobile_test: bool = False
    check_broken_links: bool = False
    depth: int = 1


class AnalysisResponse(BaseModel):
    url: str
    seo: SEOResult
    accessibility: AccessibilityResult
    compliance: ComplianceResult
    technical: TechnicalResult
    broken_links: List[BrokenLink] = []
    mobile_friendly: Optional[bool] = None
    screenshot_base64: Optional[str] = None
    load_time: Optional[float] = None
    success: bool
    error: Optional[str] = None


async def analyze_page(page, url: str, options: AnalysisRequest):
    """Collect SEO, accessibility, compliance and technical data from an already loaded page."""
    result = {
        "url": url,
        "seo": {},
        "accessibility": {},
        "compliance": {},
        "technical": {},
        "broken_links": [],
        "success": True,
    }

    # --- SEO checks ---
    title = await page.title()
    meta_description = await page.evaluate('''() => {
        const meta = document.querySelector('meta[name="description"]');
        return meta ? meta.content : null;
    }''')
    h1_tags = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('h1')).map(h => h.textContent.trim());
    }''')

    result["seo"] = {
        "title": title,
        "meta_description": meta_description,
        "h1_tags": h1_tags,
    }

    # --- Accessibility checks ---
    # Images with no alt attribute at all (decorative images with alt="" still pass).
    images_without_alt = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('img:not([alt])'))
            .map(img => img.src);
    }''')

    result["accessibility"] = {
        "missing_alt_tags": len(images_without_alt),
        "images_without_alt": images_without_alt,
    }

    # --- Compliance checks ---
    # Crude heuristic: report a cookie/consent banner if consent-related
    # keywords appear anywhere in the visible page text.
    has_cookie_banner = await page.evaluate('''() => {
        const keywords = ['cookie', 'gdpr', 'privacy', 'consent'];
        const text = (document.body ? document.body.textContent : '').toLowerCase();
        return keywords.some(kw => text.includes(kw));
    }''')

    result["compliance"] = {
        "has_cookie_banner": has_cookie_banner,
    }

    # --- Technical checks ---
    tech_stack = []

    # Detect a few common client-side libraries via their window globals.
    libraries = await page.evaluate('''() => {
        const libs = [];
        if (window.jQuery) libs.push('jQuery');
        if (window.React) libs.push('React');
        if (window.Vue) libs.push('Vue');
        if (window.angular) libs.push('Angular');
        return libs;
    }''')
    tech_stack.extend(libraries)

    has_analytics = await page.evaluate('''() => {
        return !!document.querySelector('script[src*="google-analytics.com"], script[src*="googletagmanager.com"]');
    }''')

    is_https = url.startswith('https://')

    result["technical"] = {
        "tech_stack": tech_stack,
        "is_https": is_https,
        "has_analytics": has_analytics,
    }

    # --- Broken-link check ---
    # Note: only links found on the analyzed page are checked; depth > 1 is not crawled.
    if options.check_broken_links and options.depth > 0:
        links = await page.evaluate('''() => {
            return Array.from(document.querySelectorAll('a[href]')).map(a => ({
                href: a.href,
                text: a.textContent.trim()
            }));
        }''')

        # Only check same-domain links, capped at 10 to bound the run time.
        domain = urlparse(url).netloc
        internal_links = [
            link for link in links
            if link['href'].startswith('http') and urlparse(link['href']).netloc == domain
        ][:10]

        broken_links = []
        for link in internal_links:
            try:
                # Use the page's request context instead of page.goto so the
                # page itself stays on the original URL (e.g. for screenshots).
                response = await page.request.get(link['href'])
                if response.status >= 400:
                    broken_links.append({
                        "url": link['href'],
                        "status": response.status,
                        "text": link['text'],
                    })
            except Exception:
                # Network error or unreachable host: report the link with no status code.
                broken_links.append({
                    "url": link['href'],
                    "status": None,
                    "text": link['text'],
                })

        result["broken_links"] = broken_links

    return result
@app.post("/analyze", response_model=AnalysisResponse) |
|
async def analyze_website(request: AnalysisRequest): |
|
"""Analyze a website for quality and compliance metrics""" |
|
async with async_playwright() as p: |
|
try: |
|
browser = await p.chromium.launch() |
|
context = await browser.new_context() |
|
page = await context.new_page() |
|
|
|
|
|
start_time = asyncio.get_event_loop().time() |
|
|
|
|
|
response = await page.goto(request.url, wait_until="domcontentloaded") |
|
if not response or response.status >= 400: |
|
raise HTTPException(status_code=400, detail=f"Failed to load page. Status: {response.status if response else 'unknown'}") |
|
|
|
|
|
if request.mobile_test: |
|
mobile_viewport = {'width': 375, 'height': 667} |
|
await page.set_viewport_size(mobile_viewport) |
|
result = await analyze_page(page, request.url, request) |
|
result["mobile_friendly"] = True |
|
else: |
|
result = await analyze_page(page, request.url, request) |
|
|
|
|
|
if request.screenshot: |
|
screenshot = await page.screenshot(full_page=True) |
|
result["screenshot_base64"] = base64.b64encode(screenshot).decode('utf-8') |
|
|
|
|
|
end_time = asyncio.get_event_loop().time() |
|
result["load_time"] = end_time - start_time |
|
|
|
await browser.close() |
|
return result |
|
|
|
except Exception as e: |
|
logger.error(f"Error analyzing website: {str(e)}") |
|
await browser.close() |
|
raise HTTPException(status_code=500, detail=str(e)) |
|
|
|
@app.get("/analyze", response_model=AnalysisResponse) |
|
async def analyze_website_get( |
|
url: str = Query(..., description="URL to analyze"), |
|
screenshot: bool = Query(False, description="Include screenshot"), |
|
mobile_test: bool = Query(False, description="Test mobile responsiveness"), |
|
check_broken_links: bool = Query(False, description="Check for broken links"), |
|
depth: int = Query(1, description="Depth for broken links check") |
|
): |
|
"""GET endpoint for website analysis""" |
|
request = AnalysisRequest( |
|
url=url, |
|
screenshot=screenshot, |
|
mobile_test=mobile_test, |
|
check_broken_links=check_broken_links, |
|
depth=depth |
|
) |
|
return await analyze_website(request) |
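

# Example requests against a locally running instance (a sketch, assuming the
# default host/port configured below and a reachable target site):
#
#   curl -X POST http://localhost:8000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com", "screenshot": true, "check_broken_links": true}'
#
#   curl "http://localhost:8000/analyze?url=https://example.com&mobile_test=true"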


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)