"""Website Quality & Compliance Analyzer.

FastAPI service that loads a page in headless Chromium (Playwright) and
reports SEO, accessibility, compliance and technical-quality metrics.
"""

from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from playwright.async_api import async_playwright
import asyncio
import base64
import logging
from typing import List, Optional
from urllib.parse import urlparse

app = FastAPI(
    title="Website Quality & Compliance Analyzer",
    description="API that analyzes websites for SEO, accessibility, compliance and technical quality",
    version="1.0.0",
)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class SEOResult(BaseModel):
    """Search-engine-optimisation signals extracted from the page."""
    title: Optional[str] = None
    meta_description: Optional[str] = None
    h1_tags: List[str] = []
    canonical_url: Optional[str] = None
    robots_txt_present: bool = False
    sitemap_present: bool = False


class AccessibilityResult(BaseModel):
    """Basic accessibility findings (alt-text coverage etc.)."""
    missing_alt_tags: int = 0
    images_without_alt: List[str] = []
    # aria_roles / contrast_issues are declared for API stability but are
    # not yet populated by any check.
    aria_roles: List[str] = []
    contrast_issues: List[str] = []


class ComplianceResult(BaseModel):
    """Privacy/consent-related signals."""
    has_cookie_banner: bool = False
    # gdpr_compliant stays None: the service has no check for it yet.
    gdpr_compliant: Optional[bool] = None
    has_privacy_policy: bool = False
    has_terms_of_service: bool = False


class TechnicalResult(BaseModel):
    """Technical characteristics of the page."""
    tech_stack: List[str] = []
    viewport_meta: Optional[str] = None
    doctype: Optional[str] = None
    is_https: bool = False
    has_analytics: bool = False


class BrokenLink(BaseModel):
    """One internal link that failed to load."""
    url: str
    status: Optional[int] = None  # HTTP status, or None when the request itself failed
    text: Optional[str] = None


class AnalysisRequest(BaseModel):
    """Options controlling a single analysis run."""
    url: str
    screenshot: bool = False
    mobile_test: bool = False
    check_broken_links: bool = False
    depth: int = 1  # How many levels deep to check broken links


class AnalysisResponse(BaseModel):
    """Full analysis report returned by both endpoints."""
    url: str
    seo: SEOResult
    accessibility: AccessibilityResult
    compliance: ComplianceResult
    technical: TechnicalResult
    broken_links: List[BrokenLink] = []
    mobile_friendly: Optional[bool] = None
    screenshot_base64: Optional[str] = None
    load_time: Optional[float] = None
    success: bool
    error: Optional[str] = None


async def _resource_exists(page, url: str) -> bool:
    """Best-effort probe for a sibling resource (robots.txt / sitemap.xml).

    Uses the page's APIRequestContext so cookies/headers match the
    browsing session. Any network error just means "not confirmed".
    """
    try:
        response = await page.request.get(url)
        return response.ok
    except Exception:
        return False


async def _seo_checks(page, url: str) -> dict:
    """Collect title, meta description, h1s, canonical URL, and
    robots.txt / sitemap.xml presence for the page's origin."""
    title = await page.title()
    meta_description = await page.evaluate('''() => {
        const meta = document.querySelector('meta[name="description"]');
        return meta ? meta.content : null;
    }''')
    h1_tags = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('h1')).map(h => h.textContent.trim());
    }''')
    canonical_url = await page.evaluate('''() => {
        const link = document.querySelector('link[rel="canonical"]');
        return link ? link.href : null;
    }''')
    parsed = urlparse(url)
    base = f"{parsed.scheme}://{parsed.netloc}"
    return {
        "title": title,
        "meta_description": meta_description,
        "h1_tags": h1_tags,
        "canonical_url": canonical_url,
        "robots_txt_present": await _resource_exists(page, f"{base}/robots.txt"),
        "sitemap_present": await _resource_exists(page, f"{base}/sitemap.xml"),
    }


async def _accessibility_checks(page) -> dict:
    """Find images that are missing an alt attribute entirely."""
    images_without_alt = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('img:not([alt])'))
            .map(img => img.src);
    }''')
    return {
        "missing_alt_tags": len(images_without_alt),
        "images_without_alt": images_without_alt,
    }


async def _compliance_checks(page) -> dict:
    """Heuristic cookie-banner, privacy-policy and terms-of-service detection.

    These are keyword/selector heuristics, not a legal compliance audit.
    """
    has_cookie_banner = await page.evaluate('''() => {
        const keywords = ['cookie', 'gdpr', 'privacy', 'consent'];
        const elements = document.querySelectorAll('*');
        for (let el of elements) {
            const text = el.textContent.toLowerCase();
            if (keywords.some(kw => text.includes(kw))) {
                return true;
            }
        }
        return false;
    }''')
    has_privacy_policy = await page.evaluate(
        '''() => !!document.querySelector('a[href*="privacy"]')''')
    has_terms_of_service = await page.evaluate(
        '''() => !!document.querySelector('a[href*="terms"]')''')
    return {
        "has_cookie_banner": has_cookie_banner,
        "has_privacy_policy": has_privacy_policy,
        "has_terms_of_service": has_terms_of_service,
    }


async def _technical_checks(page, url: str) -> dict:
    """Detect JS frameworks, analytics tags, viewport meta, doctype, HTTPS."""
    # Check for common JS libraries via their window globals.
    tech_stack = await page.evaluate('''() => {
        const libs = [];
        if (window.jQuery) libs.push('jQuery');
        if (window.React) libs.push('React');
        if (window.Vue) libs.push('Vue');
        if (window.angular) libs.push('Angular');
        return libs;
    }''')
    has_analytics = await page.evaluate('''() => {
        return !!document.querySelector('script[src*="google-analytics.com"], script[src*="googletagmanager.com"]');
    }''')
    viewport_meta = await page.evaluate('''() => {
        const meta = document.querySelector('meta[name="viewport"]');
        return meta ? meta.content : null;
    }''')
    doctype = await page.evaluate(
        '''() => document.doctype ? document.doctype.name : null''')
    return {
        "tech_stack": tech_stack,
        "viewport_meta": viewport_meta,
        "doctype": doctype,
        "is_https": url.startswith('https://'),
        "has_analytics": has_analytics,
    }


async def _broken_link_checks(page, url: str) -> list:
    """Probe up to 10 same-domain links for HTTP errors.

    BUG FIX: probes from a throwaway page in the same browser context
    instead of navigating `page` itself, so the analyzed page stays loaded
    for the screenshot taken afterwards.
    """
    links = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('a[href]')).map(a => ({
            href: a.href,
            text: a.textContent.trim()
        }));
    }''')
    # Filter out external links and non-http links
    domain = urlparse(url).netloc
    internal_links = [
        link for link in links
        if link['href'].startswith('http') and domain in link['href']
    ][:10]  # Limit to 10 links for demo purposes

    broken_links = []
    probe = await page.context.new_page()
    try:
        for link in internal_links:
            try:
                response = await probe.goto(link['href'], wait_until="domcontentloaded")
                status = response.status if response else None
                if status and status >= 400:
                    broken_links.append({
                        "url": link['href'],
                        "status": status,
                        "text": link['text'],
                    })
            except Exception:
                # Navigation failure (DNS error, timeout, ...) counts as broken.
                broken_links.append({
                    "url": link['href'],
                    "status": None,
                    "text": link['text'],
                })
    finally:
        await probe.close()
    return broken_links


async def analyze_page(page, url: str, options: AnalysisRequest) -> dict:
    """Run all enabled checks against an already-loaded `page`.

    Returns a dict matching AnalysisResponse (minus the response-level
    fields such as load_time that the endpoint fills in).
    """
    result = {
        "url": url,
        "seo": await _seo_checks(page, url),
        "accessibility": await _accessibility_checks(page),
        "compliance": await _compliance_checks(page),
        "technical": await _technical_checks(page, url),
        "broken_links": [],
        "success": True,
    }
    # Broken links check (if requested)
    if options.check_broken_links and options.depth > 0:
        result["broken_links"] = await _broken_link_checks(page, url)
    return result


@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_website(request: AnalysisRequest):
    """Analyze a website for quality and compliance metrics"""
    async with async_playwright() as p:
        browser = None  # so cleanup is safe even if launch() itself fails
        try:
            browser = await p.chromium.launch()
            context = await browser.new_context()
            page = await context.new_page()

            # Start timing
            start_time = asyncio.get_event_loop().time()

            # Navigate to the page
            response = await page.goto(request.url, wait_until="domcontentloaded")
            if not response or response.status >= 400:
                raise HTTPException(
                    status_code=400,
                    detail=f"Failed to load page. Status: {response.status if response else 'unknown'}",
                )

            # Mobile test if requested: emulate a phone-sized viewport
            # before running the checks.
            if request.mobile_test:
                await page.set_viewport_size({'width': 375, 'height': 667})
                result = await analyze_page(page, request.url, request)
                # Basic check - would need more sophisticated testing
                result["mobile_friendly"] = True
            else:
                result = await analyze_page(page, request.url, request)

            # Screenshot if requested
            if request.screenshot:
                screenshot = await page.screenshot(full_page=True)
                result["screenshot_base64"] = base64.b64encode(screenshot).decode('utf-8')

            # Calculate total analysis time
            result["load_time"] = asyncio.get_event_loop().time() - start_time
            return result
        except HTTPException:
            # BUG FIX: let the deliberate 400 above propagate instead of
            # being re-wrapped as a 500 by the handler below.
            raise
        except Exception as e:
            logger.error(f"Error analyzing website: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            # BUG FIX: the original called browser.close() inside the except
            # path, which raised UnboundLocalError when launch() had failed;
            # a finally block guarantees cleanup on every path.
            if browser is not None:
                await browser.close()


@app.get("/analyze", response_model=AnalysisResponse)
async def analyze_website_get(
    url: str = Query(..., description="URL to analyze"),
    screenshot: bool = Query(False, description="Include screenshot"),
    mobile_test: bool = Query(False, description="Test mobile responsiveness"),
    check_broken_links: bool = Query(False, description="Check for broken links"),
    depth: int = Query(1, description="Depth for broken links check"),
):
    """GET endpoint for website analysis"""
    request = AnalysisRequest(
        url=url,
        screenshot=screenshot,
        mobile_test=mobile_test,
        check_broken_links=check_broken_links,
        depth=depth,
    )
    return await analyze_website(request)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)