from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from playwright.async_api import async_playwright
import asyncio
import base64
import logging
from typing import List, Optional
from urllib.parse import urlparse

app = FastAPI(
    title="Website Quality & Compliance Analyzer",
    description="API that analyzes websites for SEO, accessibility, compliance and technical quality",
    version="1.0.0"
)
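
# FastAPI auto-generates interactive API documentation for the endpoints below
# at /docs (Swagger UI) and /redoc once the app is running.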
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SEOResult(BaseModel):
    title: Optional[str] = None
    meta_description: Optional[str] = None
    h1_tags: List[str] = []
    canonical_url: Optional[str] = None
    robots_txt_present: bool = False
    sitemap_present: bool = False

class AccessibilityResult(BaseModel):
    missing_alt_tags: int = 0
    images_without_alt: List[str] = []
    aria_roles: List[str] = []
    contrast_issues: List[str] = []

class ComplianceResult(BaseModel):
    has_cookie_banner: bool = False
    gdpr_compliant: Optional[bool] = None
    has_privacy_policy: bool = False
    has_terms_of_service: bool = False

class TechnicalResult(BaseModel):
    tech_stack: List[str] = []
    viewport_meta: Optional[str] = None
    doctype: Optional[str] = None
    is_https: bool = False
    has_analytics: bool = False

class BrokenLink(BaseModel):
    url: str
    status: Optional[int] = None
    text: Optional[str] = None

class AnalysisRequest(BaseModel):
    url: str
    screenshot: bool = False
    mobile_test: bool = False
    check_broken_links: bool = False
    depth: int = 1  # Reserved for multi-level crawling; only one level is checked currently

class AnalysisResponse(BaseModel):
    url: str
    seo: SEOResult
    accessibility: AccessibilityResult
    compliance: ComplianceResult
    technical: TechnicalResult
    broken_links: List[BrokenLink] = []
    mobile_friendly: Optional[bool] = None
    screenshot_base64: Optional[str] = None
    load_time: Optional[float] = None
    success: bool
    error: Optional[str] = None
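
# Example request body for POST /analyze (illustrative values only):
#
#   {
#       "url": "https://example.com",
#       "screenshot": true,
#       "mobile_test": false,
#       "check_broken_links": true,
#       "depth": 1
#   }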

async def analyze_page(page, url: str, options: AnalysisRequest):
    """Collect SEO, accessibility, compliance and technical metrics from a loaded page."""
    result = {
        "url": url,
        "seo": {},
        "accessibility": {},
        "compliance": {},
        "technical": {},
        "broken_links": [],
        "success": True
    }
    # Basic SEO checks
    title = await page.title()
    meta_description = await page.evaluate('''() => {
        const meta = document.querySelector('meta[name="description"]');
        return meta ? meta.content : null;
    }''')
    h1_tags = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('h1')).map(h => h.textContent.trim());
    }''')
    result["seo"] = {
        "title": title,
        "meta_description": meta_description,
        "h1_tags": h1_tags
    }
    # Accessibility checks
    images_without_alt = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('img:not([alt])'))
            .map(img => img.src);
    }''')
    result["accessibility"] = {
        "missing_alt_tags": len(images_without_alt),
        "images_without_alt": images_without_alt
    }
    # Compliance checks. Heuristic: look for containers whose id/class/label suggests
    # a cookie/consent banner rather than scanning all page text, since nearly every
    # page mentions "privacy" or "cookie" somewhere (e.g. a footer link).
    has_cookie_banner = await page.evaluate('''() => {
        const selector = '[id*="cookie" i], [class*="cookie" i], [id*="consent" i], [class*="consent" i], [aria-label*="cookie" i]';
        return !!document.querySelector(selector);
    }''')
    result["compliance"] = {
        "has_cookie_banner": has_cookie_banner
    }
    # Technical checks
    tech_stack = []
    # Detect common JS libraries via their globals. This is a heuristic: bundled
    # production builds often do not expose these objects on window, so absence
    # here does not prove the library is unused.
    libraries = await page.evaluate('''() => {
        const libs = [];
        if (window.jQuery) libs.push('jQuery');
        if (window.React) libs.push('React');
        if (window.Vue) libs.push('Vue');
        if (window.angular) libs.push('Angular');
        return libs;
    }''')
    tech_stack.extend(libraries)
    # Check for Google Analytics / Tag Manager script tags
    has_analytics = await page.evaluate('''() => {
        return !!document.querySelector('script[src*="google-analytics.com"], script[src*="googletagmanager.com"]');
    }''')
    is_https = url.startswith('https://')
    result["technical"] = {
        "tech_stack": tech_stack,
        "is_https": is_https,
        "has_analytics": has_analytics
    }
    # Broken links check (if requested). Links are visited in a separate page so the
    # page under analysis is not navigated away from (e.g. before a screenshot).
    if options.check_broken_links and options.depth > 0:
        links = await page.evaluate('''() => {
            return Array.from(document.querySelectorAll('a[href]')).map(a => ({
                href: a.href,
                text: a.textContent.trim()
            }));
        }''')
        # Keep only internal http(s) links on the same host
        domain = urlparse(url).netloc
        internal_links = [
            link for link in links
            if link['href'].startswith('http') and urlparse(link['href']).netloc == domain
        ][:10]  # Limit to 10 links for demo purposes
        broken_links = []
        link_page = await page.context.new_page()
        try:
            for link in internal_links:
                try:
                    response = await link_page.goto(link['href'], wait_until="domcontentloaded")
                    status = response.status if response else None
                    if status and status >= 400:
                        broken_links.append({
                            "url": link['href'],
                            "status": status,
                            "text": link['text']
                        })
                except Exception as e:
                    logger.warning(f"Could not check link {link['href']}: {e}")
                    broken_links.append({
                        "url": link['href'],
                        "status": None,
                        "text": link['text']
                    })
        finally:
            await link_page.close()
        result["broken_links"] = broken_links
    return result

@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_website(request: AnalysisRequest):
    """Analyze a website for quality and compliance metrics"""
    async with async_playwright() as p:
        browser = None
        try:
            browser = await p.chromium.launch()
            context = await browser.new_context()
            page = await context.new_page()
            # Navigate to the page, timing only the navigation itself
            start_time = asyncio.get_running_loop().time()
            response = await page.goto(request.url, wait_until="domcontentloaded")
            load_time = asyncio.get_running_loop().time() - start_time
            if not response or response.status >= 400:
                raise HTTPException(status_code=400, detail=f"Failed to load page. Status: {response.status if response else 'unknown'}")
            # Mobile test if requested
            if request.mobile_test:
                mobile_viewport = {'width': 375, 'height': 667}
                await page.set_viewport_size(mobile_viewport)
                result = await analyze_page(page, request.url, request)
                # Basic check: content that overflows a phone-sized viewport
                # usually indicates a non-responsive layout
                result["mobile_friendly"] = await page.evaluate(
                    '() => document.documentElement.scrollWidth <= window.innerWidth'
                )
            else:
                result = await analyze_page(page, request.url, request)
            # Screenshot if requested
            if request.screenshot:
                screenshot = await page.screenshot(full_page=True)
                result["screenshot_base64"] = base64.b64encode(screenshot).decode('utf-8')
            result["load_time"] = load_time
            return result
        except HTTPException:
            # Re-raise client errors unchanged instead of converting them to 500s
            raise
        except Exception as e:
            logger.error(f"Error analyzing website: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            # Close the browser whether analysis succeeded or failed; guard against
            # the launch itself having failed
            if browser:
                await browser.close()

@app.get("/analyze", response_model=AnalysisResponse)
async def analyze_website_get(
    url: str = Query(..., description="URL to analyze"),
    screenshot: bool = Query(False, description="Include screenshot"),
    mobile_test: bool = Query(False, description="Test mobile responsiveness"),
    check_broken_links: bool = Query(False, description="Check for broken links"),
    depth: int = Query(1, description="Depth for broken links check")
):
    """GET endpoint for website analysis"""
    request = AnalysisRequest(
        url=url,
        screenshot=screenshot,
        mobile_test=mobile_test,
        check_broken_links=check_broken_links,
        depth=depth
    )
    return await analyze_website(request)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
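
# Example usage once the server is running locally (illustrative URLs/values):
#
#   curl -X POST http://localhost:8000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com", "check_broken_links": true}'
#
#   curl "http://localhost:8000/analyze?url=https://example.com&screenshot=true"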