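"""Website Quality & Compliance Analyzer.

FastAPI service that drives headless Chromium via Playwright to check a
target URL for basic SEO, accessibility, compliance and technical signals.
"""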
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from playwright.async_api import async_playwright
import asyncio
import base64
import logging
from typing import List, Optional
from urllib.parse import urlparse
app = FastAPI(
title="Website Quality & Compliance Analyzer",
description="API that analyzes websites for SEO, accessibility, compliance and technical quality",
version="1.0.0"
)
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SEOResult(BaseModel):
title: Optional[str] = None
meta_description: Optional[str] = None
h1_tags: List[str] = []
canonical_url: Optional[str] = None
robots_txt_present: bool = False
sitemap_present: bool = False
class AccessibilityResult(BaseModel):
missing_alt_tags: int = 0
images_without_alt: List[str] = []
aria_roles: List[str] = []
contrast_issues: List[str] = []
class ComplianceResult(BaseModel):
has_cookie_banner: bool = False
gdpr_compliant: Optional[bool] = None
has_privacy_policy: bool = False
has_terms_of_service: bool = False
class TechnicalResult(BaseModel):
tech_stack: List[str] = []
viewport_meta: Optional[str] = None
doctype: Optional[str] = None
is_https: bool = False
has_analytics: bool = False
class BrokenLink(BaseModel):
url: str
status: Optional[int] = None
text: Optional[str] = None
class AnalysisRequest(BaseModel):
url: str
screenshot: bool = False
mobile_test: bool = False
check_broken_links: bool = False
    depth: int = 1  # Link-check depth; currently only one level (depth > 0) is implemented
class AnalysisResponse(BaseModel):
url: str
seo: SEOResult
accessibility: AccessibilityResult
compliance: ComplianceResult
technical: TechnicalResult
broken_links: List[BrokenLink] = []
mobile_friendly: Optional[bool] = None
screenshot_base64: Optional[str] = None
load_time: Optional[float] = None
success: bool
error: Optional[str] = None
async def analyze_page(page, url: str, options: AnalysisRequest):
    """Run the individual SEO, accessibility, compliance and technical
    checks against an already-loaded page and collect them into a dict."""
result = {
"url": url,
"seo": {},
"accessibility": {},
"compliance": {},
"technical": {},
"broken_links": [],
"success": True
}
# Basic SEO checks
title = await page.title()
meta_description = await page.evaluate('''() => {
const meta = document.querySelector('meta[name="description"]');
return meta ? meta.content : null;
}''')
h1_tags = await page.evaluate('''() => {
return Array.from(document.querySelectorAll('h1')).map(h => h.textContent.trim());
}''')
result["seo"] = {
"title": title,
"meta_description": meta_description,
"h1_tags": h1_tags
}
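    # The SEOResult model also declares canonical_url, robots_txt_present and
    # sitemap_present; a minimal, best-effort sketch of populating them
    # (robots.txt/sitemap.xml are probed with Playwright's request API at
    # their conventional root locations)
    result["seo"]["canonical_url"] = await page.evaluate('''() => {
        const link = document.querySelector('link[rel="canonical"]');
        return link ? link.href : null;
    }''')
    base = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
    try:
        result["seo"]["robots_txt_present"] = (await page.request.get(f"{base}/robots.txt")).ok
        result["seo"]["sitemap_present"] = (await page.request.get(f"{base}/sitemap.xml")).ok
    except Exception:
        pass  # probes are best-effort; the model defaults (False) remain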
# Accessibility checks
images_without_alt = await page.evaluate('''() => {
return Array.from(document.querySelectorAll('img:not([alt])'))
.map(img => img.src);
}''')
result["accessibility"] = {
"missing_alt_tags": len(images_without_alt),
"images_without_alt": images_without_alt
}
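    # AccessibilityResult also declares aria_roles; collect the distinct
    # role attributes used on the page. Automated contrast checking is out
    # of scope here, so contrast_issues keeps its default empty list
    result["accessibility"]["aria_roles"] = await page.evaluate('''() => {
        return Array.from(new Set(
            Array.from(document.querySelectorAll('[role]'))
                .map(el => el.getAttribute('role'))
        ));
    }''')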
    # Compliance checks: heuristic cookie-banner detection. Scanning every
    # element's textContent would match the <html> root whenever a keyword
    # appears anywhere on the page, so look for elements whose id or class
    # suggests a consent UI instead
    has_cookie_banner = await page.evaluate('''() => {
        const selectors = [
            '[id*="cookie" i]', '[class*="cookie" i]',
            '[id*="consent" i]', '[class*="consent" i]',
            '[id*="gdpr" i]', '[class*="gdpr" i]'
        ];
        return !!document.querySelector(selectors.join(', '));
    }''')
result["compliance"] = {
"has_cookie_banner": has_cookie_banner
}
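    # ComplianceResult also declares has_privacy_policy and
    # has_terms_of_service; a heuristic sketch that scans link text and
    # hrefs for them (gdpr_compliant is left as None, since actual GDPR
    # compliance cannot be inferred automatically)
    legal_links = await page.evaluate('''() => {
        const links = Array.from(document.querySelectorAll('a[href]'))
            .map(a => (a.textContent + ' ' + a.href).toLowerCase());
        return {
            privacy: links.some(t => t.includes('privacy')),
            terms: links.some(t => t.includes('terms'))
        };
    }''')
    result["compliance"]["has_privacy_policy"] = legal_links["privacy"]
    result["compliance"]["has_terms_of_service"] = legal_links["terms"]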
# Technical checks
tech_stack = []
# Check for common JS libraries
libraries = await page.evaluate('''() => {
const libs = [];
if (window.jQuery) libs.push('jQuery');
if (window.React) libs.push('React');
if (window.Vue) libs.push('Vue');
if (window.angular) libs.push('Angular');
return libs;
}''')
tech_stack.extend(libraries)
# Check for analytics
has_analytics = await page.evaluate('''() => {
return !!document.querySelector('script[src*="google-analytics.com"], script[src*="googletagmanager.com"]');
}''')
is_https = url.startswith('https://')
result["technical"] = {
"tech_stack": tech_stack,
"is_https": is_https,
"has_analytics": has_analytics
}
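    # TechnicalResult also declares viewport_meta and doctype; both are
    # cheap to read from the live document
    result["technical"]["viewport_meta"] = await page.evaluate('''() => {
        const meta = document.querySelector('meta[name="viewport"]');
        return meta ? meta.content : null;
    }''')
    result["technical"]["doctype"] = await page.evaluate(
        '() => document.doctype ? document.doctype.name : null'
    )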
    # Broken links check (if requested)
    if options.check_broken_links and options.depth > 0:
        links = await page.evaluate('''() => {
            return Array.from(document.querySelectorAll('a[href]')).map(a => ({
                href: a.href,
                text: a.textContent.trim()
            }));
        }''')
        # Keep only same-domain http(s) links; comparing netloc avoids
        # substring false positives (e.g. the domain appearing in a query string)
        domain = urlparse(url).netloc
        internal_links = [
            link for link in links
            if link['href'].startswith('http') and urlparse(link['href']).netloc == domain
        ][:10]  # Limit to 10 links to keep response times reasonable
        broken_links = []
        for link in internal_links:
            try:
                # Probe with Playwright's request API instead of page.goto so
                # the analyzed page is not navigated away (a later screenshot
                # would otherwise capture the last link visited)
                response = await page.request.get(link['href'])
                if response.status >= 400:
                    broken_links.append({
                        "url": link['href'],
                        "status": response.status,
                        "text": link['text']
                    })
            except Exception:
                # Request failed outright (DNS error, timeout, etc.)
                broken_links.append({
                    "url": link['href'],
                    "status": None,
                    "text": link['text']
                })
        result["broken_links"] = broken_links
return result
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_website(request: AnalysisRequest):
    """Analyze a website for quality and compliance metrics"""
    async with async_playwright() as p:
        browser = None
        try:
            browser = await p.chromium.launch()
            context = await browser.new_context()
            page = await context.new_page()
            # Set a mobile viewport before navigating so the page renders
            # at mobile size from the start
            if request.mobile_test:
                await page.set_viewport_size({'width': 375, 'height': 667})
            # Navigate to the page, timing only the navigation itself
            start_time = asyncio.get_event_loop().time()
            response = await page.goto(request.url, wait_until="domcontentloaded")
            load_time = asyncio.get_event_loop().time() - start_time
            if not response or response.status >= 400:
                raise HTTPException(
                    status_code=400,
                    detail=f"Failed to load page. Status: {response.status if response else 'unknown'}"
                )
            result = await analyze_page(page, request.url, request)
            if request.mobile_test:
                # Rough heuristic: treat a viewport meta tag as "mobile
                # friendly"; real responsiveness testing needs much more
                result["mobile_friendly"] = await page.evaluate(
                    '''() => !!document.querySelector('meta[name="viewport"]')'''
                )
            # Screenshot if requested
            if request.screenshot:
                screenshot = await page.screenshot(full_page=True)
                result["screenshot_base64"] = base64.b64encode(screenshot).decode('utf-8')
            result["load_time"] = load_time
            return result
        except HTTPException:
            # Preserve deliberate 4xx responses instead of wrapping them in a 500
            raise
        except Exception as e:
            logger.error(f"Error analyzing website: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            if browser:
                await browser.close()
@app.get("/analyze", response_model=AnalysisResponse)
async def analyze_website_get(
url: str = Query(..., description="URL to analyze"),
screenshot: bool = Query(False, description="Include screenshot"),
mobile_test: bool = Query(False, description="Test mobile responsiveness"),
check_broken_links: bool = Query(False, description="Check for broken links"),
depth: int = Query(1, description="Depth for broken links check")
):
"""GET endpoint for website analysis"""
request = AnalysisRequest(
url=url,
screenshot=screenshot,
mobile_test=mobile_test,
check_broken_links=check_broken_links,
depth=depth
)
return await analyze_website(request)
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
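# Example usage once the server is running (assuming the default host/port above):
#   curl -X POST http://localhost:8000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com", "screenshot": false}'
#   curl "http://localhost:8000/analyze?url=https://example.com&check_broken_links=true"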