from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from playwright.async_api import async_playwright
import asyncio
import base64
import logging
from typing import List, Optional
from urllib.parse import urlparse

app = FastAPI(
    title="Website Quality & Compliance Analyzer",
    description="API that analyzes websites for SEO, accessibility, compliance and technical quality",
    version="1.0.0"
)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SEOResult(BaseModel):
    title: Optional[str] = None
    meta_description: Optional[str] = None
    h1_tags: List[str] = []
    canonical_url: Optional[str] = None
    # Not yet populated: these would require extra fetches of /robots.txt and /sitemap.xml
    robots_txt_present: bool = False
    sitemap_present: bool = False

class AccessibilityResult(BaseModel):
    missing_alt_tags: int = 0
    images_without_alt: List[str] = []
    aria_roles: List[str] = []        # not yet populated by analyze_page
    contrast_issues: List[str] = []   # not yet populated by analyze_page

class ComplianceResult(BaseModel):
    has_cookie_banner: bool = False
    gdpr_compliant: Optional[bool] = None  # not determinable from a single page load; left unset
    has_privacy_policy: bool = False
    has_terms_of_service: bool = False

class TechnicalResult(BaseModel):
    tech_stack: List[str] = []
    viewport_meta: Optional[str] = None
    doctype: Optional[str] = None
    is_https: bool = False
    has_analytics: bool = False

class BrokenLink(BaseModel):
    url: str
    status: Optional[int] = None
    text: Optional[str] = None

class AnalysisRequest(BaseModel):
    url: str
    screenshot: bool = False
    mobile_test: bool = False
    check_broken_links: bool = False
    depth: int = 1  # Link-check depth; the built-in check only follows one level (see the sketch after analyze_page)

class AnalysisResponse(BaseModel):
    url: str
    seo: SEOResult
    accessibility: AccessibilityResult
    compliance: ComplianceResult
    technical: TechnicalResult
    broken_links: List[BrokenLink] = []
    mobile_friendly: Optional[bool] = None
    screenshot_base64: Optional[str] = None
    load_time: Optional[float] = None
    success: bool
    error: Optional[str] = None
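
# Illustrative POST body for /analyze (values are placeholders):
# {
#     "url": "https://example.com",
#     "screenshot": true,
#     "mobile_test": false,
#     "check_broken_links": true,
#     "depth": 1
# }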

async def analyze_page(page, url: str, options: AnalysisRequest):
    """Run the on-page checks against an already-loaded page and return a
    dict shaped like AnalysisResponse."""
    result = {
        "url": url,
        "seo": {},
        "accessibility": {},
        "compliance": {},
        "technical": {},
        "broken_links": [],
        "success": True
    }
    
    # Basic SEO checks
    title = await page.title()
    meta_description = await page.evaluate('''() => {
        const meta = document.querySelector('meta[name="description"]');
        return meta ? meta.content : null;
    }''')
    
    h1_tags = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('h1')).map(h => h.textContent.trim());
    }''')
    
    canonical_url = await page.evaluate('''() => {
        const link = document.querySelector('link[rel="canonical"]');
        return link ? link.href : null;
    }''')
    
    result["seo"] = {
        "title": title,
        "meta_description": meta_description,
        "h1_tags": h1_tags,
        "canonical_url": canonical_url
    }
    
    # Accessibility checks
    images_without_alt = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('img:not([alt])'))
            .map(img => img.src);
    }''')
    
    result["accessibility"] = {
        "missing_alt_tags": len(images_without_alt),
        "images_without_alt": images_without_alt
    }
    
    # Compliance checks. Scanning every element's textContent would match the
    # <body> of any page that merely mentions "privacy", so look for the
    # keywords in ids and classes instead - still a heuristic, just a tighter one.
    has_cookie_banner = await page.evaluate('''() => {
        const keywords = ['cookie', 'gdpr', 'consent'];
        return keywords.some(kw =>
            document.querySelector(`[id*="${kw}" i], [class*="${kw}" i]`) !== null);
    }''')
    
    has_privacy_policy = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('a[href]'))
            .some(a => /privacy/i.test(a.href) || /privacy/i.test(a.textContent));
    }''')
    
    has_terms_of_service = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('a[href]'))
            .some(a => /terms/i.test(a.href) || /terms/i.test(a.textContent));
    }''')
    
    result["compliance"] = {
        "has_cookie_banner": has_cookie_banner,
        "has_privacy_policy": has_privacy_policy,
        "has_terms_of_service": has_terms_of_service
    }
    
    # Technical checks
    tech_stack = []
    
    # Check for common JS libraries exposed on the window object
    libraries = await page.evaluate('''() => {
        const libs = [];
        if (window.jQuery) libs.push('jQuery');
        if (window.React) libs.push('React');
        if (window.Vue) libs.push('Vue');
        if (window.angular) libs.push('Angular');
        return libs;
    }''')
    
    tech_stack.extend(libraries)
    
    # Check for Google Analytics / Tag Manager script tags
    has_analytics = await page.evaluate('''() => {
        return !!document.querySelector('script[src*="google-analytics.com"], script[src*="googletagmanager.com"]');
    }''')
    
    viewport_meta = await page.evaluate('''() => {
        const meta = document.querySelector('meta[name="viewport"]');
        return meta ? meta.content : null;
    }''')
    
    doctype = await page.evaluate('() => document.doctype ? document.doctype.name : null')
    
    is_https = url.startswith('https://')
    
    result["technical"] = {
        "tech_stack": tech_stack,
        "viewport_meta": viewport_meta,
        "doctype": doctype,
        "is_https": is_https,
        "has_analytics": has_analytics
    }
    
    # Broken links check (if requested)
    if options.check_broken_links and options.depth > 0:
        links = await page.evaluate('''() => {
            return Array.from(document.querySelectorAll('a[href]')).map(a => ({
                href: a.href,
                text: a.textContent.trim()
            }));
        }''')
        
        # Filter out external links and non-http links
        domain = urlparse(url).netloc
        internal_links = [
            link for link in links 
            if link['href'].startswith('http') and domain in link['href']
        ][:10]  # Limit to 10 links for demo purposes
        
        broken_links = []
        for link in internal_links:
            try:
                response = await page.goto(link['href'], wait_until="domcontentloaded")
                status = response.status if response else None
                if status and status >= 400:
                    broken_links.append({
                        "url": link['href'],
                        "status": status,
                        "text": link['text']
                    })
            except Exception as e:
                # Navigation failures (DNS errors, timeouts) also count as broken
                logger.warning(f"Could not reach {link['href']}: {e}")
                broken_links.append({
                    "url": link['href'],
                    "status": None,
                    "text": link['text']
                })
        
        # The loop above navigates away from the page under analysis, so return
        # to the original URL before any screenshot is taken later on.
        await page.goto(url, wait_until="domcontentloaded")
        
        result["broken_links"] = broken_links
    
    return result
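
# A minimal sketch (hypothetical, not wired into analyze_page) of how depth > 1
# could extend the broken-link check: follow internal links depth-first, one
# level per unit of depth, tracking visited URLs to avoid cycles. Call it as,
# e.g., await check_links_recursive(page, options.depth, set()). The 10-link
# cap per page mirrors the demo limit above.
async def check_links_recursive(page, depth: int, visited: set) -> List[dict]:
    if depth <= 0:
        return []
    domain = urlparse(page.url).netloc
    links = await page.evaluate('''() => {
        return Array.from(document.querySelectorAll('a[href]')).map(a => ({
            href: a.href,
            text: a.textContent.trim()
        }));
    }''')
    internal = [
        link for link in links
        if link['href'].startswith('http') and domain in link['href']
    ][:10]
    broken = []
    for link in internal:
        if link['href'] in visited:
            continue
        visited.add(link['href'])
        try:
            response = await page.goto(link['href'], wait_until="domcontentloaded")
            status = response.status if response else None
        except Exception:
            status = None
        if status is None or status >= 400:
            broken.append({"url": link['href'], "status": status, "text": link['text']})
        elif depth > 1:
            # The page is now on the link we just loaded, so recurse from here
            broken.extend(await check_links_recursive(page, depth - 1, visited))
    return broken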

@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_website(request: AnalysisRequest):
    """Analyze a website for quality and compliance metrics"""
    async with async_playwright() as p:
        browser = None
        try:
            browser = await p.chromium.launch()
            context = await browser.new_context()
            page = await context.new_page()
            
            # Time only the initial navigation, so load_time reflects page load
            # rather than the whole analysis
            start_time = asyncio.get_event_loop().time()
            response = await page.goto(request.url, wait_until="domcontentloaded")
            load_time = asyncio.get_event_loop().time() - start_time
            
            if not response or response.status >= 400:
                raise HTTPException(
                    status_code=400,
                    detail=f"Failed to load page. Status: {response.status if response else 'unknown'}"
                )
            
            # Mobile test if requested: resize to a phone-sized viewport before analyzing
            if request.mobile_test:
                await page.set_viewport_size({'width': 375, 'height': 667})
                result = await analyze_page(page, request.url, request)
                # Heuristic: treat a viewport meta tag as the minimal signal of
                # mobile readiness; thorough testing would inspect layout too
                result["mobile_friendly"] = await page.evaluate(
                    '''() => !!document.querySelector('meta[name="viewport"]')'''
                )
            else:
                result = await analyze_page(page, request.url, request)
            
            # Screenshot if requested
            if request.screenshot:
                screenshot = await page.screenshot(full_page=True)
                result["screenshot_base64"] = base64.b64encode(screenshot).decode('utf-8')
            
            result["load_time"] = load_time
            return result
            
        except HTTPException:
            # Let deliberate 4xx responses through instead of masking them as 500s
            raise
        except Exception as e:
            logger.error(f"Error analyzing website: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            # Close the browser whether the analysis succeeded or failed
            if browser:
                await browser.close()

@app.get("/analyze", response_model=AnalysisResponse)
async def analyze_website_get(
    url: str = Query(..., description="URL to analyze"),
    screenshot: bool = Query(False, description="Include screenshot"),
    mobile_test: bool = Query(False, description="Test mobile responsiveness"),
    check_broken_links: bool = Query(False, description="Check for broken links"),
    depth: int = Query(1, description="Depth for broken links check")
):
    """GET endpoint for website analysis"""
    request = AnalysisRequest(
        url=url,
        screenshot=screenshot,
        mobile_test=mobile_test,
        check_broken_links=check_broken_links,
        depth=depth
    )
    return await analyze_website(request)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
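
# Example client call (a sketch: assumes the server is running locally on
# port 8000, that httpx is installed, and example.com is a placeholder):
#
#   import httpx
#   resp = httpx.post(
#       "http://localhost:8000/analyze",
#       json={"url": "https://example.com", "check_broken_links": True},
#       timeout=120,
#   )
#   print(resp.json()["seo"])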