apexherbert200 committed on
Commit
be7cc52
·
1 Parent(s): dd2c937

Using google search

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. test1.py +35 -250
Dockerfile CHANGED
@@ -53,4 +53,4 @@ RUN python -m playwright install chromium
53
  EXPOSE 7860
54
 
55
  # Run the FastAPI application
56
- CMD ["python", "-m", "uvicorn", "webrify:app", "--host", "0.0.0.0", "--port", "7860"]
 
53
  EXPOSE 7860
54
 
55
  # Run the FastAPI application
56
+ CMD ["python", "-m", "uvicorn", "test1:app", "--host", "0.0.0.0", "--port", "7860"]
test1.py CHANGED
@@ -1,257 +1,42 @@
1
- from fastapi import FastAPI, HTTPException, Query
2
- from pydantic import BaseModel
3
  from playwright.async_api import async_playwright
4
- import asyncio
5
- import base64
6
- import logging
7
- from typing import List, Optional
8
- from urllib.parse import urlparse
9
 
10
- app = FastAPI(
11
- title="Website Quality & Compliance Analyzer",
12
- description="API that analyzes websites for SEO, accessibility, compliance and technical quality",
13
- version="1.0.0"
14
- )
15
 
16
- # Configure logging
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
-
20
- class SEOResult(BaseModel):
21
- title: Optional[str] = None
22
- meta_description: Optional[str] = None
23
- h1_tags: List[str] = []
24
- canonical_url: Optional[str] = None
25
- robots_txt_present: bool = False
26
- sitemap_present: bool = False
27
-
28
- class AccessibilityResult(BaseModel):
29
- missing_alt_tags: int = 0
30
- images_without_alt: List[str] = []
31
- aria_roles: List[str] = []
32
- contrast_issues: List[str] = []
33
-
34
- class ComplianceResult(BaseModel):
35
- has_cookie_banner: bool = False
36
- gdpr_compliant: Optional[bool] = None
37
- has_privacy_policy: bool = False
38
- has_terms_of_service: bool = False
39
-
40
- class TechnicalResult(BaseModel):
41
- tech_stack: List[str] = []
42
- viewport_meta: Optional[str] = None
43
- doctype: Optional[str] = None
44
- is_https: bool = False
45
- has_analytics: bool = False
46
-
47
- class BrokenLink(BaseModel):
48
- url: str
49
- status: Optional[int] = None
50
- text: Optional[str] = None
51
-
52
- class AnalysisRequest(BaseModel):
53
- url: str
54
- screenshot: bool = False
55
- mobile_test: bool = False
56
- check_broken_links: bool = False
57
- depth: int = 1 # How many levels deep to check broken links
58
-
59
- class AnalysisResponse(BaseModel):
60
- url: str
61
- seo: SEOResult
62
- accessibility: AccessibilityResult
63
- compliance: ComplianceResult
64
- technical: TechnicalResult
65
- broken_links: List[BrokenLink] = []
66
- mobile_friendly: Optional[bool] = None
67
- screenshot_base64: Optional[str] = None
68
- load_time: Optional[float] = None
69
- success: bool
70
- error: Optional[str] = None
71
-
72
- async def analyze_page(page, url: str, options: AnalysisRequest):
73
- result = {
74
- "url": url,
75
- "seo": {},
76
- "accessibility": {},
77
- "compliance": {},
78
- "technical": {},
79
- "broken_links": [],
80
- "success": True
81
- }
82
-
83
- # Basic SEO checks
84
- title = await page.title()
85
- meta_description = await page.evaluate('''() => {
86
- const meta = document.querySelector('meta[name="description"]');
87
- return meta ? meta.content : null;
88
- }''')
89
-
90
- h1_tags = await page.evaluate('''() => {
91
- return Array.from(document.querySelectorAll('h1')).map(h => h.textContent.trim());
92
- }''')
93
-
94
- result["seo"] = {
95
- "title": title,
96
- "meta_description": meta_description,
97
- "h1_tags": h1_tags
98
- }
99
-
100
- # Accessibility checks
101
- images_without_alt = await page.evaluate('''() => {
102
- return Array.from(document.querySelectorAll('img:not([alt])'))
103
- .map(img => img.src);
104
- }''')
105
-
106
- result["accessibility"] = {
107
- "missing_alt_tags": len(images_without_alt),
108
- "images_without_alt": images_without_alt
109
- }
110
-
111
- # Compliance checks
112
- has_cookie_banner = await page.evaluate('''() => {
113
- const keywords = ['cookie', 'gdpr', 'privacy', 'consent'];
114
- const elements = document.querySelectorAll('*');
115
- for (let el of elements) {
116
- const text = el.textContent.toLowerCase();
117
- if (keywords.some(kw => text.includes(kw))) {
118
- return true;
119
- }
120
- }
121
- return false;
122
- }''')
123
-
124
- result["compliance"] = {
125
- "has_cookie_banner": has_cookie_banner
126
- }
127
-
128
- # Technical checks
129
- tech_stack = []
130
-
131
- # Check for common JS libraries
132
- libraries = await page.evaluate('''() => {
133
- const libs = [];
134
- if (window.jQuery) libs.push('jQuery');
135
- if (window.React) libs.push('React');
136
- if (window.Vue) libs.push('Vue');
137
- if (window.angular) libs.push('Angular');
138
- return libs;
139
- }''')
140
-
141
- tech_stack.extend(libraries)
142
-
143
- # Check for analytics
144
- has_analytics = await page.evaluate('''() => {
145
- return !!document.querySelector('script[src*="google-analytics.com"], script[src*="googletagmanager.com"]');
146
- }''')
147
-
148
- is_https = url.startswith('https://')
149
-
150
- result["technical"] = {
151
- "tech_stack": tech_stack,
152
- "is_https": is_https,
153
- "has_analytics": has_analytics
154
- }
155
-
156
- # Broken links check (if requested)
157
- if options.check_broken_links and options.depth > 0:
158
- links = await page.evaluate('''() => {
159
- return Array.from(document.querySelectorAll('a[href]')).map(a => ({
160
- href: a.href,
161
- text: a.textContent.trim()
162
- }));
163
- }''')
164
-
165
- # Filter out external links and non-http links
166
- domain = urlparse(url).netloc
167
- internal_links = [
168
- link for link in links
169
- if link['href'].startswith('http') and domain in link['href']
170
- ][:10] # Limit to 10 links for demo purposes
171
-
172
- broken_links = []
173
- for link in internal_links:
174
- try:
175
- response = await page.goto(link['href'], wait_until="domcontentloaded")
176
- status = response.status if response else None
177
- if status and status >= 400:
178
- broken_links.append({
179
- "url": link['href'],
180
- "status": status,
181
- "text": link['text']
182
- })
183
- except Exception as e:
184
- broken_links.append({
185
- "url": link['href'],
186
- "status": None,
187
- "text": link['text']
188
- })
189
-
190
- result["broken_links"] = broken_links
191
-
192
- return result
193
 
194
- @app.post("/analyze", response_model=AnalysisResponse)
195
- async def analyze_website(request: AnalysisRequest):
196
- """Analyze a website for quality and compliance metrics"""
197
- async with async_playwright() as p:
198
  try:
199
- browser = await p.chromium.launch()
200
- context = await browser.new_context()
201
- page = await context.new_page()
202
-
203
- # Start timing
204
- start_time = asyncio.get_event_loop().time()
205
-
206
- # Navigate to the page
207
- response = await page.goto(request.url, wait_until="domcontentloaded")
208
- if not response or response.status >= 400:
209
- raise HTTPException(status_code=400, detail=f"Failed to load page. Status: {response.status if response else 'unknown'}")
210
-
211
- # Mobile test if requested
212
- if request.mobile_test:
213
- mobile_viewport = {'width': 375, 'height': 667}
214
- await page.set_viewport_size(mobile_viewport)
215
- result = await analyze_page(page, request.url, request)
216
- result["mobile_friendly"] = True # Basic check - would need more sophisticated testing
217
- else:
218
- result = await analyze_page(page, request.url, request)
219
-
220
- # Screenshot if requested
221
- if request.screenshot:
222
- screenshot = await page.screenshot(full_page=True)
223
- result["screenshot_base64"] = base64.b64encode(screenshot).decode('utf-8')
224
-
225
- # Calculate load time
226
- end_time = asyncio.get_event_loop().time()
227
- result["load_time"] = end_time - start_time
228
-
229
- await browser.close()
230
- return result
231
-
232
- except Exception as e:
233
- logger.error(f"Error analyzing website: {str(e)}")
234
- await browser.close()
235
- raise HTTPException(status_code=500, detail=str(e))
236
 
237
- @app.get("/analyze", response_model=AnalysisResponse)
238
- async def analyze_website_get(
239
- url: str = Query(..., description="URL to analyze"),
240
- screenshot: bool = Query(False, description="Include screenshot"),
241
- mobile_test: bool = Query(False, description="Test mobile responsiveness"),
242
- check_broken_links: bool = Query(False, description="Check for broken links"),
243
- depth: int = Query(1, description="Depth for broken links check")
244
- ):
245
- """GET endpoint for website analysis"""
246
- request = AnalysisRequest(
247
- url=url,
248
- screenshot=screenshot,
249
- mobile_test=mobile_test,
250
- check_broken_links=check_broken_links,
251
- depth=depth
252
- )
253
- return await analyze_website(request)
254
 
255
- if __name__ == "__main__":
256
- import uvicorn
257
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from urllib.parse import quote_plus

from fastapi import FastAPI
from playwright.async_api import async_playwright
 
 
 
 
 
3
 
4
+ app = FastAPI()
 
 
 
 
5
 
6
async def scrape_google(query: str):
    """Scrape Google search results for *query* with a headless browser.

    Parameters:
        query: The search term, already URL-encoded by the caller
            (spaces as ``+``).

    Returns:
        list[dict]: One ``{"title": ..., "link": ...}`` entry per result
        heading (``<h3>``) that sits inside an anchor.
    """
    # NOTE(review): the hard-coded sxsrf token is session-specific and
    # almost certainly stale; kept only to preserve the original request
    # shape — confirm whether Google actually requires it.
    url = (
        "https://www.google.com/search"
        f"?q={query}"
        "&sxsrf=AE3TifOZcTbH54cOkE27wqRqSVEmaqb7fw%3A1750003707838"
    )
    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            page = await context.new_page()

            # BUG FIX: navigate first. The original waited for the consent
            # button *before* goto, so the selector could never appear and
            # every request paid the full 5 s timeout.
            await page.goto(url, wait_until="domcontentloaded")

            # Dismiss the cookie/consent pop-up if one is shown.
            try:
                btn = await page.wait_for_selector(
                    'button:has-text("I agree")', timeout=5000
                )
                await btn.click()
            except Exception:
                # No consent banner (or a different locale) — continue.
                pass

            # Result titles render as <h3> elements on the results page.
            await page.wait_for_selector("h3")

            results = []
            for h in await page.query_selector_all("h3"):
                try:
                    # The enclosing <a> carries the result URL.
                    link = await h.evaluate("(e) => e.closest('a').href")
                    title = await h.inner_text()
                    results.append({"title": title, "link": link})
                except Exception:
                    # <h3> without an enclosing anchor (e.g. "People also
                    # ask" headings) — skip it.
                    continue
            return results
        finally:
            # Always release the browser; the original only closed it on
            # the success path, leaking a Chromium process per failure.
            await browser.close()
38
+
39
@app.get("/search")
async def search(query: str):
    """Search Google for *query* and return scraped result titles and links.

    Parameters:
        query: Raw, unencoded search text from the client.

    Returns:
        dict: ``{"query": <original query>, "results": <scraped list>}``.
    """
    # quote_plus percent-encodes every reserved character (&, #, ?, %,
    # non-ASCII) and turns spaces into '+'. The original
    # `query.replace(" ", "+")` only handled spaces, so a query containing
    # '&' or '#' injected extra URL parameters or truncated the request.
    data = await scrape_google(quote_plus(query))
    return {"query": query, "results": data}