apexherbert200 committed on
Commit
f089137
·
1 Parent(s): 5611064

Working / experimenting

Browse files
Files changed (1) hide show
  1. screenshot.py +68 -23
screenshot.py CHANGED
@@ -1,11 +1,10 @@
1
- # main.py
2
- from fastapi import FastAPI, HTTPException, Query
3
  from pydantic import BaseModel
4
- from typing import List, Optional
5
  import base64
6
  import json
7
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
8
- import asyncio
9
 
10
  app = FastAPI(title="Web Analyzer API")
11
 
@@ -37,23 +36,42 @@ async def get_metadata(url: str):
37
  page, browser, pw = await get_page(url)
38
  try:
39
  title = await page.title()
40
- desc = await page.get_attribute("meta[name='description']", "content")
41
- og = {}
42
- twitter = {}
 
 
 
 
 
43
  og = {}
44
  for prop in ["title", "description", "image"]:
45
- selector = f"meta[property='og:{prop}']"
46
  try:
 
47
  if await page.query_selector(selector):
48
  og[f"og:{prop}"] = await page.get_attribute(selector, "content")
49
  else:
50
  og[f"og:{prop}"] = None
51
- except Exception as e:
52
  og[f"og:{prop}"] = None
53
 
54
- og[f"og:{prop}"] = await page.get_attribute(f"meta[property='og:{prop}']", "content")
55
- twitter[f"twitter:{prop}"] = await page.get_attribute(f"meta[name='twitter:{prop}']", "content")
56
- canonical = await page.get_attribute("link[rel='canonical']", "href")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  return {
58
  "title": title,
59
  "description": desc,
@@ -94,8 +112,15 @@ async def seo_audit(url: str):
94
  internal += 1
95
  else:
96
  external += 1
97
- robots = await page.get_attribute("meta[name='robots']", "content")
98
- canonical = await page.get_attribute("link[rel='canonical']", "href")
 
 
 
 
 
 
 
99
  return {
100
  "h1_count": h1_count,
101
  "missing_image_alts": missing_alts,
@@ -108,19 +133,39 @@ async def seo_audit(url: str):
108
  await browser.close()
109
  await pw.stop()
110
 
111
-
112
  @app.get("/performance")
113
  async def performance_metrics(url: str):
114
  page, browser, pw = await get_page(url)
115
  try:
116
- nav_timing = await page.evaluate("JSON.stringify(performance.getEntriesByType('navigation'))")
117
- timing = json.loads(nav_timing)[0]
118
- fcp = await page.evaluate("performance.getEntriesByName('first-contentful-paint')[0]?.startTime")
119
- lcp = await page.evaluate("performance.getEntriesByType('largest-contentful-paint')[0]?.renderTime")
120
- cls_entries = await page.evaluate("JSON.stringify(performance.getEntriesByType('layout-shift'))")
121
- cls = sum(e['value'] for e in json.loads(cls_entries))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  return {
123
- "page_load_time_ms": timing['duration'],
124
  "first_contentful_paint": fcp,
125
  "largest_contentful_paint": lcp,
126
  "cumulative_layout_shift": cls
@@ -176,4 +221,4 @@ async def accessibility_check(url: str):
176
  }
177
  finally:
178
  await browser.close()
179
- await pw.stop()
 
1
+ # scrape.py
2
+ from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel
4
+ from typing import Optional
5
  import base64
6
  import json
7
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
 
8
 
9
  app = FastAPI(title="Web Analyzer API")
10
 
 
36
  page, browser, pw = await get_page(url)
37
  try:
38
  title = await page.title()
39
+
40
+ # Get description meta tag
41
+ try:
42
+ desc = await page.get_attribute("meta[name='description']", "content")
43
+ except Exception:
44
+ desc = None
45
+
46
+ # Extract Open Graph metadata
47
  og = {}
48
  for prop in ["title", "description", "image"]:
 
49
  try:
50
+ selector = f"meta[property='og:{prop}']"
51
  if await page.query_selector(selector):
52
  og[f"og:{prop}"] = await page.get_attribute(selector, "content")
53
  else:
54
  og[f"og:{prop}"] = None
55
+ except Exception:
56
  og[f"og:{prop}"] = None
57
 
58
+ # Extract Twitter metadata
59
+ twitter = {}
60
+ for prop in ["title", "description", "image"]:
61
+ try:
62
+ selector = f"meta[name='twitter:{prop}']"
63
+ if await page.query_selector(selector):
64
+ twitter[f"twitter:{prop}"] = await page.get_attribute(selector, "content")
65
+ else:
66
+ twitter[f"twitter:{prop}"] = None
67
+ except Exception:
68
+ twitter[f"twitter:{prop}"] = None
69
+
70
+ # Get canonical URL
71
+ try:
72
+ canonical = await page.get_attribute("link[rel='canonical']", "href")
73
+ except Exception:
74
+ canonical = None
75
  return {
76
  "title": title,
77
  "description": desc,
 
112
  internal += 1
113
  else:
114
  external += 1
115
+ try:
116
+ robots = await page.get_attribute("meta[name='robots']", "content")
117
+ except Exception:
118
+ robots = None
119
+
120
+ try:
121
+ canonical = await page.get_attribute("link[rel='canonical']", "href")
122
+ except Exception:
123
+ canonical = None
124
  return {
125
  "h1_count": h1_count,
126
  "missing_image_alts": missing_alts,
 
133
  await browser.close()
134
  await pw.stop()
135
 
 
136
  @app.get("/performance")
137
  async def performance_metrics(url: str):
138
  page, browser, pw = await get_page(url)
139
  try:
140
+ # Get navigation timing
141
+ try:
142
+ nav_timing = await page.evaluate("JSON.stringify(performance.getEntriesByType('navigation'))")
143
+ timing = json.loads(nav_timing)[0] if nav_timing else {}
144
+ page_load_time = timing.get('duration', None)
145
+ except Exception:
146
+ page_load_time = None
147
+
148
+ # Get First Contentful Paint
149
+ try:
150
+ fcp = await page.evaluate("performance.getEntriesByName('first-contentful-paint')[0]?.startTime")
151
+ except Exception:
152
+ fcp = None
153
+
154
+ # Get Largest Contentful Paint
155
+ try:
156
+ lcp = await page.evaluate("performance.getEntriesByType('largest-contentful-paint')[0]?.renderTime")
157
+ except Exception:
158
+ lcp = None
159
+
160
+ # Get Cumulative Layout Shift
161
+ try:
162
+ cls_entries = await page.evaluate("JSON.stringify(performance.getEntriesByType('layout-shift'))")
163
+ cls = sum(e.get('value', 0) for e in json.loads(cls_entries) if isinstance(e, dict))
164
+ except Exception:
165
+ cls = None
166
+
167
  return {
168
+ "page_load_time_ms": page_load_time,
169
  "first_contentful_paint": fcp,
170
  "largest_contentful_paint": lcp,
171
  "cumulative_layout_shift": cls
 
221
  }
222
  finally:
223
  await browser.close()
224
+ await pw.stop()