Commit
·
84c8f5a
1
Parent(s):
79508b3
Tool for scraping contacts
Browse files · contacts.py +6 -1
contacts.py
CHANGED
@@ -41,6 +41,7 @@ class LeadData(BaseModel):
|
|
41 |
technologies: List[str] = []
|
42 |
|
43 |
class ScrapeResponse(BaseModel):
|
|
|
44 |
body_content: Optional[str] = None
|
45 |
screenshot: Optional[str] = None
|
46 |
links: Optional[List[LinkInfo]] = None
|
@@ -72,7 +73,8 @@ async def scrape_page(
|
|
72 |
lead_generation: bool = Query(True, description="Extract lead generation data (emails, phones, business info)"),
|
73 |
screenshot: bool = Query(True, description="Take a full page screenshot"),
|
74 |
get_links: bool = Query(True, description="Extract all links from the page"),
|
75 |
-
get_body: bool = Query(False, description="Extract body tag content (can be large)")
|
|
|
76 |
):
|
77 |
norm_url = normalize_url(url)
|
78 |
if norm_url in visited_urls:
|
@@ -98,6 +100,9 @@ async def scrape_page(
|
|
98 |
}
|
99 |
""")
|
100 |
|
|
|
|
|
|
|
101 |
if get_body:
|
102 |
response.body_content = await page.evaluate("""
|
103 |
() => {
|
|
|
41 |
technologies: List[str] = []
|
42 |
|
43 |
class ScrapeResponse(BaseModel):
|
44 |
+
full_html: Optional[str] = None
|
45 |
body_content: Optional[str] = None
|
46 |
screenshot: Optional[str] = None
|
47 |
links: Optional[List[LinkInfo]] = None
|
|
|
73 |
lead_generation: bool = Query(True, description="Extract lead generation data (emails, phones, business info)"),
|
74 |
screenshot: bool = Query(True, description="Take a full page screenshot"),
|
75 |
get_links: bool = Query(True, description="Extract all links from the page"),
|
76 |
+
get_body: bool = Query(False, description="Extract body tag content (can be large)"),
|
77 |
+
get_frontend: bool = Query(True, description="Get full rendered frontend HTML content")
|
78 |
):
|
79 |
norm_url = normalize_url(url)
|
80 |
if norm_url in visited_urls:
|
|
|
100 |
}
|
101 |
""")
|
102 |
|
103 |
+
if get_frontend:
|
104 |
+
response.full_html = await page.content()
|
105 |
+
|
106 |
if get_body:
|
107 |
response.body_content = await page.evaluate("""
|
108 |
() => {
|