apexherbert200 committed on
Commit
84c8f5a
·
1 Parent(s): 79508b3

Tool for scraping contacts

Browse files
Files changed (1) hide show
  1. contacts.py +6 -1
contacts.py CHANGED
@@ -41,6 +41,7 @@ class LeadData(BaseModel):
41
  technologies: List[str] = []
42
 
43
  class ScrapeResponse(BaseModel):
 
44
  body_content: Optional[str] = None
45
  screenshot: Optional[str] = None
46
  links: Optional[List[LinkInfo]] = None
@@ -72,7 +73,8 @@ async def scrape_page(
72
  lead_generation: bool = Query(True, description="Extract lead generation data (emails, phones, business info)"),
73
  screenshot: bool = Query(True, description="Take a full page screenshot"),
74
  get_links: bool = Query(True, description="Extract all links from the page"),
75
- get_body: bool = Query(False, description="Extract body tag content (can be large)")
 
76
  ):
77
  norm_url = normalize_url(url)
78
  if norm_url in visited_urls:
@@ -98,6 +100,9 @@ async def scrape_page(
98
  }
99
  """)
100
 
 
 
 
101
  if get_body:
102
  response.body_content = await page.evaluate("""
103
  () => {
 
41
  technologies: List[str] = []
42
 
43
  class ScrapeResponse(BaseModel):
44
+ full_html: Optional[str] = None
45
  body_content: Optional[str] = None
46
  screenshot: Optional[str] = None
47
  links: Optional[List[LinkInfo]] = None
 
73
  lead_generation: bool = Query(True, description="Extract lead generation data (emails, phones, business info)"),
74
  screenshot: bool = Query(True, description="Take a full page screenshot"),
75
  get_links: bool = Query(True, description="Extract all links from the page"),
76
+ get_body: bool = Query(False, description="Extract body tag content (can be large)"),
77
+ get_frontend: bool = Query(True, description="Get full rendered frontend HTML content")
78
  ):
79
  norm_url = normalize_url(url)
80
  if norm_url in visited_urls:
 
100
  }
101
  """)
102
 
103
+ if get_frontend:
104
+ response.full_html = await page.content()
105
+
106
  if get_body:
107
  response.body_content = await page.evaluate("""
108
  () => {