Commit
·
f9275f7
1
Parent(s):
35b24cc
Added script content links
Browse files
scrape.py
CHANGED
@@ -22,6 +22,11 @@ class ContactInfo(BaseModel):
|
|
22 |
social_media: List[str] = []
|
23 |
contact_forms: List[str] = []
|
24 |
|
|
|
|
|
|
|
|
|
|
|
25 |
class BusinessInfo(BaseModel):
|
26 |
company_name: Optional[str] = None
|
27 |
address: Optional[str] = None
|
@@ -38,6 +43,7 @@ class ScrapeResponse(BaseModel):
|
|
38 |
body_content: Optional[str] = None
|
39 |
screenshot: Optional[str] = None
|
40 |
links: Optional[List[LinkInfo]] = None
|
|
|
41 |
page_title: Optional[str] = None
|
42 |
meta_description: Optional[str] = None
|
43 |
lead_data: Optional[LeadData] = None
|
@@ -64,7 +70,8 @@ async def root():
|
|
64 |
"basic_features": [
|
65 |
"π Clean body text extraction",
|
66 |
"π Smart link filtering",
|
67 |
-
"
|
|
|
68 |
"π Page metadata extraction"
|
69 |
],
|
70 |
"use_cases": [
|
|
|
22 |
social_media: List[str] = []
|
23 |
contact_forms: List[str] = []
|
24 |
|
25 |
+
class ScriptInfo(BaseModel):
|
26 |
+
src: str
|
27 |
+
script_type: Optional[str] = None
|
28 |
+
is_external: bool = False
|
29 |
+
|
30 |
class BusinessInfo(BaseModel):
|
31 |
company_name: Optional[str] = None
|
32 |
address: Optional[str] = None
|
|
|
43 |
body_content: Optional[str] = None
|
44 |
screenshot: Optional[str] = None
|
45 |
links: Optional[List[LinkInfo]] = None
|
46 |
+
scripts: Optional[List[ScriptInfo]] = None
|
47 |
page_title: Optional[str] = None
|
48 |
meta_description: Optional[str] = None
|
49 |
lead_data: Optional[LeadData] = None
|
|
|
70 |
"basic_features": [
|
71 |
"π Clean body text extraction",
|
72 |
"π Smart link filtering",
|
73 |
+
"Script and JavaScript file extraction",
|
74 |
+
"📸 Full page screenshots",
|
75 |
"π Page metadata extraction"
|
76 |
],
|
77 |
"use_cases": [
|