apexherbert200 committed on
Commit
f9275f7
·
1 Parent(s): 35b24cc

Added script content links

Browse files
Files changed (1) hide show
  1. scrape.py +8 -1
scrape.py CHANGED
@@ -22,6 +22,11 @@ class ContactInfo(BaseModel):
22
  social_media: List[str] = []
23
  contact_forms: List[str] = []
24
 
 
 
 
 
 
25
  class BusinessInfo(BaseModel):
26
  company_name: Optional[str] = None
27
  address: Optional[str] = None
@@ -38,6 +43,7 @@ class ScrapeResponse(BaseModel):
38
  body_content: Optional[str] = None
39
  screenshot: Optional[str] = None
40
  links: Optional[List[LinkInfo]] = None
 
41
  page_title: Optional[str] = None
42
  meta_description: Optional[str] = None
43
  lead_data: Optional[LeadData] = None
@@ -64,7 +70,8 @@ async def root():
64
  "basic_features": [
65
  "📄 Clean body text extraction",
66
  "🔗 Smart link filtering",
67
- "📸 Full page screenshots",
 
68
  "📋 Page metadata extraction"
69
  ],
70
  "use_cases": [
 
22
  social_media: List[str] = []
23
  contact_forms: List[str] = []
24
 
25
+ class ScriptInfo(BaseModel):
26
+ src: str
27
+ script_type: Optional[str] = None
28
+ is_external: bool = False
29
+
30
  class BusinessInfo(BaseModel):
31
  company_name: Optional[str] = None
32
  address: Optional[str] = None
 
43
  body_content: Optional[str] = None
44
  screenshot: Optional[str] = None
45
  links: Optional[List[LinkInfo]] = None
46
+ scripts: Optional[List[ScriptInfo]] = None
47
  page_title: Optional[str] = None
48
  meta_description: Optional[str] = None
49
  lead_data: Optional[LeadData] = None
 
70
  "basic_features": [
71
  "📄 Clean body text extraction",
72
  "🔗 Smart link filtering",
73
+ "📜 Script and JavaScript file extraction",
74
+ "📸 Full page screenshots",
75
  "📋 Page metadata extraction"
76
  ],
77
  "use_cases": [