NightFury2710 committed on
Commit
1091e12
·
1 Parent(s): 286ca0d

Update API handling (3): add an `ignore_links` request option and pass it to the markdown generator options

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -25,6 +25,7 @@ class CrawlRequest(BaseModel):
25
  cache_mode: str = "DISABLED"
26
  excluded_tags: list[str] = ["nav", "footer", "aside", "header", "script", "style"]
27
  remove_overlay_elements: bool = True
 
28
  subject: Optional[str] = None # Optional subject for content filtering
29
 
30
  class Article(BaseModel):
@@ -201,9 +202,16 @@ async def crawl_url(request: CrawlRequest):
201
  min_word_threshold=50
202
  )
203
 
 
 
 
 
 
 
 
204
  md_generator = DefaultMarkdownGenerator(
205
  content_filter=content_filter,
206
- options={"ignore_images": True}
207
  )
208
 
209
  # Create crawler with configuration
 
25
  cache_mode: str = "DISABLED"
26
  excluded_tags: list[str] = ["nav", "footer", "aside", "header", "script", "style"]
27
  remove_overlay_elements: bool = True
28
+ ignore_links: bool = True
29
  subject: Optional[str] = None # Optional subject for content filtering
30
 
31
  class Article(BaseModel):
 
202
  min_word_threshold=50
203
  )
204
 
205
+ # Create options dictionary with ignore_images
206
+ options = {"ignore_images": True}
207
+
208
+ # Add ignore_links if requested
209
+ if request.ignore_links:
210
+ options["ignore_links"] = True
211
+
212
  md_generator = DefaultMarkdownGenerator(
213
  content_filter=content_filter,
214
+ options=options
215
  )
216
 
217
  # Create crawler with configuration