Commit
·
1091e12
1
Parent(s):
286ca0d
update api handle 3
Browse files
app.py
CHANGED
@@ -25,6 +25,7 @@ class CrawlRequest(BaseModel):
|
|
25 |
cache_mode: str = "DISABLED"
|
26 |
excluded_tags: list[str] = ["nav", "footer", "aside", "header", "script", "style"]
|
27 |
remove_overlay_elements: bool = True
|
|
|
28 |
subject: Optional[str] = None # Optional subject for content filtering
|
29 |
|
30 |
class Article(BaseModel):
|
@@ -201,9 +202,16 @@ async def crawl_url(request: CrawlRequest):
|
|
201 |
min_word_threshold=50
|
202 |
)
|
203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
md_generator = DefaultMarkdownGenerator(
|
205 |
content_filter=content_filter,
|
206 |
-
options=
|
207 |
)
|
208 |
|
209 |
# Create crawler with configuration
|
|
|
25 |
cache_mode: str = "DISABLED"
|
26 |
excluded_tags: list[str] = ["nav", "footer", "aside", "header", "script", "style"]
|
27 |
remove_overlay_elements: bool = True
|
28 |
+
ignore_links: bool = True
|
29 |
subject: Optional[str] = None # Optional subject for content filtering
|
30 |
|
31 |
class Article(BaseModel):
|
|
|
202 |
min_word_threshold=50
|
203 |
)
|
204 |
|
205 |
+
# Options passed to the markdown generator; images are always ignored
|
206 |
+
options = {"ignore_images": True}
|
207 |
+
|
208 |
+
# Also ignore links unless the request sets ignore_links=False (it defaults to True)
|
209 |
+
if request.ignore_links:
|
210 |
+
options["ignore_links"] = True
|
211 |
+
|
212 |
md_generator = DefaultMarkdownGenerator(
|
213 |
content_filter=content_filter,
|
214 |
+
options=options
|
215 |
)
|
216 |
|
217 |
# Create crawler with configuration
|