Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -92,7 +92,7 @@ async def get_links(session, url, base_url):
|
|
92 |
logger.error(f"Error getting links from {url}: {str(e)}")
|
93 |
return []
|
94 |
|
95 |
-
async def crawl_pages(base_url, max_depth):
|
96 |
visited = set()
|
97 |
to_visit = [(base_url, 0)]
|
98 |
all_pages = []
|
@@ -126,7 +126,7 @@ async def crawl_pages(base_url, max_depth):
|
|
126 |
if depth < max_depth:
|
127 |
links = await get_links(session, current_url, base_url)
|
128 |
for link in links:
|
129 |
-
if link not in visited:
|
130 |
to_visit.append((link, depth + 1))
|
131 |
|
132 |
return all_pages
|
|
|
92 |
logger.error(f"Error getting links from {url}: {str(e)}")
|
93 |
return []
|
94 |
|
95 |
+
async def crawl_pages(base_url, max_depth):
|
96 |
visited = set()
|
97 |
to_visit = [(base_url, 0)]
|
98 |
all_pages = []
|
|
|
126 |
if depth < max_depth:
|
127 |
links = await get_links(session, current_url, base_url)
|
128 |
for link in links:
|
129 |
+
if link not in visited and link not in [url for url, _ in to_visit]:
|
130 |
to_visit.append((link, depth + 1))
|
131 |
|
132 |
return all_pages
|