bluenevus committed on
Commit
d5a3b2e
·
verified ·
1 Parent(s): 1748e66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -49,10 +49,19 @@ def get_subdirectory_pages(url, base_url, visited=set(), max_pages=100):
49
  href = link.get('href')
50
  if href:
51
  full_url = urljoin(base_url, href)
52
- if full_url.startswith(base_url) and full_url not in visited:
53
- pages.extend(get_subdirectory_pages(full_url, base_url, visited, max_pages))
54
- if len(visited) >= max_pages:
55
- break
 
 
 
 
 
 
 
 
 
56
  except Exception as e:
57
  logger.error(f"Error processing {url}: {e}")
58
 
 
49
  href = link.get('href')
50
  if href:
51
  full_url = urljoin(base_url, href)
52
+ parsed_full_url = urlparse(full_url)
53
+ parsed_base_url = urlparse(base_url)
54
+
55
+ # Check if the URL is one level deeper
56
+ if (parsed_full_url.scheme == parsed_base_url.scheme and
57
+ parsed_full_url.netloc == parsed_base_url.netloc and
58
+ parsed_full_url.path.startswith(parsed_base_url.path) and
59
+ parsed_full_url.path.count('/') == parsed_base_url.path.count('/') + 1):
60
+
61
+ if full_url not in visited:
62
+ pages.extend(get_subdirectory_pages(full_url, full_url, visited, max_pages))
63
+ if len(visited) >= max_pages:
64
+ break
65
  except Exception as e:
66
  logger.error(f"Error processing {url}: {e}")
67