Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -52,14 +52,14 @@ def get_subdirectory_pages(url, base_url, visited=set(), max_pages=100):
|
|
52 |
parsed_full_url = urlparse(full_url)
|
53 |
parsed_base_url = urlparse(base_url)
|
54 |
|
55 |
-
# Check if the URL is
|
56 |
if (parsed_full_url.scheme == parsed_base_url.scheme and
|
57 |
parsed_full_url.netloc == parsed_base_url.netloc and
|
58 |
parsed_full_url.path.startswith(parsed_base_url.path) and
|
59 |
-
parsed_full_url.path.count('/')
|
60 |
|
61 |
if full_url not in visited:
|
62 |
-
pages.extend(get_subdirectory_pages(full_url,
|
63 |
if len(visited) >= max_pages:
|
64 |
break
|
65 |
except Exception as e:
|
@@ -101,7 +101,7 @@ def process_url(url):
|
|
101 |
|
102 |
iface = gr.Interface(
|
103 |
fn=process_url,
|
104 |
-
inputs=gr.Textbox(label="Enter website URL (e.g., https://www.gradio.app/docs)"),
|
105 |
outputs=gr.File(label="Download PDF"),
|
106 |
title="Website Subdirectory to PDF Converter",
|
107 |
description="Enter a website URL to convert its subdirectories into a PDF."
|
|
|
52 |
parsed_full_url = urlparse(full_url)
|
53 |
parsed_base_url = urlparse(base_url)
|
54 |
|
55 |
+
# Check if the URL is in the same directory or a direct subdirectory
|
56 |
if (parsed_full_url.scheme == parsed_base_url.scheme and
|
57 |
parsed_full_url.netloc == parsed_base_url.netloc and
|
58 |
parsed_full_url.path.startswith(parsed_base_url.path) and
|
59 |
+
parsed_full_url.path.count('/') <= parsed_base_url.path.count('/') + 1):
|
60 |
|
61 |
if full_url not in visited:
|
62 |
+
pages.extend(get_subdirectory_pages(full_url, base_url, visited, max_pages))
|
63 |
if len(visited) >= max_pages:
|
64 |
break
|
65 |
except Exception as e:
|
|
|
101 |
|
102 |
iface = gr.Interface(
|
103 |
fn=process_url,
|
104 |
+
inputs=gr.Textbox(label="Enter website URL (e.g., https://www.gradio.app/docs/gradio)"),
|
105 |
outputs=gr.File(label="Download PDF"),
|
106 |
title="Website Subdirectory to PDF Converter",
|
107 |
description="Enter a website URL to convert its subdirectories into a PDF."
|