bluenevus committed on
Commit
4d8faec
·
verified ·
1 Parent(s): d5a3b2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -52,14 +52,14 @@ def get_subdirectory_pages(url, base_url, visited=set(), max_pages=100):
52
  parsed_full_url = urlparse(full_url)
53
  parsed_base_url = urlparse(base_url)
54
 
55
- # Check if the URL is one level deeper
56
  if (parsed_full_url.scheme == parsed_base_url.scheme and
57
  parsed_full_url.netloc == parsed_base_url.netloc and
58
  parsed_full_url.path.startswith(parsed_base_url.path) and
59
- parsed_full_url.path.count('/') == parsed_base_url.path.count('/') + 1):
60
 
61
  if full_url not in visited:
62
- pages.extend(get_subdirectory_pages(full_url, full_url, visited, max_pages))
63
  if len(visited) >= max_pages:
64
  break
65
  except Exception as e:
@@ -101,7 +101,7 @@ def process_url(url):
101
 
102
  iface = gr.Interface(
103
  fn=process_url,
104
- inputs=gr.Textbox(label="Enter website URL (e.g., https://www.gradio.app/docs)"),
105
  outputs=gr.File(label="Download PDF"),
106
  title="Website Subdirectory to PDF Converter",
107
  description="Enter a website URL to convert its subdirectories into a PDF."
 
52
  parsed_full_url = urlparse(full_url)
53
  parsed_base_url = urlparse(base_url)
54
 
55
+ # Check if the URL is in the same directory or a direct subdirectory
56
  if (parsed_full_url.scheme == parsed_base_url.scheme and
57
  parsed_full_url.netloc == parsed_base_url.netloc and
58
  parsed_full_url.path.startswith(parsed_base_url.path) and
59
+ parsed_full_url.path.count('/') <= parsed_base_url.path.count('/') + 1):
60
 
61
  if full_url not in visited:
62
+ pages.extend(get_subdirectory_pages(full_url, base_url, visited, max_pages))
63
  if len(visited) >= max_pages:
64
  break
65
  except Exception as e:
 
101
 
102
  iface = gr.Interface(
103
  fn=process_url,
104
+ inputs=gr.Textbox(label="Enter website URL (e.g., https://www.gradio.app/docs/gradio)"),
105
  outputs=gr.File(label="Download PDF"),
106
  title="Website Subdirectory to PDF Converter",
107
  description="Enter a website URL to convert its subdirectories into a PDF."