bluenevus committed
Commit af244f7 · verified · 1 Parent(s): 127d5f5

Update app.py

Files changed (1)
  1. app.py +11 -9
app.py CHANGED
@@ -8,7 +8,6 @@ import re
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
-
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
@@ -35,7 +34,7 @@ def get_page_content(url):
         return content
     except Exception as e:
         logger.error(f"Error processing {url}: {str(e)}")
-        return [f"Error processing {url}: {str(e)}"]
+        return []  # Return an empty list instead of error message
 
 def get_links(url, base_url):
     try:
@@ -63,7 +62,7 @@ def crawl_pages(base_url, max_depth):
         logger.info(f"Processed page: {url} at depth {depth}")
         return url, content, depth
 
-    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
+    with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
         while to_visit:
             current_url, depth = to_visit.pop(0)
@@ -81,7 +80,8 @@ def crawl_pages(base_url, max_depth):
 
         for future in as_completed(futures):
             url, content, depth = future.result()
-            all_pages.append((url, content))
+            if content:  # Only add pages with content
+                all_pages.append((url, content))
 
     return all_pages
 
@@ -118,16 +118,18 @@ def process_url(url, depth):
         return pdf_file
     except Exception as e:
         logger.error(f"Error in process_url: {str(e)}")
-        return f"An error occurred: {str(e)}"
+        return None  # Return None instead of error message
 
-# Add this new function
 def threaded_process_url(url, depth):
     with ThreadPoolExecutor() as executor:
        future = executor.submit(process_url, url, depth)
-        return future.result()
+        result = future.result()
+        if result is None:
+            return gr.update(value=None, visible=False)
+        return gr.update(value=result, visible=True)
 
 iface = gr.Interface(
-    fn=threaded_process_url,  # Use the new threaded function
+    fn=threaded_process_url,
     inputs=[
         gr.Textbox(label="Enter website URL (e.g., https://www.gradio.app/docs)"),
         gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Crawl Depth")
@@ -138,4 +140,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch(share=True, server_name="0.0.0.0", server_port=7860)
+    iface.launch()
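Taken together, the crawl changes switch error signaling from strings to empty sentinels: get_page_content now returns [] on failure, and crawl_pages skips those results when collecting completed futures. A minimal self-contained sketch of that pattern, with a hypothetical fetch function standing in for get_page_content (not part of this commit):

from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch(url):
    # Hypothetical stand-in for get_page_content: content on success,
    # an empty list on failure, matching the commit's new convention.
    try:
        if "bad" in url:
            raise ValueError("unreachable host")
        return [f"content of {url}"]
    except Exception:
        return []

urls = ["https://example.com/a", "https://example.com/bad", "https://example.com/b"]
pages = []
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = {executor.submit(fetch, u): u for u in urls}
    for future in as_completed(futures):
        content = future.result()
        if content:  # only keep pages that produced content
            pages.append((futures[future], content))
print(pages)  # the failing URL is dropped instead of surfacing as error text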
 
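Similarly, threaded_process_url now returns gr.update(...) so the output component can be hidden when process_url fails, rather than handing Gradio an error string as if it were a file path. A minimal sketch of that return-gr.update pattern, assuming a Gradio version where handlers may return gr.update (the maybe_echo demo is hypothetical, not from this repo):

import gradio as gr

def maybe_echo(text):
    # Hide the output component when there is nothing to show, the same
    # pattern threaded_process_url applies to a None result.
    if not text.strip():
        return gr.update(value=None, visible=False)
    return gr.update(value=text, visible=True)

demo = gr.Interface(fn=maybe_echo, inputs=gr.Textbox(label="Input"), outputs=gr.Textbox(label="Output"))

if __name__ == "__main__":
    demo.launch()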