Update app.py
app.py CHANGED
@@ -193,8 +193,9 @@ def website_to_pdf(all_pages, progress_callback):
 
             progress = min((i + chunk_size) / len(all_pages), 1.0)
             progress_callback(f"Processing pages... {progress:.0%}")
+            logger.info(f"Generated PDF chunk {i//chunk_size + 1}/{total_chunks}")
 
-
+        logger.info("Merging PDF chunks...")
         output_pdf = os.path.join(temp_dir, "final.pdf")
         merger = PdfMerger()
         for temp_file in temp_files:
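
For context, the loop this hunk instruments walks the crawled pages in fixed-size chunks and reports fractional progress after each one. A minimal sketch of that pattern; the helper name report_chunk_progress and its arguments are illustrative, only the progress arithmetic and log line mirror the diff:

import logging

logger = logging.getLogger(__name__)

def report_chunk_progress(all_pages, chunk_size, progress_callback):
    # Iterate over pages in fixed-size chunks and report progress after each chunk.
    total_chunks = (len(all_pages) + chunk_size - 1) // chunk_size
    for i in range(0, len(all_pages), chunk_size):
        chunk = all_pages[i:i + chunk_size]
        # ... render `chunk` to a temporary per-chunk PDF here ...
        progress = min((i + chunk_size) / len(all_pages), 1.0)
        progress_callback(f"Processing pages... {progress:.0%}")
        logger.info(f"Generated PDF chunk {i // chunk_size + 1}/{total_chunks}")
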
@@ -202,6 +203,7 @@ def website_to_pdf(all_pages, progress_callback):
         merger.write(output_pdf)
         merger.close()
 
+        logger.info("PDF generation complete. Reading final PDF...")
         with open(output_pdf, 'rb') as f:
             return f.read()
 
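
The merge step that this new log line announces follows the usual PdfMerger flow: append each per-chunk file, write one combined PDF, and read it back as bytes. A rough, self-contained sketch; the merge_chunks helper and the PyPDF2 import are assumptions, only the PdfMerger calls mirror the diff:

import os
from PyPDF2 import PdfMerger  # assumed import; the diff only shows PdfMerger in use

def merge_chunks(temp_files, temp_dir):
    # Append each per-chunk PDF, write a single combined file, return its bytes.
    output_pdf = os.path.join(temp_dir, "final.pdf")
    merger = PdfMerger()
    for temp_file in temp_files:
        merger.append(temp_file)
    merger.write(output_pdf)
    merger.close()
    with open(output_pdf, 'rb') as f:
        return f.read()
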
@@ -211,9 +213,11 @@ async def process_url(url, depth, progress_callback):
         if not all_pages:
             return "No pages were successfully crawled. Please check the URL and try again."
 
+        logger.info("Crawling complete. Starting PDF generation...")
         # Use ThreadPoolExecutor to run PDF generation in a separate thread
         loop = asyncio.get_event_loop()
         pdf_content = await loop.run_in_executor(executor, website_to_pdf, all_pages, progress_callback)
+        logger.info("PDF generation complete.")
         return pdf_content
     except Exception as e:
         logger.error(f"Error in process_url: {str(e)}")
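
The new log lines bracket the existing hand-off of blocking PDF work to a ThreadPoolExecutor so the asyncio event loop stays responsive. A small runnable sketch of that pattern; everything apart from the run_in_executor call itself is a stand-in:

import asyncio
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=2)

def generate_pdf(pages, progress_callback):
    # Blocking stand-in for website_to_pdf.
    progress_callback("Processing pages... 100%")
    return b"%PDF-1.4 dummy"

async def process(pages, progress_callback):
    loop = asyncio.get_running_loop()
    # Hand the blocking call to the thread pool so the event loop stays free.
    return await loop.run_in_executor(executor, generate_pdf, pages, progress_callback)

if __name__ == "__main__":
    pdf_bytes = asyncio.run(process(["page1", "page2"], print))
    print(len(pdf_bytes))
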
@@ -262,7 +266,7 @@ app.layout = dbc.Container([
     Output("progress-bar", "style"),
     Input("submit-button", "n_clicks"),
     Input("progress-interval", "n_intervals"),
-    Input("progress-store", "data"),
+    Input("progress-store", "data"),
     State("url-input", "value"),
     State("depth-slider", "value"),
     prevent_initial_call=True
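
Listing the dcc.Store data as an Input means store updates re-trigger this callback alongside the button click and the interval tick. The sketch below shows that wiring in a stripped-down Dash app; the layout components are assumptions beyond the ids visible in the diff:

from dash import Dash, dcc, html, Input, Output, State

app = Dash(__name__)
app.layout = html.Div([
    dcc.Input(id="url-input", type="text"),
    dcc.Slider(id="depth-slider", min=1, max=5, value=2),
    html.Button("Submit", id="submit-button"),
    dcc.Interval(id="progress-interval", interval=1000),
    dcc.Store(id="progress-store"),
    html.Div(id="progress-bar"),
])

@app.callback(
    Output("progress-bar", "style"),
    Input("submit-button", "n_clicks"),
    Input("progress-interval", "n_intervals"),
    Input("progress-store", "data"),   # store updates now also trigger the callback
    State("url-input", "value"),
    State("depth-slider", "value"),
    prevent_initial_call=True,
)
def update_progress(n_clicks, n_intervals, store_data, url, depth):
    # Show the bar once there is any store data; hide it otherwise.
    return {"display": "block"} if store_data else {"display": "none"}
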
@@ -329,7 +333,9 @@ def background_task(url, depth, task_id):
         app.layout.children[1].data = message
 
     try:
+        logger.info(f"Starting background task for URL: {url}, depth: {depth}")
         pdf_content = asyncio.run(process_url(url, depth, progress_callback))
+        logger.info("Background task completed successfully")
         # Store the result in the progress-store
         app.layout.children[1].data = pdf_content
     except Exception as e:
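
The background task wraps the async pipeline in asyncio.run inside a worker thread, so each task gets its own event loop. A minimal sketch of that shape; the threading wiring and the process_url stub are assumptions, not code from this diff:

import asyncio
import logging
import threading

logger = logging.getLogger(__name__)

async def process_url(url, depth, progress_callback):
    # Stand-in for the real crawl-and-convert coroutine.
    progress_callback(f"Crawling {url} to depth {depth}...")
    await asyncio.sleep(0.1)
    return b"%PDF-1.4 dummy"

def background_task(url, depth):
    logger.info(f"Starting background task for URL: {url}, depth: {depth}")
    # asyncio.run gives this worker thread its own event loop and closes it afterwards.
    pdf_content = asyncio.run(process_url(url, depth, progress_callback=logger.info))
    logger.info("Background task completed successfully")
    return pdf_content

worker = threading.Thread(target=background_task, args=("https://example.com", 2))
worker.start()
worker.join()
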