bluenevus committed
Commit 2c0abdb · verified · 1 Parent(s): 281a277

Update app.py

Files changed (1)
  1. app.py +8 -2
app.py CHANGED
@@ -193,8 +193,9 @@ def website_to_pdf(all_pages, progress_callback):
 
         progress = min((i + chunk_size) / len(all_pages), 1.0)
         progress_callback(f"Processing pages... {progress:.0%}")
+        logger.info(f"Generated PDF chunk {i//chunk_size + 1}/{total_chunks}")
 
-    # Merge PDF chunks
+    logger.info("Merging PDF chunks...")
     output_pdf = os.path.join(temp_dir, "final.pdf")
     merger = PdfMerger()
     for temp_file in temp_files:
@@ -202,6 +203,7 @@ def website_to_pdf(all_pages, progress_callback):
     merger.write(output_pdf)
     merger.close()
 
+    logger.info("PDF generation complete. Reading final PDF...")
     with open(output_pdf, 'rb') as f:
         return f.read()
 
@@ -211,9 +213,11 @@ async def process_url(url, depth, progress_callback):
         if not all_pages:
             return "No pages were successfully crawled. Please check the URL and try again."
 
+        logger.info("Crawling complete. Starting PDF generation...")
         # Use ThreadPoolExecutor to run PDF generation in a separate thread
         loop = asyncio.get_event_loop()
         pdf_content = await loop.run_in_executor(executor, website_to_pdf, all_pages, progress_callback)
+        logger.info("PDF generation complete.")
         return pdf_content
     except Exception as e:
         logger.error(f"Error in process_url: {str(e)}")
@@ -262,7 +266,7 @@ app.layout = dbc.Container([
     Output("progress-bar", "style"),
     Input("submit-button", "n_clicks"),
     Input("progress-interval", "n_intervals"),
-    Input("progress-store", "data"),  # Add this line
+    Input("progress-store", "data"),
     State("url-input", "value"),
     State("depth-slider", "value"),
     prevent_initial_call=True
@@ -329,7 +333,9 @@ def background_task(url, depth, task_id):
         app.layout.children[1].data = message
 
     try:
+        logger.info(f"Starting background task for URL: {url}, depth: {depth}")
         pdf_content = asyncio.run(process_url(url, depth, progress_callback))
+        logger.info("Background task completed successfully")
         # Store the result in the progress-store
         app.layout.children[1].data = pdf_content
     except Exception as e:
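
The added logger.info calls trace one flow: crawling finishes, the blocking PDF build is handed to a worker thread via loop.run_in_executor, and the merged file is read back. For context, here is a minimal, self-contained sketch of that logging + executor pattern under stated assumptions: build_pdf is a hypothetical stand-in for website_to_pdf, and the logging setup and max_workers value are illustrative, not taken from app.py.

# Minimal sketch of the pattern the diff relies on (assumed names, not app.py code).
import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

executor = ThreadPoolExecutor(max_workers=2)  # worker count is an assumption

def build_pdf(pages, progress_callback):
    # Blocking stand-in for website_to_pdf(): report progress, then return PDF bytes.
    for i, _ in enumerate(pages, start=1):
        progress_callback(f"Processing pages... {i / len(pages):.0%}")
    logger.info("PDF generation complete. Reading final PDF...")
    return b"%PDF-placeholder"

async def process(pages, progress_callback):
    logger.info("Crawling complete. Starting PDF generation...")
    loop = asyncio.get_event_loop()
    # Off-load the blocking build so the event loop stays responsive.
    pdf_bytes = await loop.run_in_executor(executor, build_pdf, pages, progress_callback)
    logger.info("PDF generation complete.")
    return pdf_bytes

if __name__ == "__main__":
    content = asyncio.run(process(["page-1", "page-2"], logger.info))
    logger.info(f"Got {len(content)} bytes")

Running the blocking merge in an executor keeps the async crawl/progress side responsive while PdfMerger-style work happens in a worker thread, which is the behavior the commit instruments with the new log lines.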