bluenevus committed (verified)
Commit 353269b · 1 Parent(s): 0b1d7d6

Update app.py

Files changed (1)
1. app.py +11 -16
app.py CHANGED
@@ -7,7 +7,6 @@ import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin, urlparse
 from fpdf import FPDF
-import tempfile
 import re
 import logging
 import asyncio
@@ -19,6 +18,7 @@ from threading import local
 import time
 import os
 import ssl
+from io import BytesIO
 
 # Initialize Dash app
 app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
@@ -170,20 +170,18 @@ def website_to_pdf(all_pages):
         if pdf.get_y() > 250:  # Add a new page if the current page is almost full
             pdf.add_page()
 
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
-        pdf_path = tmp.name
-        pdf.output(pdf_path)
-        logger.info(f"PDF saved to: {pdf_path}")
-
-    return pdf_path
+    pdf_buffer = BytesIO()
+    pdf.output(pdf_buffer)
+    pdf_buffer.seek(0)
+    return pdf_buffer
 
 async def process_url(url, depth):
     try:
         all_pages = await crawl_pages(url, depth)
         if not all_pages:
             return "No pages were successfully crawled. Please check the URL and try again."
-        pdf_file = website_to_pdf(all_pages)
-        return pdf_file
+        pdf_buffer = website_to_pdf(all_pages)
+        return pdf_buffer
     except Exception as e:
         logger.error(f"Error in process_url: {str(e)}")
         return f"An error occurred: {str(e)}"
@@ -232,16 +230,13 @@ def update_output(n_clicks, url, depth):
     if not url:
         return "Please enter a valid URL."
 
-    pdf_path = asyncio.run(process_url(url, depth))
+    pdf_buffer = asyncio.run(process_url(url, depth))
 
-    if isinstance(pdf_path, str):
-        return pdf_path  # This is an error message
+    if isinstance(pdf_buffer, str):
+        return pdf_buffer  # This is an error message
 
     try:
-        with open(pdf_path, "rb") as f:
-            encoded = base64.b64encode(f.read()).decode()
-
-        os.unlink(pdf_path)  # Remove the temporary file
+        encoded = base64.b64encode(pdf_buffer.getvalue()).decode()
 
         return html.Div([
             html.H4("PDF Generated Successfully"),
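The heart of this commit is the swap from a tempfile round-trip to an in-memory buffer. Below is a minimal standalone sketch of that pattern, assuming the fpdf2 package (whose output() accepts a file-like object, as the new website_to_pdf relies on); build_pdf_buffer and the sample page data are illustrative names, not part of app.py:

from io import BytesIO

from fpdf import FPDF

def build_pdf_buffer(pages):
    # Render (title, text) pairs into a PDF held entirely in memory.
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    for title, text in pages:
        pdf.add_page()
        pdf.set_font("Helvetica", size=12)
        pdf.multi_cell(0, 10, f"{title}\n\n{text}")
    buffer = BytesIO()
    pdf.output(buffer)  # write the document into the buffer instead of a file
    buffer.seek(0)      # rewind so callers read from the start
    return buffer

Unlike the old NamedTemporaryFile(delete=False) approach, nothing touches disk, and there is no os.unlink() cleanup to miss on an exception path.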
 
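Downstream, update_output() base64-encodes the buffer for the browser. The diff cuts off after the html.H4 heading, so the download link below is a hypothetical continuation, assuming the common Dash data-URI pattern rather than code from app.py:

import base64

from dash import html

def pdf_download_div(pdf_buffer, filename="site.pdf"):
    # Turn the in-memory PDF into a data URI the browser can save.
    encoded = base64.b64encode(pdf_buffer.getvalue()).decode()
    return html.Div([
        html.H4("PDF Generated Successfully"),
        html.A(
            "Download PDF",
            href=f"data:application/pdf;base64,{encoded}",
            download=filename,  # hypothetical name; not shown in the diff
        ),
    ])

For large crawls, dcc.Download with dcc.send_bytes avoids embedding the whole PDF in the page, but the data-URI route matches the encoding step visible here.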