Update app.py
app.py
CHANGED
@@ -7,7 +7,6 @@ import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin, urlparse
 from fpdf import FPDF
-import tempfile
 import re
 import logging
 import asyncio
@@ -19,6 +18,7 @@ from threading import local
 import time
 import os
 import ssl
+from io import BytesIO

 # Initialize Dash app
 app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
@@ -170,20 +170,18 @@ def website_to_pdf(all_pages):
         if pdf.get_y() > 250:  # Add a new page if the current page is almost full
             pdf.add_page()

-
-
-
-
-
-    return pdf_path
+    pdf_buffer = BytesIO()
+    pdf.output(pdf_buffer)
+    pdf_buffer.seek(0)
+    return pdf_buffer

 async def process_url(url, depth):
     try:
         all_pages = await crawl_pages(url, depth)
         if not all_pages:
             return "No pages were successfully crawled. Please check the URL and try again."
-
-        return
+        pdf_buffer = website_to_pdf(all_pages)
+        return pdf_buffer
     except Exception as e:
         logger.error(f"Error in process_url: {str(e)}")
         return f"An error occurred: {str(e)}"
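Taken together, this hunk makes website_to_pdf build the PDF entirely in memory and return a BytesIO buffer, and process_url simply passes that buffer back to the caller, so no temporary file is created or cleaned up. Below is a minimal sketch of the same in-memory pattern, assuming the fpdf2 package and a hypothetical list of (title, text) pages; the real structure of all_pages is not visible in this diff.

```python
from io import BytesIO
from fpdf import FPDF

def pages_to_pdf_buffer(pages):
    """Hypothetical helper mirroring the new website_to_pdf flow."""
    pdf = FPDF()
    pdf.set_font("Helvetica", size=12)
    for title, text in pages:
        pdf.add_page()
        # multi_cell wraps long text across lines (and pages) automatically.
        pdf.multi_cell(0, 10, f"{title}\n\n{text}")
    # With fpdf2, output() called without a filename returns the whole
    # document as a bytearray, which can be wrapped in an in-memory buffer.
    return BytesIO(bytes(pdf.output()))

# Usage: buf = pages_to_pdf_buffer([("Home", "Welcome..."), ("About", "...")])
```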
@@ -232,16 +230,13 @@ def update_output(n_clicks, url, depth):
     if not url:
         return "Please enter a valid URL."

-
+    pdf_buffer = asyncio.run(process_url(url, depth))

-    if isinstance(
-        return
+    if isinstance(pdf_buffer, str):
+        return pdf_buffer  # This is an error message

     try:
-
-        encoded = base64.b64encode(f.read()).decode()
-
-        os.unlink(pdf_path)  # Remove the temporary file
+        encoded = base64.b64encode(pdf_buffer.getvalue()).decode()

         return html.Div([
             html.H4("PDF Generated Successfully"),
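On the callback side, the buffer is base64-encoded so the PDF can be served directly from the page as a data: URI, which is why the os.unlink cleanup disappears. A sketch of how such a download link can be assembled in Dash follows; the helper name and markup are illustrative, since the rest of the callback's return value is not shown in this diff.

```python
import base64
from io import BytesIO
from dash import html

def pdf_download_link(pdf_buffer: BytesIO, filename: str = "website.pdf"):
    """Hypothetical helper: embed an in-memory PDF as a data: URI download link."""
    encoded = base64.b64encode(pdf_buffer.getvalue()).decode()
    return html.Div([
        html.H4("PDF Generated Successfully"),
        html.A(
            "Download PDF",
            href=f"data:application/pdf;base64,{encoded}",
            download=filename,  # suggested filename for the browser
        ),
    ])
```

In newer Dash versions, dcc.Download together with dcc.send_bytes is an alternative that avoids building a large data: URI by hand.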