Update app.py
app.py
CHANGED
@@ -7,7 +7,6 @@ import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin, urlparse
 from fpdf import FPDF
-import tempfile
 import re
 import logging
 import asyncio
@@ -19,6 +18,7 @@ from threading import local
 import time
 import os
 import ssl
+from io import BytesIO

 # Initialize Dash app
 app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
@@ -170,20 +170,18 @@ def website_to_pdf(all_pages):
         if pdf.get_y() > 250:  # Add a new page if the current page is almost full
             pdf.add_page()

-
-
-
-
-
-    return pdf_path
+    pdf_buffer = BytesIO()
+    pdf.output(pdf_buffer)
+    pdf_buffer.seek(0)
+    return pdf_buffer

 async def process_url(url, depth):
     try:
         all_pages = await crawl_pages(url, depth)
         if not all_pages:
             return "No pages were successfully crawled. Please check the URL and try again."
-
-        return
+        pdf_buffer = website_to_pdf(all_pages)
+        return pdf_buffer
     except Exception as e:
         logger.error(f"Error in process_url: {str(e)}")
         return f"An error occurred: {str(e)}"
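Taken together, this hunk makes website_to_pdf build the PDF entirely in memory and return a BytesIO buffer, and process_url simply passes that buffer back to the caller, so no temporary file is created or cleaned up. Below is a minimal sketch of the same in-memory pattern, assuming the fpdf2 package and a hypothetical list of (title, text) pages; the real structure of all_pages is not visible in this diff.

```python
from io import BytesIO
from fpdf import FPDF

def pages_to_pdf_buffer(pages):
    """Hypothetical helper mirroring the new website_to_pdf flow."""
    pdf = FPDF()
    pdf.set_font("Helvetica", size=12)
    for title, text in pages:
        pdf.add_page()
        # multi_cell wraps long text across lines (and pages) automatically.
        pdf.multi_cell(0, 10, f"{title}\n\n{text}")
    # With fpdf2, output() called without a filename returns the whole
    # document as a bytearray, which can be wrapped in an in-memory buffer.
    return BytesIO(bytes(pdf.output()))

# Usage: buf = pages_to_pdf_buffer([("Home", "Welcome..."), ("About", "...")])
```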
@@ -232,16 +230,13 @@ def update_output(n_clicks, url, depth):
     if not url:
         return "Please enter a valid URL."

-
+    pdf_buffer = asyncio.run(process_url(url, depth))

-    if isinstance(
-        return
+    if isinstance(pdf_buffer, str):
+        return pdf_buffer  # This is an error message

     try:
-
-        encoded = base64.b64encode(f.read()).decode()
-
-        os.unlink(pdf_path)  # Remove the temporary file
+        encoded = base64.b64encode(pdf_buffer.getvalue()).decode()

         return html.Div([
             html.H4("PDF Generated Successfully"),
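On the callback side, the buffer is base64-encoded so the PDF can be served directly from the page as a data: URI, which is why the os.unlink cleanup disappears. A sketch of how such a download link can be assembled in Dash follows; the helper name and markup are illustrative, since the rest of the callback's return value is not shown in this diff.

```python
import base64
from io import BytesIO
from dash import html

def pdf_download_link(pdf_buffer: BytesIO, filename: str = "website.pdf"):
    """Hypothetical helper: embed an in-memory PDF as a data: URI download link."""
    encoded = base64.b64encode(pdf_buffer.getvalue()).decode()
    return html.Div([
        html.H4("PDF Generated Successfully"),
        html.A(
            "Download PDF",
            href=f"data:application/pdf;base64,{encoded}",
            download=filename,  # suggested filename for the browser
        ),
    ])
```

In newer Dash versions, dcc.Download together with dcc.send_bytes is an alternative that avoids building a large data: URI by hand.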