# Spaces: Build error
import os
import tempfile
import time
from pathlib import Path

import pdfkit
import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class StreamlitPDFCapture:
    """Render Streamlit pages to PDF files using headless Chrome and pdfkit.

    Every capture starts its own Chrome session, loads the page, sleeps for a
    configurable time so the page can render, and then passes the browser's
    page source to ``pdfkit.from_string`` together with ``self.options``.
    """

    def __init__(self):
        """Store the pdfkit options applied to every generated PDF."""
        # Keys are handed to pdfkit unchanged; ``None`` is the value used for
        # options that are plain switches without an argument.
        self.options = {
            'page-size': 'A4',
            'margin-top': '0mm',
            'margin-right': '0mm',
            'margin-bottom': '0mm',
            'margin-left': '0mm',
            'encoding': 'UTF-8',
            'custom-header': [('Accept-Encoding', 'gzip')],
            'no-outline': None,
            'enable-local-file-access': None,
        }

    def setup_chrome_driver(self):
        """Create and return a headless Chrome WebDriver.

        Returns:
            A new ``webdriver.Chrome`` instance; the caller is responsible
            for calling ``quit()`` on it.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run in headless mode
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        return webdriver.Chrome(options=chrome_options)

    def capture_page(self, url, output_path, wait_time=5):
        """Capture a single Streamlit page as PDF.

        Args:
            url: Address of the page to open in the browser.
            output_path: File path the PDF is written to. Missing parent
                directories are created.
            wait_time: Seconds to sleep after navigation before the page
                source is read.
        """
        # Create the destination folder before starting the browser, so a
        # missing directory does not surface only after the slow capture.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        driver = self.setup_chrome_driver()
        try:
            # Navigate to the page and give it time to finish rendering.
            driver.get(url)
            time.sleep(wait_time)

            # Grow the window to the full document height (plus a margin)
            # before reading the page source.
            height = driver.execute_script("return document.body.scrollHeight")
            driver.set_window_size(1920, height + 100)

            # Convert the rendered HTML to PDF with pdfkit.
            html_content = driver.page_source
            pdfkit.from_string(html_content, output_path, options=self.options)
        finally:
            # Always shut the browser down, even when the capture fails.
            driver.quit()

    def capture_multiple_pages(self, urls, output_dir, prefix="page", wait_time=5):
        """Capture multiple Streamlit pages as separate PDFs.

        Args:
            urls: Iterable of page addresses to capture, in order.
            output_dir: Directory for the PDFs; created if it does not exist.
            prefix: File name prefix; files are named ``{prefix}_{n}.pdf``
                with ``n`` starting at 1.
            wait_time: Seconds to wait on each page, forwarded to
                ``capture_page``.

        Returns:
            List of the PDF paths that were written, in the order of ``urls``.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        pdfs = []
        for index, url in enumerate(urls, start=1):
            output_path = os.path.join(output_dir, f"{prefix}_{index}.pdf")
            self.capture_page(url, output_path, wait_time=wait_time)
            pdfs.append(output_path)
        return pdfs
def add_pdf_download_button(url=None):
    """Add a PDF download button to your Streamlit app.

    Renders a "Download as PDF" button. On a run where that button reports a
    click, the page is captured with ``StreamlitPDFCapture`` and a second
    button offering the generated file is rendered.

    Args:
        url: Address of the page to capture. When ``None``, the address is
            taken from the Streamlit runtime object if it exposes
            ``serverAddress``, otherwise ``http://localhost:8501`` is used.
    """
    if not st.button("📄 Download as PDF"):
        return

    with st.spinner("Generating PDF..."):
        if url is None:
            # NOTE(review): the fallback is used whenever the runtime object
            # has no ``serverAddress`` attribute — confirm whether the
            # Streamlit version in use provides it, or pass ``url`` explicitly.
            ctx = st.runtime.get_instance()
            url = getattr(ctx, "serverAddress", "http://localhost:8501")

        # Write into a private temporary directory so parallel sessions do
        # not overwrite each other's file and nothing is left behind on disk.
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, "streamlit_page.pdf")
            StreamlitPDFCapture().capture_page(url, output_path)
            with open(output_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()

    # Offer the generated bytes for download.
    st.download_button(
        label="⬇️ Download PDF",
        data=pdf_bytes,
        file_name="streamlit_page.pdf",
        mime="application/pdf",
    )
# Example usage in a Streamlit app.
if __name__ == "__main__":
    st.title("Streamlit PDF Capture Demo")

    # Single-page export button.
    add_pdf_download_button()

    # Stand-in for the app's regular content.
    st.write("This is a demo of PDF capture functionality")

    # Batch export: the generate button is only shown once the checkbox is
    # ticked, and the capture runs only when that button is clicked.
    if st.checkbox("Capture multiple pages") and st.button("Generate PDFs for all pages"):
        page_urls = [
            "http://localhost:8501/page1",
            "http://localhost:8501/page2",
        ]
        generated = StreamlitPDFCapture().capture_multiple_pages(page_urls, "output_pdfs")
        st.success(f"Generated {len(generated)} PDFs in output_pdfs directory")