# SeleniumPDF / app.py
# Repository-viewer residue (not part of the program):
#   awacke1 · "Create app.py" · commit 25d433d (verified) · 4.11 kB
import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pdfkit
import time
import os
from pathlib import Path
class StreamlitPDFCapture:
    """Load pages in headless Chrome and convert their HTML to PDF with pdfkit.

    Each capture starts a fresh Chrome driver, loads the URL, waits a fixed
    number of seconds, grabs ``driver.page_source`` and hands that HTML string
    to ``pdfkit.from_string``. The PDF is therefore produced from the HTML
    snapshot, not from Chrome's own rendering.
    """

    # Viewport used when launching Chrome and when resizing to the page height.
    WINDOW_WIDTH = 1920
    WINDOW_HEIGHT = 1080
    # Extra pixels added below the measured document height when resizing.
    HEIGHT_PADDING = 100

    def __init__(self):
        """Create the option mapping passed to pdfkit on every conversion."""
        # Keys are forwarded to pdfkit as-is; a value of None is how pdfkit
        # spells a flag-style option that takes no argument.
        self.options = {
            'page-size': 'A4',
            'margin-top': '0mm',
            'margin-right': '0mm',
            'margin-bottom': '0mm',
            'margin-left': '0mm',
            'encoding': 'UTF-8',
            'custom-header': [('Accept-Encoding', 'gzip')],
            'no-outline': None,
            'enable-local-file-access': None,
        }

    def setup_chrome_driver(self):
        """Return a new headless Chrome driver with the class viewport size."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run in headless mode
        chrome_options.add_argument(
            f"--window-size={self.WINDOW_WIDTH},{self.WINDOW_HEIGHT}"
        )
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        return webdriver.Chrome(options=chrome_options)

    def capture_page(self, url, output_path, wait_time=5):
        """Capture a single page as a PDF file.

        Args:
            url: Address of the page to load in the browser.
            output_path: Destination file path handed to pdfkit.
            wait_time: Seconds to sleep after navigation so the page can
                finish rendering before its HTML is read.

        The driver is always shut down, even if loading or conversion raises;
        such exceptions propagate to the caller.
        """
        driver = self.setup_chrome_driver()
        try:
            driver.get(url)
            # Fixed delay: gives client-side rendering time to complete.
            time.sleep(wait_time)

            # Grow the window to the full document height before snapshotting.
            height = driver.execute_script("return document.body.scrollHeight")
            driver.set_window_size(
                self.WINDOW_WIDTH, height + self.HEIGHT_PADDING
            )

            # Convert the current DOM snapshot to PDF.
            html_content = driver.page_source
            pdfkit.from_string(html_content, output_path, options=self.options)
        finally:
            driver.quit()

    def capture_multiple_pages(self, urls, output_dir, prefix="page", *,
                               wait_time=5):
        """Capture several pages as separate, sequentially numbered PDFs.

        Args:
            urls: Iterable of page addresses, captured in order.
            output_dir: Directory for the PDFs; created (with parents) if it
                does not exist.
            prefix: File-name prefix; files are named ``{prefix}_{n}.pdf``
                with ``n`` starting at 1.
            wait_time: Seconds to wait on each page before capturing it;
                forwarded to :meth:`capture_page`.

        Returns:
            List of the output paths, in the same order as ``urls``.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        pdfs = []
        for index, url in enumerate(urls, start=1):
            output_path = os.path.join(output_dir, f"{prefix}_{index}.pdf")
            self.capture_page(url, output_path, wait_time=wait_time)
            pdfs.append(output_path)
        return pdfs
def add_pdf_download_button(url=None):
    """Add a "Download as PDF" button to the current Streamlit app.

    When the button is clicked, the target page is captured with
    ``StreamlitPDFCapture`` and a second button is rendered that downloads
    the resulting PDF as ``streamlit_page.pdf``.

    Args:
        url: Address of the page to capture. When omitted, the runtime's
            ``serverAddress`` attribute is used if it exists; otherwise the
            app is assumed to be reachable on localhost at the configured
            ``server.port`` (8501 if that option is unset).

    Exceptions raised while capturing the page propagate to the caller.
    """
    # Local import: only this function needs it.
    import tempfile

    if not st.button("📑 Download as PDF"):
        return

    with st.spinner("Generating PDF..."):
        if url is None:
            runtime = st.runtime.get_instance()
            url = getattr(runtime, "serverAddress", None)
        if url is None:
            port = st.get_option("server.port") or 8501
            url = f"http://localhost:{port}"

        pdf_capture = StreamlitPDFCapture()

        # Write into a private temporary directory so concurrent sessions
        # cannot overwrite each other's file and nothing is left on disk.
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, "streamlit_page.pdf")
            pdf_capture.capture_page(url, output_path)
            with open(output_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()

    # Offer the in-memory PDF for download.
    st.download_button(
        label="⬇️ Download PDF",
        data=pdf_bytes,
        file_name="streamlit_page.pdf",
        mime="application/pdf",
    )
# Example usage in your Streamlit app:
def _run_demo():
    """Render the demo page: title, PDF button, and optional batch capture."""
    st.title("Streamlit PDF Capture Demo")

    # Single-page capture button.
    add_pdf_download_button()

    # Placeholder for the app's regular content.
    st.write("This is a demo of PDF capture functionality")

    # Batch capture: the button is only rendered once the checkbox is ticked,
    # because `and` short-circuits.
    page_urls = [
        "http://localhost:8501/page1",
        "http://localhost:8501/page2",
    ]
    if st.checkbox("Capture multiple pages") and st.button(
        "Generate PDFs for all pages"
    ):
        generated = StreamlitPDFCapture().capture_multiple_pages(
            page_urls, "output_pdfs"
        )
        count = len(generated)
        st.success(f"Generated {count} PDFs in output_pdfs directory")


if __name__ == "__main__":
    _run_demo()