# SeleniumPDF / app.py
# Author: awacke1 -- commit 25d433d ("Create app.py", verified)
import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pdfkit
import time
import os
from pathlib import Path
class StreamlitPDFCapture:
    """Capture Streamlit pages as PDF files.

    Each page is loaded in a headless Chrome session driven by Selenium; the
    page source obtained from the browser is then converted to a PDF by
    pdfkit using the options stored in ``self.options``.
    """

    def __init__(self):
        """Initialise the pdfkit options shared by every capture."""
        # Options whose value is None are value-less flags for pdfkit.
        self.options = {
            'page-size': 'A4',
            'margin-top': '0mm',
            'margin-right': '0mm',
            'margin-bottom': '0mm',
            'margin-left': '0mm',
            'encoding': 'UTF-8',
            'custom-header': [('Accept-Encoding', 'gzip')],
            'no-outline': None,
            'enable-local-file-access': None,
        }

    def setup_chrome_driver(self):
        """Setup Chrome driver with appropriate options.

        Returns:
            A new ``webdriver.Chrome`` instance configured for headless use
            with a 1920x1080 window. The caller is responsible for calling
            ``quit()`` on it.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run in headless mode
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        return webdriver.Chrome(options=chrome_options)

    def capture_page(self, url, output_path, wait_time=5):
        """Capture a single Streamlit page as PDF.

        Args:
            url: Address of the page to open in the browser.
            output_path: Destination of the PDF file. Missing parent
                directories are created.
            wait_time: Seconds to sleep after navigation so the page can
                finish rendering before its source is read.
        """
        # pdfkit also accepts a falsy output_path (it then returns the PDF
        # bytes instead of writing a file), so only prepare a directory when
        # a real path was given.
        if output_path:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        driver = self.setup_chrome_driver()
        try:
            # Navigate to page
            driver.get(url)
            time.sleep(wait_time)  # Wait for page to fully load
            # Get page height and set window size
            height = driver.execute_script("return document.body.scrollHeight")
            driver.set_window_size(1920, height + 100)
            html_content = driver.page_source
        finally:
            # Always release the browser, even when loading the page failed.
            driver.quit()

        # The browser is no longer needed once the HTML has been read, so the
        # conversion runs after it has been shut down.
        pdfkit.from_string(html_content, output_path, options=self.options)

    def capture_multiple_pages(self, urls, output_dir, prefix="page", wait_time=5):
        """Capture multiple Streamlit pages as separate PDFs.

        Args:
            urls: Iterable of page addresses to capture, in order.
            output_dir: Directory receiving the PDFs; created if missing.
            prefix: File name prefix; files are named ``<prefix>_<n>.pdf``
                with ``n`` starting at 1.
            wait_time: Seconds to wait on each page before capturing it;
                forwarded to ``capture_page``.

        Returns:
            List of the generated PDF paths, in the same order as ``urls``.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        pdfs = []
        for index, url in enumerate(urls, start=1):
            output_path = os.path.join(output_dir, f"{prefix}_{index}.pdf")
            self.capture_page(url, output_path, wait_time=wait_time)
            pdfs.append(output_path)
        return pdfs
def add_pdf_download_button():
    """Add a PDF download button to your Streamlit app.

    Renders a "Download as PDF" button. When it is clicked, the app page is
    captured with ``StreamlitPDFCapture`` and a second button offering the
    resulting PDF for download is shown.
    """
    # Local import keeps this helper self-contained.
    import tempfile

    if not st.button("📑 Download as PDF"):
        return

    with st.spinner("Generating PDF..."):
        # Get current page URL
        # NOTE(review): the local default is used whenever the runtime object
        # has no `serverAddress` attribute -- confirm the attribute exists.
        ctx = st.runtime.get_instance()
        url = getattr(ctx, "serverAddress", "http://localhost:8501")

        # Initialize capture utility
        pdf_capture = StreamlitPDFCapture()

        # Generate the PDF in a private temporary directory so concurrent
        # sessions cannot overwrite each other's file and nothing is left
        # behind on disk once the bytes have been read.
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, "streamlit_page.pdf")
            pdf_capture.capture_page(url, output_path)
            with open(output_path, "rb") as pdf_file:
                pdfd = pdf_file.read()

    # Provide download link
    st.download_button(
        label="⬇️ Download PDF",
        data=pdfd,
        file_name="streamlit_page.pdf",
        mime="application/pdf",
    )
# Demo entry point showing how to wire the capture helpers into an app.
if __name__ == "__main__":
    st.title("Streamlit PDF Capture Demo")

    # Single-page export button.
    add_pdf_download_button()

    # Stand-in for the app's regular content.
    st.write("This is a demo of PDF capture functionality")

    # Batch export: the generate button is only rendered (and evaluated)
    # once the checkbox has been ticked.
    page_urls = [
        "http://localhost:8501/page1",
        "http://localhost:8501/page2",
    ]
    if st.checkbox("Capture multiple pages") and st.button("Generate PDFs for all pages"):
        capturer = StreamlitPDFCapture()
        generated = capturer.capture_multiple_pages(page_urls, "output_pdfs")
        st.success(f"Generated {len(generated)} PDFs in output_pdfs directory")