Spaces:

awacke1
/

SeleniumPDF

Build error

App Files Files Community

SeleniumPDF / app.py

awacke1

Create app.py

25d433d verified 5 months ago

raw

history blame

4.11 kB

	import streamlit as st
	from selenium import webdriver
	from selenium.webdriver.chrome.service import Service
	from selenium.webdriver.chrome.options import Options
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	import pdfkit
	import time
	import os
	from pathlib import Path

	class StreamlitPDFCapture:
	    """Capture web pages (e.g. Streamlit apps) as PDF files.

	    A page is loaded in headless Chrome via Selenium, its rendered HTML is
	    read back, and that HTML is handed to ``pdfkit`` for conversion.
	    """

	    def __init__(self):
	        # Options passed verbatim to pdfkit on every conversion.
	        # NOTE(review): a value of None presumably marks a value-less
	        # command-line flag, per pdfkit's option convention - confirm.
	        self.options = {
	            'page-size': 'A4',
	            'margin-top': '0mm',
	            'margin-right': '0mm',
	            'margin-bottom': '0mm',
	            'margin-left': '0mm',
	            'encoding': 'UTF-8',
	            'custom-header': [('Accept-Encoding', 'gzip')],
	            'no-outline': None,
	            'enable-local-file-access': None,
	        }

	    def setup_chrome_driver(self):
	        """Create and return a headless Chrome WebDriver.

	        The caller owns the returned driver and is responsible for
	        calling ``quit()`` on it.
	        """
	        chrome_options = Options()
	        chrome_options.add_argument("--headless")  # Run in headless mode
	        chrome_options.add_argument("--window-size=1920,1080")
	        chrome_options.add_argument("--disable-gpu")
	        chrome_options.add_argument("--no-sandbox")
	        return webdriver.Chrome(options=chrome_options)

	    def capture_page(self, url, output_path, wait_time=5):
	        """Capture a single page as a PDF.

	        Args:
	            url: Address of the page to load.
	            output_path: Path of the PDF file to write.
	            wait_time: Seconds to sleep after navigation so the page can
	                finish rendering before its HTML is read.
	        """
	        driver = self.setup_chrome_driver()
	        try:
	            driver.get(url)
	            time.sleep(wait_time)  # Wait for page to fully load

	            # Grow the window to the full document height before reading
	            # the DOM. The window size itself is not passed to pdfkit;
	            # presumably this is meant to force off-screen content to
	            # render so it shows up in page_source.
	            height = driver.execute_script("return document.body.scrollHeight")
	            driver.set_window_size(1920, height + 100)

	            # Convert the rendered HTML snapshot to PDF.
	            html_content = driver.page_source
	            pdfkit.from_string(html_content, output_path, options=self.options)
	        finally:
	            # Always release the browser, even when capture fails.
	            driver.quit()

	    def capture_multiple_pages(self, urls, output_dir, prefix="page", wait_time=5):
	        """Capture several pages as separate PDFs.

	        Files are written to ``output_dir`` (created if missing) and named
	        ``{prefix}_{n}.pdf`` with ``n`` starting at 1, in ``urls`` order.

	        Args:
	            urls: Iterable of page addresses to capture.
	            output_dir: Directory that receives the PDF files.
	            prefix: File-name prefix for the generated PDFs.
	            wait_time: Seconds to wait for each page to load; forwarded
	                to ``capture_page``.

	        Returns:
	            List of the PDF paths written, in capture order.
	        """
	        Path(output_dir).mkdir(parents=True, exist_ok=True)
	        pdfs = []

	        for i, url in enumerate(urls):
	            output_path = os.path.join(output_dir, f"{prefix}_{i+1}.pdf")
	            self.capture_page(url, output_path, wait_time=wait_time)
	            pdfs.append(output_path)

	        return pdfs

	def add_pdf_download_button():
	    """Render a button that captures the app as a PDF and offers it for download.

	    Nothing happens until the "Download as PDF" button is clicked. On a
	    click, the page is captured to ``streamlit_page.pdf`` in the working
	    directory and a second button serving that file's bytes is shown.
	    """
	    if not st.button("📑 Download as PDF"):
	        return

	    with st.spinner("Generating PDF..."):
	        # Use the runtime's serverAddress when it has one; otherwise fall
	        # back to the local default address.
	        runtime = st.runtime.get_instance()
	        page_url = getattr(runtime, 'serverAddress', "http://localhost:8501")

	        # Capture the page to a fixed file name on disk.
	        capturer = StreamlitPDFCapture()
	        pdf_name = "streamlit_page.pdf"
	        capturer.capture_page(page_url, pdf_name)

	        # Read the PDF back and hand its bytes to the download widget.
	        with open(pdf_name, "rb") as handle:
	            pdf_bytes = handle.read()
	        st.download_button(
	            label="⬇️ Download PDF",
	            data=pdf_bytes,
	            file_name="streamlit_page.pdf",
	            mime="application/pdf",
	        )

	# Example usage: the demo below runs only when this module is the main script.
	if __name__ == "__main__":
	    st.title("Streamlit PDF Capture Demo")

	    # Single-page export button.
	    add_pdf_download_button()

	    # Stand-in for the app's regular content.
	    st.write("This is a demo of PDF capture functionality")

	    # Batch export: the generate button is rendered only while the
	    # checkbox is ticked (short-circuit keeps the original nesting).
	    if st.checkbox("Capture multiple pages") and st.button("Generate PDFs for all pages"):
	        page_urls = [
	            "http://localhost:8501/page1",
	            "http://localhost:8501/page2",
	        ]
	        written = StreamlitPDFCapture().capture_multiple_pages(page_urls, "output_pdfs")
	        st.success(f"Generated {len(written)} PDFs in output_pdfs directory")