Spaces:

awacke1
/

SeleniumPDF

Build error

File size: 4,108 Bytes

25d433d

import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pdfkit
import time
import os
from pathlib import Path

class StreamlitPDFCapture:
    """Load Streamlit pages in headless Chrome and save them as PDF files.

    Each page is opened with Selenium, given time to render, and the
    resulting page source is converted to PDF with pdfkit.

    Attributes:
        options: Option dictionary handed to pdfkit on every conversion.
    """

    # Browser viewport used at start-up; the width is kept when the window
    # is later resized to the page height.
    WINDOW_WIDTH = 1920
    WINDOW_HEIGHT = 1080

    def __init__(self):
        # Keys mapped to None are passed to pdfkit as value-less switches.
        self.options = {
            'page-size': 'A4',
            'margin-top': '0mm',
            'margin-right': '0mm',
            'margin-bottom': '0mm',
            'margin-left': '0mm',
            'encoding': 'UTF-8',
            'custom-header': [('Accept-Encoding', 'gzip')],
            'no-outline': None,
            'enable-local-file-access': None
        }

    def setup_chrome_driver(self):
        """Create a headless Chrome WebDriver.

        Returns:
            A new ``webdriver.Chrome`` instance. The caller owns it and is
            responsible for calling ``quit()``.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run in headless mode
        chrome_options.add_argument(
            f"--window-size={self.WINDOW_WIDTH},{self.WINDOW_HEIGHT}"
        )
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        return webdriver.Chrome(options=chrome_options)

    def capture_page(self, url, output_path, wait_time=5):
        """Capture a single Streamlit page as PDF.

        Args:
            url: Address of the page to load.
            output_path: Destination handed to pdfkit. When it is a
                filesystem path, its parent directory is created if missing.
            wait_time: Seconds to sleep after navigation so the page can
                finish rendering before its source is read.
        """
        # Create the destination folder first so a bad path fails before a
        # browser process is started. Non-path values are passed through
        # to pdfkit untouched.
        if isinstance(output_path, (str, os.PathLike)):
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        driver = self.setup_chrome_driver()
        try:
            # Navigate to page
            driver.get(url)
            time.sleep(wait_time)  # Wait for page to fully load

            # Grow the window to the full page height. Skip the resize when
            # the script yields no value instead of failing on None + 100.
            height = driver.execute_script("return document.body.scrollHeight")
            if height is not None:
                driver.set_window_size(self.WINDOW_WIDTH, height + 100)

            # Save as PDF using pdfkit
            html_content = driver.page_source
            pdfkit.from_string(html_content, output_path, options=self.options)
        finally:
            # Always release the browser, even when capture fails.
            driver.quit()

    def capture_multiple_pages(self, urls, output_dir, prefix="page", wait_time=5):
        """Capture multiple Streamlit pages as separate PDFs.

        Args:
            urls: Iterable of page addresses, captured in order.
            output_dir: Directory that receives the PDFs; created if missing.
            prefix: File name prefix; files are named ``{prefix}_{n}.pdf``
                with ``n`` starting at 1.
            wait_time: Seconds to wait on each page, forwarded to
                ``capture_page``.

        Returns:
            List of the PDF paths written, in the same order as ``urls``.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        pdfs = []

        for number, url in enumerate(urls, start=1):
            output_path = os.path.join(output_dir, f"{prefix}_{number}.pdf")
            self.capture_page(url, output_path, wait_time=wait_time)
            pdfs.append(output_path)

        return pdfs

def add_pdf_download_button():
    """Render a button that captures the app as a PDF and offers it for download.

    When the button is pressed, the page is captured to
    ``streamlit_page.pdf`` in the working directory and a second button
    is shown that serves that file's bytes. Nothing happens otherwise.
    """
    if not st.button("📑 Download as PDF"):
        return

    with st.spinner("Generating PDF..."):
        # Use the runtime's serverAddress when it has one, else the
        # default local address.
        runtime = st.runtime.get_instance()
        page_url = getattr(runtime, 'serverAddress', "http://localhost:8501")

        # Capture the page to a fixed file name
        pdf_name = "streamlit_page.pdf"
        StreamlitPDFCapture().capture_page(page_url, pdf_name)

        # Load the generated file so it can be served to the browser
        with open(pdf_name, "rb") as handle:
            pdf_bytes = handle.read()

        st.download_button(
            label="⬇️ Download PDF",
            data=pdf_bytes,
            file_name="streamlit_page.pdf",
            mime="application/pdf",
        )

# Demo app: executed only when this file is run as the main module.
if __name__ == "__main__":
    st.title("Streamlit PDF Capture Demo")

    # Single-page capture button
    add_pdf_download_button()

    # Stand-in for the app's own content
    st.write("This is a demo of PDF capture functionality")

    # Optional batch capture of several pages
    batch_mode = st.checkbox("Capture multiple pages")
    if batch_mode:
        page_urls = [
            "http://localhost:8501/page1",
            "http://localhost:8501/page2",
        ]
        generate_clicked = st.button("Generate PDFs for all pages")
        if generate_clicked:
            generated = StreamlitPDFCapture().capture_multiple_pages(
                page_urls, "output_pdfs"
            )
            st.success(f"Generated {len(generated)} PDFs in output_pdfs directory")