File size: 4,108 Bytes
25d433d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pdfkit
import time
import os
from pathlib import Path

class StreamlitPDFCapture:
    """Capture Streamlit pages as PDF files.

    Each capture starts a headless Chrome driver, loads the requested URL,
    waits a fixed amount of time, and hands the resulting page source to
    ``pdfkit`` together with ``self.options``.
    """

    def __init__(self):
        # Options handed to pdfkit.from_string for every capture.
        self.options = {
            'page-size': 'A4',
            'margin-top': '0mm',
            'margin-right': '0mm',
            'margin-bottom': '0mm',
            'margin-left': '0mm',
            'encoding': 'UTF-8',
            'custom-header': [('Accept-Encoding', 'gzip')],
            'no-outline': None,
            'enable-local-file-access': None
        }

    def setup_chrome_driver(self):
        """Create a headless Chrome driver with a 1920x1080 window.

        Returns:
            A new ``webdriver.Chrome`` instance. The caller is responsible
            for calling ``quit()`` on it.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run in headless mode
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        return webdriver.Chrome(options=chrome_options)

    def capture_page(self, url, output_path, wait_time=5):
        """Capture a single Streamlit page as PDF.

        Args:
            url: Address of the page to load in the browser.
            output_path: Where the generated PDF is written.
            wait_time: Seconds to sleep after navigation before the page
                source is read.
        """
        driver = self.setup_chrome_driver()
        try:
            # Navigate to the page and give it a fixed amount of time to
            # finish rendering before reading it.
            driver.get(url)
            time.sleep(wait_time)

            # Grow the window to the full document height.
            # NOTE(review): pdfkit converts the HTML string on its own, so
            # this resize likely has no effect on the PDF -- confirm.
            height = driver.execute_script("return document.body.scrollHeight")
            driver.set_window_size(1920, height + 100)

            # Convert the rendered page source to PDF using pdfkit.
            html_content = driver.page_source
            pdfkit.from_string(html_content, output_path, options=self.options)

        finally:
            # Always shut the browser down, even when the capture fails.
            driver.quit()

    def capture_multiple_pages(self, urls, output_dir, prefix="page", wait_time=5):
        """Capture multiple Streamlit pages as separate PDFs.

        Args:
            urls: Iterable of page addresses, captured in order.
            output_dir: Directory for the PDFs; created (including parents)
                when it does not exist yet.
            prefix: File name prefix; files are named
                ``{prefix}_{index}.pdf`` with a 1-based index.
            wait_time: Seconds to wait on each page, forwarded to
                ``capture_page``.

        Returns:
            List of the generated PDF paths, in the same order as ``urls``.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        pdfs = []

        for i, url in enumerate(urls, start=1):
            output_path = os.path.join(output_dir, f"{prefix}_{i}.pdf")
            self.capture_page(url, output_path, wait_time=wait_time)
            pdfs.append(output_path)

        return pdfs

def add_pdf_download_button(url=None):
    """Add a PDF download button to your Streamlit app.

    Renders a button; when it is clicked, the page is captured with
    ``StreamlitPDFCapture`` and a second button offering the resulting PDF
    for download is rendered.

    Args:
        url: Address of the page to capture. When ``None``, the
            ``serverAddress`` attribute of the Streamlit runtime instance is
            used if it exists, otherwise ``http://localhost:8501``.
    """
    import tempfile

    if st.button("📑 Download as PDF"):
        with st.spinner("Generating PDF..."):
            if url is None:
                # Fall back to the runtime's address, or the local default
                # when the runtime does not expose one.
                ctx = st.runtime.get_instance()
                url = ctx.serverAddress if hasattr(ctx, 'serverAddress') else "http://localhost:8501"

            # Initialize capture utility
            pdf_capture = StreamlitPDFCapture()

            # Render into a per-call temporary directory so simultaneous
            # sessions never overwrite each other's file and nothing is
            # left behind in the working directory.
            with tempfile.TemporaryDirectory() as tmp_dir:
                output_path = os.path.join(tmp_dir, "streamlit_page.pdf")
                pdf_capture.capture_page(url, output_path)
                with open(output_path, "rb") as pdf_file:
                    pdfd = pdf_file.read()

            # Provide download link; the bytes are already in memory, so the
            # temporary file is no longer needed.
            st.download_button(
                label="⬇️ Download PDF",
                data=pdfd,
                file_name="streamlit_page.pdf",
                mime="application/pdf"
            )

# Demo page: rendered only when this file is the script being executed.
if __name__ == "__main__":
    st.title("Streamlit PDF Capture Demo")

    # Single-page export button sits above the page body.
    add_pdf_download_button()

    # Stand-in for the app's real content.
    st.write("This is a demo of PDF capture functionality")

    # Batch export: the button is only drawn once the checkbox is ticked,
    # and the capture only runs once that button is clicked.
    if st.checkbox("Capture multiple pages") and st.button("Generate PDFs for all pages"):
        page_urls = [
            "http://localhost:8501/page1",
            "http://localhost:8501/page2",
        ]
        generated = StreamlitPDFCapture().capture_multiple_pages(page_urls, "output_pdfs")
        st.success(f"Generated {len(generated)} PDFs in output_pdfs directory")