Spaces:

MicroHealth
/

pdf-compressor

Paused

File size: 3,768 Bytes

import gradio as gr
import PyPDF2
import requests
import io
import tempfile
import sys
from PIL import Image

def compress_image(image, quality):
    """Re-encode an image as JPEG and return the encoded data in a buffer.

    Args:
        image: Image object exposing ``mode``, ``convert()`` and ``save()``
            (callers in this file pass a PIL image).
        quality: JPEG quality value forwarded unchanged to ``image.save``.

    Returns:
        An ``io.BytesIO`` rewound to offset 0 that holds the JPEG bytes.

    Note:
        Images whose mode the JPEG encoder cannot write (for example RGBA,
        LA or palette mode "P") are converted to RGB first, which discards
        any alpha channel.
    """
    # Modes Pillow's JPEG writer accepts as-is; anything else would raise
    # OSError ("cannot write mode ... as JPEG") inside save().
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if getattr(image, "mode", "RGB") not in jpeg_modes:
        image = image.convert("RGB")

    img_buffer = io.BytesIO()
    image.save(img_buffer, format='JPEG', quality=quality)
    img_buffer.seek(0)  # rewind so the caller reads from the start
    return img_buffer

def compress_pdf(input_file, url, strength):
    """Compress a PDF supplied either as an uploaded file or as a URL.

    Exactly one of ``input_file`` and ``url`` must be provided.

    Args:
        input_file: The uploaded PDF, passed straight to
            ``PyPDF2.PdfReader``; ``None`` when no file was uploaded.
        url: Address of a PDF to download; ``None`` or blank when unused.
        strength: ``"Low"``, ``"Medium"`` or any other value (treated as
            "High"). Selects the JPEG quality used for page images.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the compressed PDF's
        temporary file path on success and ``None`` on any failure;
        ``message`` is the status text to show the user.
    """
    import os  # only needed to clean up the output file on failure

    url = url.strip() if url else ""

    if input_file is None and not url:
        return None, "Please provide either a file or a URL."

    if input_file is not None and url:
        return None, "Please provide either a file or a URL, not both."

    if url:
        try:
            # Without a timeout a stalled server would block this call forever.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            return None, f"Error downloading PDF: {str(e)}"
        pdf_content = io.BytesIO(response.content)
    else:
        pdf_content = input_file

    # JPEG quality per strength; unknown values fall back to the "High" setting.
    image_quality = {"Low": 65, "Medium": 40}.get(strength, 20)

    output_path = None
    try:
        reader = PyPDF2.PdfReader(pdf_content)
        writer = PyPDF2.PdfWriter()

        for page in reader.pages:
            page.compress_content_streams()  # Apply content stream compression

            # Recompress the page's images. This is best-effort: a failure on
            # one image is reported and skipped, never fatal for the document.
            for img_index, img in enumerate(page.images):
                try:
                    # NOTE(review): `img.image` and `page.replace_image` do not
                    # appear to exist in the PyPDF2 3.0 API - confirm against
                    # the installed version. If they are missing, every image
                    # is skipped here and only stream compression is applied.
                    raw_image = img.image
                    if raw_image is None:
                        continue
                    pil_image = Image.open(io.BytesIO(raw_image))
                    compressed_image = compress_image(pil_image, image_quality)
                    page.replace_image(img_index, compressed_image)
                except Exception as e:
                    print(f"Error compressing image: {e}")

            writer.add_page(page)

        # Keep the intermediate result in memory. The second pass must not
        # read from the same file it is writing: opening it with 'wb'
        # truncates it while the reader may still need to load objects.
        intermediate = io.BytesIO()
        writer.write(intermediate)
        intermediate.seek(0)

        # Apply additional compression using PyPDF2's built-in compression
        reader = PyPDF2.PdfReader(intermediate)
        writer = PyPDF2.PdfWriter()

        for page in reader.pages:
            page.compress_content_streams()
            writer.add_page(page)

        if reader.metadata is not None:
            writer.add_metadata(reader.metadata)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            output_path = temp_file.name
            writer.write(temp_file)

        return output_path, "PDF compressed successfully!"
    except Exception as e:
        # The output file is created with delete=False, so remove it here
        # rather than leaving a partial file behind.
        if output_path is not None:
            try:
                os.unlink(output_path)
            except OSError:
                pass  # nothing more can be done; still report the real error
        return None, f"Error compressing PDF: {str(e)}"

def process_and_compress(input_file, url, strength):
    """Gradio click handler: run ``compress_pdf`` and relay its result.

    Args:
        input_file: Value of the upload component (``None`` if empty).
        url: Text entered in the URL box.
        strength: Selected compression strength label.

    Returns:
        ``(output_path_or_None, message)`` exactly as produced by
        ``compress_pdf``, with any falsy path normalised to ``None``.
    """
    # Raise the interpreter's recursion ceiling before processing
    # (presumably for deeply nested PDF structures - confirm).
    sys.setrecursionlimit(10000)

    result_path, status = compress_pdf(input_file, url, strength)
    return (result_path if result_path else None), status

# Gradio UI. Components are created in display order, so the statement order
# below defines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Compressor")

    # The two alternative inputs sit side by side; the handler rejects
    # requests that fill in both or neither.
    with gr.Row():
        input_file = gr.File(label="Upload PDF")
        url_input = gr.Textbox(label="Or enter PDF URL")

    # The percentages in the help text are not computed anywhere in this file.
    strength = gr.Radio(
        choices=["Low", "Medium", "High"],
        label="Compression Strength",
        value="Medium",
        info="Low: ~25% compression, Medium: ~50% compression, High: ~75% compression",
    )
    compress_btn = gr.Button("Compress")

    # Outputs: the downloadable result and a status line.
    output_file = gr.File(label="Download Compressed PDF")
    message = gr.Textbox(label="Message")

    compress_btn.click(
        fn=process_and_compress,
        inputs=[input_file, url_input, strength],
        outputs=[output_file, message],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(share=True)