pdf-compressor / app.py
bluenevus's picture
Update app.py
39ae163 verified
raw
history blame
4.33 kB
import gradio as gr
import PyPDF2
import requests
import io
import tempfile
import sys
import os
def _fetch_pdf_bytes(input_file, url, timeout=30):
    """Return the raw PDF bytes from *url* or, when no URL is given, from *input_file*."""
    if url and url.strip():
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url.strip(), timeout=timeout)
        response.raise_for_status()
        return response.content
    if isinstance(input_file, (str, os.PathLike)):
        # The upload was handed over as a plain filesystem path.
        with open(input_file, "rb") as file:
            return file.read()
    if hasattr(input_file, "name"):
        # Wrapper object exposing the on-disk path via ``.name``.
        with open(input_file.name, "rb") as file:
            return file.read()
    # Anything else is treated as a readable binary stream.
    return input_file.read()


def _write_compressed(source, destination):
    """Copy the PDF in stream *source* to stream *destination*, compressing page content streams."""
    reader = PyPDF2.PdfReader(source)
    writer = PyPDF2.PdfWriter()
    for page in reader.pages:
        page.compress_content_streams()
        writer.add_page(page)
    # NOTE(review): kept from the original; these do not appear to be part of the
    # documented PdfWriter API -- confirm they have any effect.
    writer.compress = True
    writer._compress_streams = True
    writer._compress_pages = True
    writer.write(destination)


def compress_pdf(input_file, url, strength):
    """Compress a PDF supplied either as an upload or as a URL.

    Args:
        input_file: ``None``, a filesystem path, an object whose ``.name`` is a
            path, or a binary file-like object with ``read()``.
        url: ``None``/blank, or the address to download the PDF from.
        strength: ``"Low"``, ``"Medium"`` or anything else (treated as "High");
            selects the size ratio below which the second pass is skipped.

    Returns:
        ``(path, message)``. On success ``path`` is a temporary ``.pdf`` file the
        caller owns; on any failure ``path`` is ``None`` and ``message`` explains
        why. Exceptions are reported through ``message``, never raised.
    """
    has_url = bool(url and url.strip())
    if input_file is None and not has_url:
        return None, "Please provide either a file or a URL."
    if input_file is not None and has_url:
        return None, "Please provide either a file or a URL, not both."

    temp_file_path = None
    try:
        data = _fetch_pdf_bytes(input_file, url)
        initial_size = len(data)
        if initial_size == 0:
            # Nothing to parse, and the ratio below would divide by zero.
            return None, "Error compressing PDF: Cannot read an empty file"

        # Fraction of the original size at which a single pass is considered enough.
        target_ratio = {"Low": 0.75, "Medium": 0.50}.get(strength, 0.25)

        # First pass: written to a temp file that outlives this function on success.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file_path = temp_file.name
            _write_compressed(io.BytesIO(data), temp_file)

        current_ratio = os.path.getsize(temp_file_path) / initial_size
        if current_ratio > target_ratio:
            # Second pass re-runs the same compression over the first result.
            # Buffer the input so the file can be safely reopened for writing.
            with open(temp_file_path, "rb") as first_pass:
                first_pass_bytes = io.BytesIO(first_pass.read())
            with open(temp_file_path, "wb") as temp_file:
                _write_compressed(first_pass_bytes, temp_file)

        final_ratio = os.path.getsize(temp_file_path) / initial_size
        compression_percentage = (1 - final_ratio) * 100
        return temp_file_path, f"PDF compressed successfully! Compression achieved: {compression_percentage:.2f}%"
    except Exception as e:
        # Do not leave a half-written temp file behind when reporting a failure.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError:
                pass  # best-effort cleanup; the original error is what matters
        return None, f"Error compressing PDF: {str(e)}"
# Gradio callback and UI wiring.
def process_and_compress(input_file, url, strength):
    """Click handler: run ``compress_pdf`` and hand its result to the UI.

    Returns ``(path, message)``; a falsy path is normalised to ``None``.
    """
    # Process-wide setting, re-applied on every call.
    # NOTE(review): presumably raised so deeply nested PDFs do not hit the
    # default recursion limit while being parsed -- confirm.
    sys.setrecursionlimit(10000)
    compressed_path, status = compress_pdf(input_file, url, strength)
    return (compressed_path or None), status
# UI layout: inputs on top, then the strength selector, the action button and the outputs.
# NOTE(review): the original indentation was lost; the two input widgets are
# assumed to be the only children of the row -- confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Compressor")

    with gr.Row():
        input_file = gr.File(label="Upload PDF")
        url_input = gr.Textbox(label="Or enter PDF URL")

    strength = gr.Radio(
        choices=["Low", "Medium", "High"],
        label="Compression Strength",
        value="Medium",
        info="Low: ~25% compression, Medium: ~50% compression, High: ~75% compression",
    )
    compress_btn = gr.Button("Compress")
    output_file = gr.File(label="Download Compressed PDF")
    message = gr.Textbox(label="Message")

    # Clicking the button feeds the three inputs to the handler and shows its
    # (file, message) result in the two output widgets.
    compress_btn.click(
        fn=process_and_compress,
        inputs=[input_file, url_input, strength],
        outputs=[output_file, message],
    )

# Start the app only when executed as a script; share=True is passed to launch().
if __name__ == "__main__":
    demo.launch(share=True)