# Spaces: Paused
import gradio as gr | |
import PyPDF2 | |
import requests | |
import io | |
import tempfile | |
import sys | |
from PIL import Image | |
def compress_image(image, quality):
    """Re-encode an image as JPEG and return the bytes in a rewound buffer.

    Args:
        image: A PIL ``Image`` (any object exposing ``mode``, ``convert`` and
            ``save`` works).
        quality: JPEG quality setting forwarded to ``image.save``.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds the JPEG data.
    """
    # JPEG has no alpha channel or palette; Pillow refuses to save modes such
    # as RGBA / P / LA as JPEG, so flatten anything unusual to RGB first.
    # Any transparency information is discarded by this conversion.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    jpeg_buffer = io.BytesIO()
    image.save(jpeg_buffer, format="JPEG", quality=quality)
    jpeg_buffer.seek(0)  # Callers read from the start of the buffer.
    return jpeg_buffer
# JPEG quality per UI strength label. Any other value is treated like "High",
# matching the original if/elif/else chain.
_IMAGE_QUALITY_BY_STRENGTH = {"Low": 65, "Medium": 40}
_HIGH_IMAGE_QUALITY = 20

# Upper bound (seconds) for the URL download so a stalled server cannot hang
# the request handler indefinitely.
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _recompress_page_images(page, image_quality):
    """Best-effort JPEG recompression of the images on ``page``; never raises."""
    try:
        images = list(page.images)
    except Exception as e:
        # Image extraction itself can fail; the page is still usable.
        print(f"Error compressing image: {e}")
        return

    for img_index, img in enumerate(images):
        # Everything that touches the image lives inside the try so one
        # problematic image cannot abort compression of the whole document.
        try:
            if img.image is None:
                continue
            pil_image = Image.open(io.BytesIO(img.image))
            compressed_image = compress_image(pil_image, image_quality)
            # NOTE(review): `replace_image` does not appear in PyPDF2's
            # documented PageObject API, and `img.image` may not be raw
            # bytes — this step probably always lands in the except branch,
            # i.e. images are likely never actually recompressed. Confirm
            # against the installed PyPDF2 version.
            page.replace_image(img_index, compressed_image)
        except Exception as e:
            print(f"Error compressing image: {e}")


def _copy_document_info(reader, writer):
    """Best-effort copy of the source document's metadata onto ``writer``."""
    try:
        metadata = reader.metadata
        if not metadata:
            return
        # Only plain string/bytes values are forwarded; anything else is
        # skipped rather than risking a failure inside add_metadata.
        info = {}
        for key in metadata:
            value = metadata[key]
            if isinstance(value, (str, bytes)):
                info[key] = value
        if info:
            writer.add_metadata(info)
    except Exception as e:
        print(f"Error copying PDF metadata: {e}")


def compress_pdf(input_file, url, strength):
    """Compress a PDF given either an uploaded file or a URL.

    Exactly one of ``input_file`` / ``url`` must be supplied.

    Args:
        input_file: The uploaded PDF as handed over by the UI (passed straight
            to ``PyPDF2.PdfReader``), or ``None``.
        url: URL of a PDF to download, or ``None`` / blank.
        strength: ``"Low"``, ``"Medium"`` or ``"High"``; selects the JPEG
            quality used for images (65 / 40 / 20). Any other value is
            treated as ``"High"``.

    Returns:
        ``(path, message)``: ``path`` is the compressed PDF's temp-file path
        on success and ``None`` on failure; ``message`` describes the outcome.
        Errors are reported through ``message`` rather than raised.
    """
    url = url.strip() if url else ""
    if input_file is None and not url:
        return None, "Please provide either a file or a URL."
    if input_file is not None and url:
        return None, "Please provide either a file or a URL, not both."

    if url:
        # NOTE(review): the URL is user-supplied and fetched server-side;
        # consider restricting allowed hosts if this is publicly deployed.
        try:
            response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
            response.raise_for_status()
        except requests.RequestException as e:
            return None, f"Error downloading PDF: {str(e)}"
        pdf_content = io.BytesIO(response.content)
    else:
        pdf_content = input_file

    try:
        image_quality = _IMAGE_QUALITY_BY_STRENGTH.get(
            strength, _HIGH_IMAGE_QUALITY
        )

        reader = PyPDF2.PdfReader(pdf_content)
        writer = PyPDF2.PdfWriter()
        for page in reader.pages:
            page.compress_content_streams()  # Apply content stream compression
            _recompress_page_images(page, image_quality)
            writer.add_page(page)

        # Carry over the *source* document's metadata. The previous second
        # pass read metadata back from its own freshly written temp file,
        # which no longer contained the original document information.
        _copy_document_info(reader, writer)

        # Serialise in memory first: the temp file is only created once the
        # PDF has been produced, so a failing write cannot leave a stray
        # file behind, and nothing is ever read from a file being rewritten.
        pdf_bytes = io.BytesIO()
        writer.write(pdf_bytes)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(pdf_bytes.getvalue())
            temp_file_path = temp_file.name

        return temp_file_path, "PDF compressed successfully!"
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return None, f"Error compressing PDF: {str(e)}"
def process_and_compress(input_file, url, strength):
    """Click handler: run ``compress_pdf`` and hand its result to the UI.

    Returns ``(path, message)`` where ``path`` is ``None`` whenever
    ``compress_pdf`` produced no (truthy) output path.
    """
    # Raised before any PDF work starts; presumably needed for deeply nested
    # PDF object trees — TODO confirm.
    sys.setrecursionlimit(10000)

    result_path, status = compress_pdf(input_file, url, strength)
    return (result_path if result_path else None), status
# Gradio front end: one upload widget and one URL box feed
# process_and_compress, whose (file, message) result fills the two outputs.
# NOTE(review): nesting was reconstructed from a flattened paste; the File and
# Textbox inputs are assumed to be the only children of the Row — confirm.
_STRENGTH_CHOICES = ["Low", "Medium", "High"]
_STRENGTH_INFO = (
    "Low: ~25% compression, Medium: ~50% compression, High: ~75% compression"
)

with gr.Blocks() as demo:
    gr.Markdown("# PDF Compressor")

    with gr.Row():
        input_file = gr.File(label="Upload PDF")
        url_input = gr.Textbox(label="Or enter PDF URL")

    strength = gr.Radio(
        _STRENGTH_CHOICES,
        label="Compression Strength",
        value="Medium",
        info=_STRENGTH_INFO,
    )
    compress_btn = gr.Button("Compress")

    output_file = gr.File(label="Download Compressed PDF")
    message = gr.Textbox(label="Message")

    compress_btn.click(
        process_and_compress,
        inputs=[input_file, url_input, strength],
        outputs=[output_file, message],
    )

if __name__ == "__main__":
    # share=True asks Gradio for a public share link in addition to the
    # local server.
    demo.launch(share=True)