PDF-resizer

Sleeping

File size: 3,391 Bytes

3b2d5b2
a1622dd
49acc9d
 
 
 
f8f9bae
 
 
 
 
 
 
 
 
 
 
 
 
0787dc4
 
5b36d3d
f8f9bae
a1622dd
49acc9d
fe2e131
0787dc4
49acc9d
 
0787dc4
49acc9d
0787dc4
 
49acc9d
 
 
0787dc4
 
49acc9d
 
 
 
 
fe2e131
 
 
5b36d3d
 
49acc9d
 
fe2e131
 
 
 
5b36d3d
0787dc4
 
f8f9bae
0787dc4
 
 
 
 
 
 
 
 
f8f9bae
0787dc4
5b36d3d
49acc9d
 
 
 
 
 
 
 
 
 
 
0787dc4
49acc9d
 
 
 
f8f9bae
49acc9d
3b2d5b2

import gradio as gr
import tempfile
from pdf2image import convert_from_path
from PIL import Image
import img2pdf
import os
import shutil
import PyPDF2

def get_pdf_page_sizes(pdf_path):
    """Collect the media-box dimensions of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A list holding one ``(page_index, width_pt, height_pt, width_mm,
        height_mm)`` tuple per page, in document order. The point values are
        read from the page's media box; the millimetre values are derived
        from them (1 pt = 1/72 inch, 1 inch = 25.4 mm).
    """

    def describe(index, page):
        # Build the 5-tuple for a single page.
        box = page.mediabox
        points = (float(box.width), float(box.height))
        millimetres = tuple(pt * 25.4 / 72 for pt in points)
        return (index, *points, *millimetres)

    with open(pdf_path, "rb") as stream:
        document = PyPDF2.PdfReader(stream)
        return [describe(n, page) for n, page in enumerate(document.pages)]

def downscale_pdf(pdf_file, dpi=150, max_width=1500):
    """Rasterize a PDF, shrink over-wide pages and rebuild it as a JPEG-based PDF.

    Args:
        pdf_file: The source PDF, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing that path
            through a ``name`` attribute (e.g. an uploaded temp-file wrapper).
        dpi: Resolution, in dots per inch, at which each page is rasterized.
        max_width: Maximum width in pixels of a page image. Wider pages are
            scaled down with their aspect ratio preserved; narrower pages are
            left untouched.

    Returns:
        A ``(output_path, report)`` tuple. ``output_path`` is the path of a
        newly created ``.pdf`` temp file; it is intentionally not deleted
        here because the caller reads it after this function returns.
        ``report`` is a text summary listing each page's pixel size and its
        size inside the generated PDF.

    Raises:
        ValueError: If no file was supplied, if ``dpi`` or ``max_width`` is
            not positive, or if the PDF could not be converted to images.
    """
    if pdf_file is None:
        raise ValueError("PDFファイルが指定されていません。")
    # Accept both a plain path and a wrapper object carrying the path in `.name`.
    source_path = os.fspath(getattr(pdf_file, "name", pdf_file))

    # A UI slider may hand over a float; Image.resize needs integer sizes.
    max_width = int(max_width)
    if dpi <= 0 or max_width <= 0:
        raise ValueError("DPIと最大幅には正の値を指定してください。")

    pixel_info = []  # (page index, width px, height px) of each written image
    with tempfile.TemporaryDirectory() as tmpdir:
        images = convert_from_path(source_path, dpi=dpi, output_folder=tmpdir)
        if not images:
            raise ValueError("PDFを画像に変換できませんでした。")

        downscaled_paths = []
        for i, img in enumerate(images):
            w, h = img.size
            page_dpi = dpi
            if w > max_width:
                # Never let an extreme aspect ratio truncate the height to 0.
                new_h = max(1, int(h * max_width / w))
                img = img.resize((max_width, new_h), Image.LANCZOS)
                # Fewer pixels over the same physical width => lower density.
                page_dpi = dpi * max_width / w
            pixel_info.append((i, img.size[0], img.size[1]))

            # Store the effective density in the JPEG. Without it img2pdf
            # falls back to its default resolution and the rebuilt pages no
            # longer match the physical size of the source pages. JPEG density
            # is an integer, so the page size is preserved up to rounding.
            density = max(1, round(page_dpi))
            img_path = os.path.join(tmpdir, f"page_{i}.jpg")
            img.save(img_path, "JPEG", quality=85, dpi=(density, density))
            downscaled_paths.append(img_path)

        # Build the PDF in memory while the page JPEGs still exist on disk.
        pdf_bytes = img2pdf.convert(downscaled_paths)

    # Created only after every fallible conversion step, so a failure above
    # does not leave a stray temp file behind. delete=False keeps the file
    # available to the caller once this function has returned.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as result_file:
        result_file.write(pdf_bytes)
    result_path = result_file.name

    # Read back the page sizes of the PDF that was just written.
    pdf_sizes = get_pdf_page_sizes(result_path)

    # Merge pixel and PDF dimensions into a per-page text report.
    info_lines = []
    for (i, width_px, height_px), page_size in zip(pixel_info, pdf_sizes):
        width_pt, height_pt, width_mm, height_mm = page_size[1:]
        info_lines.append(
            f"Page {i+1}:\n"
            f"  - Pixel size : {width_px} x {height_px} px\n"
            f"  - PDF size   : {width_pt:.1f}pt x {height_pt:.1f}pt (~{width_mm:.1f}mm x {height_mm:.1f}mm)\n"
        )

    return result_path, "\n".join(info_lines)

# Gradio UI: inputs in the left column, results in the right column.
# Component creation order and `with` nesting define the rendered layout.
with gr.Blocks() as demo:
    gr.Markdown("# PDF 解像度ダウンサイザー\nPDFのページ画像を縮小して容量を減らします。")
    with gr.Row():
        with gr.Column():
            # Source PDF upload, restricted to the .pdf extension.
            pdf_input = gr.File(label="入力PDF", file_types=[".pdf"])
            # Rasterization DPI, passed to downscale_pdf as `dpi` (initial value 150).
            dpi_input = gr.Slider(72, 300, value=150, step=1, label="変換DPI")
            # Maximum page width in pixels, passed as `max_width` (initial value 1500).
            width_input = gr.Slider(500, 3000, value=1500, step=50, label="最大幅(px)")
            convert_button = gr.Button("変換")

        with gr.Column():
            # Receives the first element returned by downscale_pdf (output file path).
            pdf_output = gr.File(label="出力PDF")
            # Receives the second element returned by downscale_pdf (text report).
            size_output = gr.Textbox(label="出力PDFの詳細", lines=12)

    # Clicking the button calls downscale_pdf(pdf_input, dpi_input, width_input);
    # the order of `inputs` matches the function's positional parameters and the
    # order of `outputs` matches its returned tuple.
    convert_button.click(
        fn=downscale_pdf,
        inputs=[pdf_input, dpi_input, width_input],
        outputs=[pdf_output, size_output]
    )

# NOTE(review): launch() runs unconditionally at module level (no __main__ guard),
# so importing this module also calls it.
demo.launch()