File size: 3,090 Bytes
3b2d5b2
a1622dd
49acc9d
 
 
 
f8f9bae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b2d5b2
5b36d3d
f8f9bae
a1622dd
5b36d3d
49acc9d
fe2e131
 
49acc9d
 
 
 
 
 
 
 
 
 
 
 
fe2e131
 
 
5b36d3d
 
 
49acc9d
 
fe2e131
 
 
 
 
5b36d3d
f8f9bae
 
 
 
 
5b36d3d
49acc9d
 
 
 
 
 
 
 
 
 
 
f8f9bae
49acc9d
 
 
 
f8f9bae
49acc9d
3b2d5b2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
import tempfile
from pdf2image import convert_from_path
from PIL import Image
import img2pdf
import os
import shutil
import PyPDF2

def get_pdf_page_sizes(pdf_path):
    """Summarize the MediaBox size of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file to open (read in binary mode).

    Returns:
        A single string with one line per page, joined by newlines. Each
        line carries the 1-based page number, the MediaBox width and height
        in points, and the same dimensions converted to millimetres.
    """

    def _describe(number, page):
        # Format one report line for a single page.
        box = page.mediabox
        w_pt, h_pt = float(box.width), float(box.height)
        # 1 point = 1/72 inch and 1 inch = 25.4 mm.
        w_mm, h_mm = w_pt * 25.4 / 72, h_pt * 25.4 / 72
        return f"Page {number}: {w_pt:.1f}pt x {h_pt:.1f}pt  (~{w_mm:.1f}mm x {h_mm:.1f}mm)"

    with open(pdf_path, "rb") as stream:
        pdf = PyPDF2.PdfReader(stream)
        report = [_describe(n, pg) for n, pg in enumerate(pdf.pages, start=1)]
    return "\n".join(report)


def downscale_pdf(pdf_file, dpi=150, max_width=1500):
    """Rasterize a PDF, shrink over-wide pages, and rebuild a smaller PDF.

    Every page is rendered at ``dpi``, resized (aspect ratio kept) so that
    its width does not exceed ``max_width`` pixels, stored as a JPEG
    (quality 85) and reassembled into a new PDF with img2pdf.

    Args:
        pdf_file: The input PDF, either as a filesystem path (``str`` or
            ``os.PathLike``) or as an object exposing the path via ``.name``
            (for example a temp-file wrapper).
        dpi: Rendering resolution handed to pdf2image. Coerced to ``int``.
        max_width: Maximum page-image width in pixels. Coerced to ``int``.

    Returns:
        A tuple ``(output_pdf_path, page_sizes_info)``. The output file is
        created with ``delete=False`` so it outlives this call; the second
        item is the text produced by ``get_pdf_page_sizes`` for that file.

    Raises:
        ValueError: If no file was given, if ``dpi`` or ``max_width`` is not
            positive, or if the PDF produced no page images.
    """
    if pdf_file is None:
        raise ValueError("PDFファイルが指定されていません。")

    # Depending on the Gradio version / File component settings the upload
    # arrives either as a plain path or as a wrapper object with ``.name``.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = pdf_file
    else:
        pdf_path = pdf_file.name

    # Slider values may arrive as floats; PIL's resize needs integer sizes.
    dpi = int(dpi)
    max_width = int(max_width)
    if dpi <= 0 or max_width <= 0:
        raise ValueError("DPIと最大幅には正の値を指定してください。")

    with tempfile.TemporaryDirectory() as tmpdir:
        # Step 1: PDF -> images
        images = convert_from_path(pdf_path, dpi=dpi, output_folder=tmpdir)
        if not images:
            raise ValueError("PDFを画像に変換できませんでした。PDFが空か、Popplerが入っていないかもしれません。")

        downscaled_paths = []
        for i, page in enumerate(images):
            w, h = page.size
            img = page
            if w > max_width:
                new_h = max(1, int(h * max_width / w))
                img = page.resize((max_width, new_h), Image.LANCZOS)

            # Record the effective resolution in the JPEG so that img2pdf
            # lays the page out at its original physical size instead of
            # falling back to its default resolution.
            effective_dpi = max(1, round(dpi * img.width / w))

            img_path = os.path.join(tmpdir, f"page_{i}.jpg")
            img.save(
                img_path,
                "JPEG",
                quality=85,
                dpi=(effective_dpi, effective_dpi),
            )
            downscaled_paths.append(img_path)

            # Release the rendered page early so memory use does not grow
            # with the page count.
            page.close()

        # Step 2: images -> PDF. The bytes must be built while tmpdir still
        # exists, because the JPEG files are removed when the block exits.
        pdf_bytes = img2pdf.convert(downscaled_paths)

    # Write the result outside tmpdir so the file survives for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as result_file:
        result_file.write(pdf_bytes)

    # Collect page size information of the generated PDF.
    page_sizes_info = get_pdf_page_sizes(result_file.name)

    return result_file.name, page_sizes_info


# Assemble the Gradio interface: the first column holds the inputs and the
# trigger button, the second column shows the generated PDF and its page sizes.
with gr.Blocks() as demo:
    gr.Markdown(
        value=(
            "# PDF 解像度ダウンサイザー\n"
            "PDFのページ画像を縮小して容量を減らします。"
        )
    )
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(file_types=[".pdf"], label="入力PDF")
            dpi_input = gr.Slider(
                minimum=72,
                maximum=300,
                value=150,
                step=1,
                label="変換DPI",
            )
            width_input = gr.Slider(
                minimum=500,
                maximum=3000,
                value=1500,
                step=50,
                label="最大幅(px)",
            )
            convert_button = gr.Button(value="変換")

        with gr.Column():
            pdf_output = gr.File(label="出力PDF")
            size_output = gr.Textbox(lines=10, label="ページサイズ情報")

    # Run the conversion when the button is pressed; the two return values of
    # downscale_pdf map onto the two output components in order.
    convert_button.click(
        downscale_pdf,
        [pdf_input, dpi_input, width_input],
        [pdf_output, size_output],
    )

# Start the web server for the interface.
demo.launch()