PDF-resizer / app.py
soiz1's picture
Update app.py
0787dc4 verified
raw
history blame
3.39 kB
import gradio as gr
import tempfile
from pdf2image import convert_from_path
from PIL import Image
import img2pdf
import os
import shutil
import PyPDF2
def get_pdf_page_sizes(pdf_path):
    """Collect the media-box dimensions of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A list with one tuple per page, in page order:
        ``(page_index, width_pt, height_pt, width_mm, height_mm)``.
        ``page_index`` is zero-based; point values come from the page's
        media box and millimetres are derived from them (72 pt per inch,
        25.4 mm per inch).
    """

    def _to_mm(points):
        # Operand order is kept as-is so the float result is unchanged.
        return points * 25.4 / 72

    page_sizes = []
    with open(pdf_path, "rb") as stream:
        for index, page in enumerate(PyPDF2.PdfReader(stream).pages):
            box = page.mediabox
            w_pt, h_pt = float(box.width), float(box.height)
            page_sizes.append((index, w_pt, h_pt, _to_mm(w_pt), _to_mm(h_pt)))
    return page_sizes
def _resolve_upload_path(pdf_file):
    """Return the filesystem path behind a Gradio upload value."""
    if pdf_file is None:
        raise ValueError("PDFファイルが指定されていません。")
    # The upload may arrive as a plain path (str / PathLike) or as a
    # file-like wrapper that exposes the path through ``.name``.
    if isinstance(pdf_file, (str, os.PathLike)):
        return os.fspath(pdf_file)
    return pdf_file.name


def _fit_within_width(width, height, max_width):
    """Return (width, height) scaled down to ``max_width``, keeping aspect ratio."""
    if width <= max_width:
        return width, height
    # Clamp to 1 px so extremely wide pages never yield a zero-height image.
    return max_width, max(1, int(height * max_width / width))


def downscale_pdf(pdf_file, dpi=150, max_width=1500):
    """Rasterize a PDF, shrink over-wide pages and rebuild a smaller PDF.

    Every page is rendered at ``dpi``; pages wider than ``max_width`` pixels
    are resized (aspect ratio kept) and all pages are stored as JPEG
    (quality 85) before being reassembled into a new PDF.

    Args:
        pdf_file: The uploaded PDF, either a path (``str`` / ``os.PathLike``)
            or an object whose ``.name`` attribute holds the path.
        dpi: Rendering resolution used when converting pages to images.
        max_width: Maximum page-image width in pixels.

    Returns:
        A ``(path, info)`` tuple: the path of the generated PDF (a temporary
        file that is NOT deleted automatically) and a human-readable summary
        of each page's pixel size and resulting PDF page size.

    Raises:
        ValueError: If no file was supplied, if ``dpi`` / ``max_width`` are
            not positive, or if the PDF produced no page images.
    """
    source_path = _resolve_upload_path(pdf_file)

    # UI sliders may deliver floats; Pillow needs integer pixel sizes.
    dpi = int(dpi)
    max_width = int(max_width)
    if dpi <= 0 or max_width <= 0:
        raise ValueError("DPIと最大幅には正の値を指定してください。")

    with tempfile.TemporaryDirectory() as tmpdir:
        images = convert_from_path(source_path, dpi=dpi, output_folder=tmpdir)
        if not images:
            raise ValueError("PDFを画像に変換できませんでした。")

        downscaled_paths = []
        pixel_info = []  # (page index, width px, height px) of each saved image
        for i, img in enumerate(images):
            w, h = img.size
            new_w, new_h = _fit_within_width(w, h, max_width)
            if (new_w, new_h) != (w, h):
                img = img.resize((new_w, new_h), Image.LANCZOS)
            pixel_info.append((i, new_w, new_h))

            # Record the effective pixel density in the JPEG. Without it,
            # img2pdf falls back to its default resolution and the output
            # page size would no longer match the source page.
            density = (
                max(1, round(dpi * new_w / w)),
                max(1, round(dpi * new_h / h)),
            )
            img_path = os.path.join(tmpdir, f"page_{i}.jpg")
            img.save(img_path, "JPEG", quality=85, dpi=density)
            downscaled_paths.append(img_path)

        tmp_output_path = os.path.join(tmpdir, "downscaled.pdf")
        with open(tmp_output_path, "wb") as f_out:
            f_out.write(img2pdf.convert(downscaled_paths))

        # Read the page sizes (PDF units) while the file is still inside the
        # self-cleaning directory, so a failure here leaves nothing behind.
        pdf_sizes = get_pdf_page_sizes(tmp_output_path)

        # Compose the per-page report text.
        info_lines = []
        for i, width_px, height_px in pixel_info:
            width_pt, height_pt, width_mm, height_mm = pdf_sizes[i][1:]
            info_lines.append(
                f"Page {i+1}:\n"
                f"  - Pixel size : {width_px} x {height_px} px\n"
                f"  - PDF size   : {width_pt:.1f}pt x {height_pt:.1f}pt (~{width_mm:.1f}mm x {height_mm:.1f}mm)\n"
            )

        # Persist the result outside tmpdir as the very last step; the
        # context manager closes the handle even if the copy fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as result_file:
            with open(tmp_output_path, "rb") as src:
                shutil.copyfileobj(src, result_file)

    return result_file.name, "\n".join(info_lines)
# Gradio front end: one row with two columns -- the upload and tuning
# controls in the first, the generated PDF and its size report in the second.
with gr.Blocks() as demo:
    gr.Markdown("# PDF 解像度ダウンサイザー\nPDFのページ画像を縮小して容量を減らします。")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(file_types=[".pdf"], label="入力PDF")
            dpi_input = gr.Slider(
                minimum=72, maximum=300, step=1, value=150, label="変換DPI"
            )
            width_input = gr.Slider(
                minimum=500, maximum=3000, step=50, value=1500, label="最大幅(px)"
            )
            convert_button = gr.Button(value="変換")

        with gr.Column():
            pdf_output = gr.File(label="出力PDF")
            size_output = gr.Textbox(lines=12, label="出力PDFの詳細")

    # Clicking the button runs the conversion and fills both output widgets.
    convert_button.click(
        downscale_pdf,
        [pdf_input, dpi_input, width_input],
        [pdf_output, size_output],
    )

# Start the web server for the interface defined above.
demo.launch()