Spaces:
Sleeping
Sleeping
File size: 3,391 Bytes
3b2d5b2 a1622dd 49acc9d f8f9bae 0787dc4 5b36d3d f8f9bae a1622dd 49acc9d fe2e131 0787dc4 49acc9d 0787dc4 49acc9d 0787dc4 49acc9d 0787dc4 49acc9d fe2e131 5b36d3d 49acc9d fe2e131 5b36d3d 0787dc4 f8f9bae 0787dc4 f8f9bae 0787dc4 5b36d3d 49acc9d 0787dc4 49acc9d f8f9bae 49acc9d 3b2d5b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
import tempfile
from pdf2image import convert_from_path
from PIL import Image
import img2pdf
import os
import shutil
import PyPDF2
def get_pdf_page_sizes(pdf_path):
    """Return the media-box dimensions of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A list with one tuple per page, in page order:
        ``(page_index, width_pt, height_pt, width_mm, height_mm)``.
        ``page_index`` is zero-based; the millimetre values are derived
        from the point values.
    """

    def _page_record(index, page):
        box = page.mediabox
        w_pt, h_pt = float(box.width), float(box.height)
        # Points -> millimetres: 72 pt per inch, 25.4 mm per inch.
        return (index, w_pt, h_pt, w_pt * 25.4 / 72, h_pt * 25.4 / 72)

    with open(pdf_path, "rb") as stream:
        pages = PyPDF2.PdfReader(stream).pages
        return [_page_record(n, pg) for n, pg in enumerate(pages)]
def _resolve_pdf_path(pdf_file):
    """Return a filesystem path for a path-like value or an object with ``.name``."""
    if isinstance(pdf_file, (str, os.PathLike)):
        return os.fspath(pdf_file)
    return pdf_file.name


def downscale_pdf(pdf_file, dpi=150, max_width=1500):
    """Rasterize a PDF, shrink over-wide pages, and rebuild it as a JPEG-based PDF.

    Args:
        pdf_file: The uploaded PDF. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
        dpi: Resolution passed to ``convert_from_path`` when rendering pages.
        max_width: Maximum page width in pixels. Wider pages are resized
            down to this width with their aspect ratio preserved; narrower
            pages are left at their rendered size.

    Returns:
        A ``(output_path, info_text)`` tuple. ``output_path`` is a temporary
        ``.pdf`` file that is intentionally NOT deleted here so the caller
        can serve it. ``info_text`` is a per-page report of the pixel size
        and of the resulting PDF page size in points and millimetres.

    Raises:
        ValueError: If no file was supplied or the PDF yields no page images.
    """
    if pdf_file is None or pdf_file == "":
        raise ValueError("PDFファイルが指定されていません。")
    pdf_path = _resolve_pdf_path(pdf_file)

    # Image.resize needs integer dimensions; UI sliders may deliver floats.
    max_width = int(max_width)

    with tempfile.TemporaryDirectory() as tmpdir:
        images = convert_from_path(pdf_path, dpi=dpi, output_folder=tmpdir)
        if not images:
            raise ValueError("PDFを画像に変換できませんでした。")

        downscaled_paths = []
        pixel_info = []  # (page index, width px, height px) after resizing
        for i, img in enumerate(images):
            w, h = img.size
            if w > max_width:
                # Keep the aspect ratio; never let the height truncate to 0.
                new_h = max(1, int(h * max_width / w))
                resized = img.resize((max_width, new_h), Image.LANCZOS)
                img.close()
                img = resized
            pixel_info.append((i, img.size[0], img.size[1]))

            img_path = os.path.join(tmpdir, f"page_{i}.jpg")
            img.save(img_path, "JPEG", quality=85)
            # Release the handle so the temporary directory can be removed.
            img.close()
            downscaled_paths.append(img_path)

        # Convert before creating the result file so that a conversion
        # failure does not leave an orphaned temporary file behind.
        # NOTE(review): no DPI metadata is written to the JPEGs, so the PDF
        # page size is whatever img2pdf picks by default - confirm that this
        # is the intended physical page size.
        pdf_bytes = img2pdf.convert(downscaled_paths)

    # delete=False: the file must outlive this function for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as result_file:
        result_file.write(pdf_bytes)
    result_path = result_file.name

    # Read the page sizes back from the generated PDF.
    pdf_sizes = get_pdf_page_sizes(result_path)

    # Compose the human-readable report.
    info_lines = []
    for i, width_px, height_px in pixel_info:
        width_pt, height_pt, width_mm, height_mm = pdf_sizes[i][1:]
        info_lines.append(
            f"Page {i+1}:\n"
            f"  - Pixel size : {width_px} x {height_px} px\n"
            f"  - PDF size   : {width_pt:.1f}pt x {height_pt:.1f}pt (~{width_mm:.1f}mm x {height_mm:.1f}mm)\n"
        )
    return result_path, "\n".join(info_lines)
# Gradio UI: inputs on the left, results on the right, one button that runs
# downscale_pdf on the current input values.
with gr.Blocks() as demo:
    gr.Markdown("# PDF 解像度ダウンサイザー\nPDFのページ画像を縮小して容量を減らします。")

    with gr.Row():
        # Input column: source PDF plus the two conversion settings.
        with gr.Column():
            pdf_input = gr.File(label="入力PDF", file_types=[".pdf"])
            dpi_input = gr.Slider(
                minimum=72,
                maximum=300,
                value=150,
                step=1,
                label="変換DPI",
            )
            width_input = gr.Slider(
                minimum=500,
                maximum=3000,
                value=1500,
                step=50,
                label="最大幅(px)",
            )
            convert_button = gr.Button("変換")

        # Output column: the generated PDF and its per-page size report.
        with gr.Column():
            pdf_output = gr.File(label="出力PDF")
            size_output = gr.Textbox(label="出力PDFの詳細", lines=12)

    # Inputs map positionally onto downscale_pdf(pdf_file, dpi, max_width);
    # its two return values fill the two output components in order.
    convert_button.click(
        downscale_pdf,
        [pdf_input, dpi_input, width_input],
        [pdf_output, size_output],
    )

demo.launch()
|