soiz1 committed on
Commit
18236eb
·
verified ·
1 Parent(s): a1622dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -3,37 +3,38 @@ import subprocess
3
  import os
4
  import tempfile
5
 
6
def rasterize_with_imagemagick(input_pdf):
    """Rasterize a PDF with ImageMagick and rebuild it as an image-only PDF.

    Every page is rendered to a PNG at 72 dpi with ``convert``; the PNGs are
    then merged back into a single PDF with a second ``convert`` call.

    Args:
        input_pdf: Path of the PDF to process, or an object exposing that
            path as ``.name``; ``None`` when no file was supplied.

    Returns:
        A ``(message, pdf_path, size_text)`` tuple. On failure ``pdf_path``
        and ``size_text`` are ``None`` and ``message`` describes the error.
    """
    import shutil  # function-scope import: only needed to move the result

    if input_pdf is None:
        return "PDFファイルが指定されていません", None, None

    # Accept plain paths as well as wrapper objects that carry the path in
    # ``.name``.
    # NOTE(review): which of the two the UI passes depends on the Gradio
    # version in use -- confirm.
    try:
        pdf_path = os.fspath(input_pdf)
    except TypeError:
        pdf_path = input_pdf.name

    with tempfile.TemporaryDirectory() as tmpdir:
        # PDF -> PNG (72 dpi), one numbered image per page.
        png_pattern = os.path.join(tmpdir, "page_%03d.png")
        cmd1 = ["convert", "-density", "72", pdf_path, png_pattern]
        try:
            res1 = subprocess.run(cmd1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as exc:
            # Raised e.g. when the ``convert`` executable is not installed.
            return f"ImageMagick変換エラー: {exc}", None, None
        if res1.returncode != 0:
            return f"ImageMagick変換エラー: {res1.stderr.decode(errors='replace')}", None, None

        # Merge the PNGs into one PDF. No shell is involved, so the wildcard
        # reaches ``convert`` verbatim.
        # NOTE(review): this presumably relies on ImageMagick expanding the
        # pattern itself -- verify.
        output_pdf = os.path.join(tmpdir, "output_rasterized.pdf")
        cmd2 = ["convert", os.path.join(tmpdir, "page_*.png"), output_pdf]
        try:
            res2 = subprocess.run(cmd2, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as exc:
            return f"PDF作成エラー: {exc}", None, None
        if res2.returncode != 0:
            return f"PDF作成エラー: {res2.stderr.decode(errors='replace')}", None, None

        if not os.path.exists(output_pdf):
            return "PDF生成に失敗しました", None, None

        size_bytes = os.path.getsize(output_pdf)
        size_kb = size_bytes / 1024
        size_str = f"{size_bytes} bytes ({size_kb:.1f} KB)"

        # ``tmpdir`` is deleted as soon as the ``with`` block exits, so the
        # result is moved to a directory that outlives this call; otherwise
        # the returned path would point at a file that no longer exists.
        # That directory is intentionally not cleaned up here.
        keep_dir = tempfile.mkdtemp(prefix="rasterized_")
        final_pdf = os.path.join(keep_dir, "output_rasterized.pdf")
        shutil.move(output_pdf, final_pdf)

    return "ImageMagickでラスタライズ完了", final_pdf, size_str
30
 
# Gradio front end: one PDF upload in; status text, the rasterized PDF and a
# size summary out.
_interface_options = dict(
    fn=rasterize_with_imagemagick,
    inputs=gr.File(file_types=[".pdf"]),
    outputs=[gr.Text(), gr.File(), gr.Text()],
    title="PDFをImageMagickでラスタライズ",
    description="PDFを画像に変換して再PDF化し、ベクターを除去します。",
)
demo = gr.Interface(**_interface_options)

# Start the web UI.
demo.launch()
 
3
  import os
4
  import tempfile
5
 
6
def rasterize_with_pdftoppm(input_pdf):
    """Rasterize a PDF with pdftoppm and reassemble the pages into a PDF.

    Every page is rendered to a PNG at 150 dpi with ``pdftoppm``; the PNGs
    are then combined into a single PDF with ImageMagick's ``convert``.

    Args:
        input_pdf: Path of the PDF to process, or an object exposing that
            path as ``.name``; ``None`` when no file was supplied.

    Returns:
        A ``(message, pdf_path, size_text)`` tuple. On failure ``pdf_path``
        and ``size_text`` are ``None`` and ``message`` describes the error.
    """
    import shutil  # function-scope import: only needed to move the result

    if input_pdf is None:
        return "PDFファイルが指定されていません", None, None

    # Accept plain paths as well as wrapper objects that carry the path in
    # ``.name``.
    # NOTE(review): which of the two the UI passes depends on the Gradio
    # version in use -- confirm.
    try:
        pdf_path = os.fspath(input_pdf)
    except TypeError:
        pdf_path = input_pdf.name

    with tempfile.TemporaryDirectory() as tmpdir:
        # PDF -> one PNG per page, written as "<tmpdir>/page*.png".
        output_prefix = os.path.join(tmpdir, "page")
        cmd1 = ["pdftoppm", "-png", "-r", "150", pdf_path, output_prefix]
        try:
            res1 = subprocess.run(cmd1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as exc:
            # Raised e.g. when the ``pdftoppm`` executable is not installed.
            return f"pdftoppm変換エラー: {exc}", None, None
        if res1.returncode != 0:
            return f"pdftoppm変換エラー: {res1.stderr.decode(errors='replace')}", None, None

        # Collect the generated PNGs in filename order so pages stay in order.
        png_files = sorted(
            os.path.join(tmpdir, name)
            for name in os.listdir(tmpdir)
            if name.endswith(".png")
        )
        if not png_files:
            return "PNG変換に失敗しました", None, None

        # PNG -> PDF with ImageMagick's ``convert``.
        output_pdf = os.path.join(tmpdir, "output_rasterized.pdf")
        cmd2 = ["convert"] + png_files + [output_pdf]
        try:
            res2 = subprocess.run(cmd2, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as exc:
            return f"PDF作成エラー: {exc}", None, None
        if res2.returncode != 0:
            return f"PDF作成エラー: {res2.stderr.decode(errors='replace')}", None, None

        size_bytes = os.path.getsize(output_pdf)
        size_kb = size_bytes / 1024
        size_str = f"{size_bytes} bytes ({size_kb:.1f} KB)"

        # ``tmpdir`` is deleted as soon as the ``with`` block exits, so the
        # result is moved to a directory that outlives this call; otherwise
        # the returned path would point at a file that no longer exists.
        # That directory is intentionally not cleaned up here.
        keep_dir = tempfile.mkdtemp(prefix="rasterized_")
        final_pdf = os.path.join(keep_dir, "output_rasterized.pdf")
        shutil.move(output_pdf, final_pdf)

    return "pdftoppmでラスタライズ完了", final_pdf, size_str
31
 
# Gradio front end: one PDF upload in; status text, the rasterized PDF and a
# size summary out.
_interface_options = dict(
    fn=rasterize_with_pdftoppm,
    inputs=gr.File(file_types=[".pdf"]),
    outputs=[gr.Text(), gr.File(), gr.Text()],
    title="PDFをpdftoppmでラスタライズ",
    description="pdftoppmでPDFをPNGに変換し再度PDF化します。",
)
demo = gr.Interface(**_interface_options)

# Start the web UI.
demo.launch()