Spaces:

soiz1
/

PDF-converter

Sleeping

App Files Files Community

soiz1 committed on Jul 11

Commit

093fe97

verified ·

1 Parent(s): 0787dc4

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -76

app.py CHANGED Viewed

@@ -1,90 +1,71 @@
 import gradio as gr
 import tempfile
 from pdf2image import convert_from_path
-from PIL import Image
-import img2pdf
-import os
-import shutil
-import PyPDF2
-def get_pdf_page_sizes(pdf_path):
-    sizes = []
-    with open(pdf_path, "rb") as f:
-        reader = PyPDF2.PdfReader(f)
-        for i, page in enumerate(reader.pages):
-            mediabox = page.mediabox
-            width_pt = float(mediabox.width)
-            height_pt = float(mediabox.height)
-            width_mm = width_pt * 25.4 / 72
-            height_mm = height_pt * 25.4 / 72
-            sizes.append((i, width_pt, height_pt, width_mm, height_mm))
-    return sizes
-def downscale_pdf(pdf_file, dpi=150, max_width=1500):
     with tempfile.TemporaryDirectory() as tmpdir:
-        images = convert_from_path(pdf_file.name, dpi=dpi, output_folder=tmpdir)
-        if not images:
-            raise ValueError("PDFを画像に変換できませんでした。")
-        downscaled_paths = []
-        pixel_info = []  # 画像のピクセル情報を記録
         for i, img in enumerate(images):
-            original_size = img.size
-            w, h = original_size
-            if w > max_width:
-                new_h = int(h * max_width / w)
-                img = img.resize((max_width, new_h), Image.LANCZOS)
-            resized_size = img.size
-            pixel_info.append((i, resized_size[0], resized_size[1]))
-            img_path = os.path.join(tmpdir, f"page_{i}.jpg")
-            img.save(img_path, "JPEG", quality=85)
-            downscaled_paths.append(img_path)
-        if not downscaled_paths:
-            raise ValueError("変換後の画像が見つかりません。")
-        tmp_output_path = os.path.join(tmpdir, "downscaled.pdf")
-        with open(tmp_output_path, "wb") as f_out:
-            f_out.write(img2pdf.convert(downscaled_paths))
-        result_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
-        with open(tmp_output_path, "rb") as src:
-            shutil.copyfileobj(src, result_file)
-        result_file.close()
-    # サイズ情報（PDF）を取得
-    pdf_sizes = get_pdf_page_sizes(result_file.name)
-    # テキストとして合成
-    info_lines = []
-    for i, width_px, height_px in pixel_info:
-        width_pt, height_pt, width_mm, height_mm = pdf_sizes[i][1:]
-        info_lines.append(
-            f"Page {i+1}:\n"
-            f"  - Pixel size : {width_px} x {height_px} px\n"
-            f"  - PDF size   : {width_pt:.1f}pt x {height_pt:.1f}pt (~{width_mm:.1f}mm x {height_mm:.1f}mm)\n"
-        )
-    return result_file.name, "\n".join(info_lines)
 with gr.Blocks() as demo:
-    gr.Markdown("# PDF 解像度ダウンサイザー\nPDFのページ画像を縮小して容量を減らします。")
     with gr.Row():
-        with gr.Column():
-            pdf_input = gr.File(label="入力PDF", file_types=[".pdf"])
-            dpi_input = gr.Slider(72, 300, value=150, step=1, label="変換DPI")
-            width_input = gr.Slider(500, 3000, value=1500, step=50, label="最大幅(px)")
-            convert_button = gr.Button("変換")
-        with gr.Column():
-            pdf_output = gr.File(label="出力PDF")
-            size_output = gr.Textbox(label="出力PDFの詳細", lines=12)
-    convert_button.click(
-        fn=downscale_pdf,
-        inputs=[pdf_input, dpi_input, width_input],
-        outputs=[pdf_output, size_output]
-    )
-demo.launch()

 import gradio as gr
 import tempfile
 from pdf2image import convert_from_path
+import fitz  # PyMuPDF
+def analyze_pdf(pdf_file):
+    results = []
+    overall_rating = "OK"
     with tempfile.TemporaryDirectory() as tmpdir:
+        # pdf2imageで画像を取得
+        try:
+            images = convert_from_path(pdf_file.name, dpi=300, output_folder=tmpdir)
+        except Exception as e:
+            return f"PDF変換エラー: {str(e)}", ""
+        # 画像サイズ確認
         for i, img in enumerate(images):
+            width, height = img.size
+            page_rating = "OK"
+            if width < 1000 or height < 1000:
+                page_rating = "非推奨（解像度低）"
+            elif width < 1500 or height < 1500:
+                page_rating = "注意（やや低め）"
+            results.append(
+                f"ページ{i+1}: {width}x{height}px → {page_rating}"
+            )
+            if page_rating.startswith("非推奨"):
+                overall_rating = "非推奨"
+            elif page_rating.startswith("注意") and overall_rating == "OK":
+                overall_rating = "注意"
+        # 追加で、PDFメタ情報のDPIも参考に
+        doc = fitz.open(pdf_file.name)
+        dpi_infos = []
+        for i, page in enumerate(doc):
+            try:
+                # MediaBoxや解像度情報から概算
+                rect = page.rect
+                width_pt = rect.width
+                height_pt = rect.height
+                dpi_x = width / (width_pt / 72)
+                dpi_y = height / (height_pt / 72)
+                dpi_infos.append(f"ページ{i+1}推定DPI: {dpi_x:.1f}x{dpi_y:.1f}")
+            except Exception:
+                dpi_infos.append(f"ページ{i+1}推定DPI: 取得失敗")
+        # まとめ
+        result_text = "\n".join(results + [""] + dpi_infos)
+        result_text += f"\n\n総合評価: {overall_rating}"
+        return result_text, overall_rating
 with gr.Blocks() as demo:  # Build the Gradio UI; components are created in layout order
+    gr.Markdown("# Audiveris適性チェック（非公式・推定）")  # Page title: Audiveris suitability check (unofficial, estimated)
+    gr.Markdown("PDFをアップロードすると、ページ画像サイズからAudiverisで使えそうかを推定します。")  # Short usage description
     with gr.Row():  # File input and trigger button are created inside one row
+        pdf_input = gr.File(label="PDFファイル")  # The PDF to analyse
+        analyze_button = gr.Button("判定")  # Starts the analysis
+    result_output = gr.Textbox(label="詳細結果", lines=20)  # Receives the first return value of analyze_pdf
+    rating_output = gr.Textbox(label="総合評価")  # Receives the second return value of analyze_pdf
+    analyze_button.click(analyze_pdf, inputs=pdf_input, outputs=[result_output, rating_output])  # Wire the button to analyze_pdf
+if __name__ == "__main__":  # Launch only when executed as a script, not on import
+    demo.launch()