File size: 922 Bytes
c227628
127cc6f
 
 
 
 
ab82350
127cc6f
 
 
 
 
 
 
 
 
 
 
c227628
 
127cc6f
 
 
 
 
c227628
ab82350
c227628
 
ab82350
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""Helper script to go from PDF to PNG ZIP Files we can use in HTML on the LB."""

from __future__ import annotations

import glob
import os
import zipfile
from pathlib import Path

from pdf2image import convert_from_path

# Directory that is searched recursively for PDFs to convert.
root_dir = "./data"
pdf_paths = glob.glob(os.path.join(root_dir, "**", "*.pdf"), recursive=True)


def png_path_for_page(path_to_png: Path, page_index: int, page_count: int) -> Path:
    """Return the PNG path to use for one rendered page of a PDF.

    Args:
        path_to_png: Base PNG path (the PDF path with a ``.png`` suffix).
        page_index: Zero-based index of the page being written.
        page_count: Total number of pages rendered from the PDF.

    Returns:
        ``path_to_png`` unchanged for a single-page PDF, so existing
        single-page outputs keep their name; otherwise a sibling path with a
        1-based ``-<page>`` marker inserted before the suffix so pages do not
        overwrite each other.
    """
    if page_count == 1:
        return path_to_png
    numbered_name = f"{path_to_png.stem}-{page_index + 1}{path_to_png.suffix}"
    return path_to_png.with_name(numbered_name)


for pdf_path in pdf_paths:
    # Absolute paths; outputs are written next to the source PDF.
    path_to_pdf = Path(pdf_path).resolve()
    path_to_png = path_to_pdf.with_suffix(".png")
    path_to_zip = path_to_pdf.with_suffix(".png.zip")
    print(f"Converting {pdf_path}...")

    images = convert_from_path(pdf_path, dpi=800)
    if not images:
        # Nothing was rendered: keep the PDF rather than deleting it with no
        # archive to show for it.
        print(f"No pages rendered for {pdf_path}, skipping.")
        continue

    # Open the archive once per PDF so every page ends up in the same zip
    # (re-opening in "w" mode per page would truncate the previous pages).
    with zipfile.ZipFile(path_to_zip, "w") as zipf:
        for page_index, image in enumerate(images):
            page_png = png_path_for_page(path_to_png, page_index, len(images))
            image.save(page_png, "PNG")
            try:
                zipf.write(page_png, arcname=page_png.name)
            finally:
                # The PNG is only a staging file for the archive.
                page_png.unlink(missing_ok=True)

    # Remove the source PDF only after the whole archive was written.
    path_to_pdf.unlink(missing_ok=True)