"""Helper script to go from PDF to PNG ZIP Files we can use in HTML on the LB.""" from __future__ import annotations import glob import os import zipfile from pathlib import Path from pdf2image import convert_from_path root_dir = "./data" pdf_paths = glob.glob(os.path.join(root_dir, "**", "*.pdf"), recursive=True) for pdf_path in pdf_paths: # Relative path to recreate folder structure path_to_pdf = Path(pdf_path).resolve() path_to_png = path_to_pdf.with_suffix(".png") path_to_zip = path_to_pdf.with_suffix(".png.zip") print(f"Converting {pdf_path}...") images = convert_from_path(pdf_path, dpi=800) for _i, image in enumerate(images): image.save(path_to_png, "PNG") with zipfile.ZipFile(path_to_zip, "w") as zipf: zipf.write(path_to_png, arcname=path_to_png.name) path_to_png.unlink(missing_ok=True) path_to_pdf.unlink(missing_ok=True)