Spaces:
Running
Running
File size: 922 Bytes
c227628 127cc6f ab82350 127cc6f c227628 127cc6f c227628 ab82350 c227628 ab82350 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
"""Helper script to go from PDF to PNG ZIP Files we can use in HTML on the LB."""
from __future__ import annotations
import glob
import os
import zipfile
from pathlib import Path
from pdf2image import convert_from_path
root_dir = "./data"


def page_png_path(path_to_png: Path, page_index: int, page_count: int) -> Path:
    """Return the PNG file path to use for one rendered PDF page.

    A single-page PDF keeps the plain ``<stem>.png`` name. Pages of a
    multi-page PDF get a 1-based, zero-padded ``_<n>`` suffix so that they
    do not overwrite each other and sort in page order.

    Args:
        path_to_png: Base PNG path (the PDF path with a ``.png`` suffix).
        page_index: 0-based index of the page within the PDF.
        page_count: Total number of pages rendered from the PDF.

    Returns:
        The path the page image should be written to.
    """
    if page_count <= 1:
        return path_to_png
    width = len(str(page_count))
    page_name = f"{path_to_png.stem}_{page_index + 1:0{width}d}{path_to_png.suffix}"
    return path_to_png.with_name(page_name)


pdf_paths = glob.glob(os.path.join(root_dir, "**", "*.pdf"), recursive=True)

for pdf_path in pdf_paths:
    # Absolute path of the PDF; the PNG and ZIP outputs are written next to it.
    path_to_pdf = Path(pdf_path).resolve()
    path_to_png = path_to_pdf.with_suffix(".png")
    path_to_zip = path_to_pdf.with_suffix(".png.zip")

    print(f"Converting {pdf_path}...")
    images = convert_from_path(pdf_path, dpi=800)
    if not images:
        # Nothing was rendered: keep the PDF instead of producing an empty
        # archive and then deleting the only copy of the data.
        print(f"No pages rendered for {pdf_path}, skipping.")
        continue

    page_count = len(images)
    page_paths = [
        page_png_path(path_to_png, page_index, page_count)
        for page_index in range(page_count)
    ]
    try:
        # One file per page; previously every page was written to the same
        # path, so only the last page of a multi-page PDF survived.
        for image, page_path in zip(images, page_paths):
            image.save(page_path, "PNG")
        with zipfile.ZipFile(path_to_zip, "w") as zipf:
            for page_path in page_paths:
                zipf.write(page_path, arcname=page_path.name)
    finally:
        # The PNGs are only intermediates; remove them even if zipping failed.
        for page_path in page_paths:
            page_path.unlink(missing_ok=True)

    # Reached only when the archive was written successfully.
    path_to_pdf.unlink(missing_ok=True)
|