leaderboard / data_pdfs_to_pngs.py
LennartPurucker's picture
fix: lb and data update
c227628
raw
history blame
877 Bytes
"""Helper script to go from PDF to PNG ZIP Files we can use in HTML on the LB."""
from __future__ import annotations
import glob
import os
from pathlib import Path
from pdf2image import convert_from_path
import zipfile
# Directory that is searched recursively for ``*.pdf`` files.
root_dir = "./data"


def png_member_names(pdf_path, page_count: int) -> list[str]:
    """Return the PNG file names stored in the archive for one PDF.

    Args:
        pdf_path: Path (``str`` or ``Path``) of the source PDF.
        page_count: Number of pages rendered from the PDF.

    Returns:
        ``["<stem>.png"]`` for a single-page PDF (the name the script has
        always produced), ``["<stem>-1.png", "<stem>-2.png", ...]`` for a
        multi-page PDF so that no page overwrites another, and ``[]`` when
        there are no pages.
    """
    stem = Path(pdf_path).stem
    if page_count == 1:
        return [f"{stem}.png"]
    return [f"{stem}-{page}.png" for page in range(1, page_count + 1)]


def convert_pdf_to_png_zip(pdf_path, dpi: int = 800) -> Path | None:
    """Render *pdf_path* to PNG and store the page(s) in ``<stem>.png.zip``.

    The archive is created next to the PDF. Pages are written directly into
    the archive, so no temporary ``.png`` is created beside the PDF and an
    existing PNG with the same name is neither overwritten nor deleted.

    Args:
        pdf_path: Path of the PDF to convert.
        dpi: Rendering resolution passed to ``convert_from_path``.

    Returns:
        The path of the written ZIP file, or ``None`` if the PDF yielded no
        pages (in which case no archive is written).
    """
    # Absolute path of the PDF; the ZIP is placed in the same folder.
    path_to_pdf = Path(pdf_path).resolve()
    path_to_zip = path_to_pdf.with_suffix(".png.zip")

    print(f"Converting {pdf_path}...")
    images = convert_from_path(pdf_path, dpi=dpi)
    if not images:
        print(f"Skipping {pdf_path}: no pages were rendered.")
        return None
    if len(images) > 1:
        print(f"Note: {pdf_path} has {len(images)} pages; storing one PNG per page.")

    names = png_member_names(path_to_pdf, len(images))
    with zipfile.ZipFile(path_to_zip, "w") as zipf:
        for name, image in zip(names, images):
            # Stream the encoded PNG straight into the archive member.
            with zipf.open(name, "w") as member:
                image.save(member, "PNG")
    return path_to_zip


def main() -> None:
    """Convert every PDF found below ``root_dir`` into a PNG ZIP file."""
    pattern = os.path.join(root_dir, "**", "*.pdf")
    # Sorted so the processing order (and log output) is deterministic.
    for pdf_path in sorted(glob.glob(pattern, recursive=True)):
        convert_pdf_to_png_zip(pdf_path)


if __name__ == "__main__":
    main()