Spaces:
Sleeping
Sleeping
File size: 389 Bytes
43e97e3 |
1 2 3 4 5 6 7 8 9 10 11 |
from pathlib import Path

from pypdf import PdfReader
def pdf_text_extractor(filepath: str) -> None:
    """Extract the text of a PDF and write it to a sibling ``.txt`` file.

    Each page's text is read with ``page.extract_text()``; pages that yield
    no text are skipped. Every retained page is followed by a blank line.
    The result is written, UTF-8 encoded, next to the input file with the
    final extension swapped for ``.txt`` (``docs/report.pdf`` ->
    ``docs/report.txt``).

    Args:
        filepath: Path to the PDF file to read.
    """
    # The PDF is fully read before the output file is opened, so a failure
    # while parsing never leaves a truncated .txt file behind.
    pdf_reader = PdfReader(filepath, strict=True)

    chunks = []
    for page in pdf_reader.pages:
        page_text = page.extract_text()
        if page_text:
            chunks.append(f"{page_text}\n\n")

    # Swap only the final extension. A plain str.replace("pdf", "txt") would
    # also rewrite "pdf" inside directory or file names, and for a path with
    # no lowercase "pdf" (e.g. "REPORT.PDF") it would return the input path
    # unchanged and overwrite the source PDF.
    output_path = Path(filepath).with_suffix(".txt")
    with open(output_path, "w", encoding="utf-8") as file:
        file.write("".join(chunks))