Spaces:
Sleeping
Sleeping
from pypdf import PdfReader | |
def pdf_text_extractor(filepath: str) -> None: | |
content = "" | |
pdf_reader = PdfReader(filepath, strict=True) | |
for page in pdf_reader.pages: | |
page_text = page.extract_text() | |
if page_text: | |
content += f"{page_text}\n\n" | |
with open(filepath.replace("pdf", "txt"), "w", encoding="utf-8") as file: | |
file.write(content) |