File size: 389 Bytes
43e97e3
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
from pathlib import Path

from pypdf import PdfReader

def pdf_text_extractor(filepath: str) -> None:
    """Extract the text of a PDF and write it to a sibling ``.txt`` file.

    Each page that yields non-empty text is written followed by a blank
    line. Pages for which ``extract_text()`` returns nothing are skipped.

    Args:
        filepath: Path of the PDF file to read.

    The output file has the same directory and base name as *filepath*,
    with the final extension replaced by ``.txt`` (UTF-8 encoded). An
    existing file at that location is overwritten.
    """
    pdf_reader = PdfReader(filepath, strict=True)

    # Collect per-page text lazily and join once instead of growing a
    # string with ``+=`` inside the loop.
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    content = "".join(f"{page_text}\n\n" for page_text in page_texts if page_text)

    # Swap only the file extension. A plain ``str.replace("pdf", "txt")``
    # would also rewrite "pdf" inside directory or file names, and would
    # leave the path unchanged (clobbering the source PDF) when the
    # extension is spelled differently, e.g. ".PDF".
    output_path = Path(filepath).with_suffix(".txt")
    output_path.write_text(content, encoding="utf-8")