Chatbot / pdf_extractor.py
Ralqasimi's picture
Update pdf_extractor.py
a377365 verified
raw
history blame
377 Bytes
from PyPDF2 import PdfReader
# Function to extract text from PDFs (normal PDFs only)
def extract_text_from_pdf(pdf_path) -> str:
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: The PDF to read; handed unchanged to ``PdfReader``.

    Returns:
        The text of all pages concatenated in page order, with no
        separator inserted between pages and with leading and trailing
        whitespace stripped. Pages that yield no text contribute nothing.
    """
    reader = PdfReader(pdf_path)
    # ``or ""`` keeps a page that yields no text (empty or None) from
    # breaking the concatenation with a TypeError; ``join`` avoids the
    # repeated string copying that ``+=`` in a loop can incur.
    page_texts = (page.extract_text() or "" for page in reader.pages)
    return "".join(page_texts).strip()
# Main function to handle PDF text extraction
def get_pdf_text(pdf_path):
    """Return the text of the PDF at ``pdf_path``.

    Entry point for PDF text extraction; delegates directly to
    ``extract_text_from_pdf`` and returns its result unchanged.
    """
    extracted = extract_text_from_pdf(pdf_path)
    return extracted