cv / text_extractor.py
saherPervaiz's picture
Create text_extractor.py
0c91aa8 verified
raw
history blame
481 Bytes
# text_extractor.py
import os

import docx2txt
import fitz  # PyMuPDF
def extract_text_from_file(file_path):
    """Extract plain text from a PDF or DOCX file.

    The format is picked from the file extension, compared
    case-insensitively so names such as ``CV.PDF`` or ``cv.Docx`` are
    recognised.

    Args:
        file_path: Path to the document as ``str``, ``bytes`` or any
            ``os.PathLike`` object.

    Returns:
        For ``.pdf`` files, the result of ``extract_text_from_pdf``; for
        ``.docx`` files, the result of ``docx2txt.process``; otherwise the
        literal string ``"Unsupported file type."``.
    """
    # Normalise to ``str`` so path-like objects (e.g. ``pathlib.Path``) work
    # with the string suffix checks below.
    path = os.fsdecode(file_path)
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".docx"):
        return docx2txt.process(path)

    # Unsupported types are reported through the return value, not an
    # exception, exactly as before.
    return "Unsupported file type."
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output joined
        with no separator; an empty string if the document yields no pages.
    """
    with fitz.open(file_path) as doc:
        # The generator is consumed here, while the document is still open.
        # A single join avoids repeated string concatenation in a loop.
        return "".join(page.get_text() for page in doc)