Spaces:
Running
Running
File size: 481 Bytes
0c91aa8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
# text_extractor.py
import docx2txt
import fitz # PyMuPDF
def extract_text_from_file(file_path):
    """Extract plain text from a PDF or DOCX file.

    The file type is chosen from the end of the file name, compared
    case-insensitively, so ``report.PDF`` and ``notes.Docx`` are handled
    the same way as their lowercase spellings.

    Args:
        file_path: Location of the file as a ``str``, ``bytes`` or
            ``os.PathLike`` object.

    Returns:
        The result of ``extract_text_from_pdf`` for names ending in
        ``.pdf``, the result of ``docx2txt.process`` for names ending in
        ``.docx``, or the literal string ``"Unsupported file type."`` for
        anything else.

    Raises:
        TypeError: If ``file_path`` is not a ``str``, ``bytes`` or
            ``os.PathLike`` object.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # Normalise str / bytes / PathLike input to a plain string so the
    # suffix test below works for all of them.
    path = os.fsdecode(file_path)
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".docx"):
        return docx2txt.process(path)

    # Returned as a message (not raised) so existing callers that expect a
    # string result keep working.
    return "Unsupported file type."
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        file_path: Path of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        One string made of each page's ``get_text()`` output in iteration
        order, with nothing inserted between pages. The result is an
        empty string when iterating the document yields no pages.
    """
    # The context manager closes the document even if reading a page fails.
    with fitz.open(file_path) as doc:
        # A single join avoids rebuilding the accumulated string with
        # ``+=`` for every page; it is fully evaluated before the
        # document is closed.
        return "".join(page.get_text() for page in doc)
|