Spaces:
Sleeping
Sleeping
# text_extractor.py
import os

import docx2txt
import fitz  # PyMuPDF
def extract_text_from_file(file_path):
    """Extract the text of a PDF or DOCX file, chosen by file extension.

    Args:
        file_path: Location of the file, given as ``str``, ``bytes`` or any
            ``os.PathLike`` object (for example ``pathlib.Path``).

    Returns:
        The extracted text. ``.pdf`` files are handled by
        ``extract_text_from_pdf`` and ``.docx`` files by
        ``docx2txt.process``. For any other extension the literal string
        ``"Unsupported file type."`` is returned instead of raising.
    """
    # Normalise to ``str`` so path objects work, and compare the lowercased
    # name so that "REPORT.PDF" or "cv.Docx" are recognised as well.
    path = os.fsdecode(file_path)
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".docx"):
        return docx2txt.process(path)
    # Kept as a plain return value (not an exception) for existing callers.
    return "Unsupported file type."
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF document, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output joined
        with no separator; an empty string if the document has no pages.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(file_path) as doc:
        # One join instead of repeated ``+=`` avoids re-copying the growing
        # string for every page of a long document.
        return "".join(page.get_text() for page in doc)