# text_extractor.py
"""Extract plain text from PDF and DOCX files."""

import os

import docx2txt
import fitz  # PyMuPDF


def extract_text_from_file(file_path):
    """Extract text from a file, dispatching on its extension.

    Args:
        file_path: Path to the file, as a ``str`` or ``os.PathLike`` object.
            The extension is matched case-insensitively, so ``.PDF`` and
            ``.Docx`` are handled the same as ``.pdf`` and ``.docx``.

    Returns:
        For ``.pdf`` files, the text returned by ``extract_text_from_pdf``;
        for ``.docx`` files, the result of ``docx2txt.process``; for anything
        else, the literal string ``"Unsupported file type."``.
    """
    # Normalize once so that pathlib.Path inputs work and the suffix check
    # below does not depend on the case used in the file name.
    path = os.fspath(file_path)
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".docx"):
        return docx2txt.process(path)
    # Callers receive this sentinel string rather than an exception; it is
    # kept unchanged for backward compatibility.
    return "Unsupported file type."


def extract_text_from_pdf(file_path) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        file_path: Path of the PDF document, passed to ``fitz.open``.

    Returns:
        The ``page.get_text()`` output of each page, joined in page order
        with no separator. An empty string if the document has no pages.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(file_path) as doc:
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(page.get_text() for page in doc)