# text_extractor.py
import logging
import os

import docx2txt
import PyPDF2

logger = logging.getLogger(__name__)


def extract_text_from_file(file_path: str) -> str:
    """Return the text content of a PDF or DOCX file.

    The file type is chosen from the (case-insensitive) extension of
    ``file_path``. Extraction is best-effort: failures are logged and
    reported through a placeholder string rather than raised.

    Args:
        file_path: Path to the file to read.

    Returns:
        The extracted text. For PDFs, the text of every page joined with a
        single space (pages that yield no text contribute an empty string).
        One of the placeholders ``"[Error extracting PDF text]"``,
        ``"[Error extracting DOCX text]"`` or ``"[Unsupported file type]"``
        is returned when extraction fails or the extension is not handled.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # Pages must be read while the file is still open;
                # extract_text() may return None, hence the `or ""`.
                return " ".join(
                    page.extract_text() or "" for page in reader.pages
                )
        # Exception (not a bare except) so KeyboardInterrupt and SystemExit
        # still propagate instead of being reported as an extraction error.
        except Exception:
            logger.exception("Failed to extract text from PDF: %s", file_path)
            return "[Error extracting PDF text]"

    if ext == ".docx":
        try:
            return docx2txt.process(file_path)
        except Exception:
            logger.exception("Failed to extract text from DOCX: %s", file_path)
            return "[Error extracting DOCX text]"

    return "[Unsupported file type]"