from PyPDF2 import PdfReader
import docx


def extract_text_from_files(files) -> str:
    """Concatenate the text content of PDF, DOCX and TXT files.

    Each file is dispatched on the extension of its ``name`` attribute
    (compared case-insensitively):

    * ``.pdf``  -- text of every page, via ``PyPDF2.PdfReader``.
    * ``.docx`` -- text of every paragraph, via ``docx.Document``.
    * ``.txt``  -- the result of ``file.read()``, decoded as UTF-8 when it
      is ``bytes``.

    Files with any other extension are skipped.

    Args:
        files: Iterable of file-like objects. Each must expose a ``name``
            string; ``.txt`` entries must also support ``read()``, and
            ``.pdf`` / ``.docx`` entries must be acceptable to
            ``PdfReader`` / ``docx.Document`` respectively.

    Returns:
        A single string in which every extracted piece (PDF page, DOCX
        paragraph, or whole TXT file) is followed by a newline. Empty
        string when nothing was extracted.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file's bytes are not valid UTF-8.
    """
    # Collect pieces and join once; repeated ``str +=`` in nested loops is
    # quadratic on interpreters without the CPython in-place optimisation.
    pieces = []

    for file in files:
        # Lowercase so "REPORT.PDF" is treated the same as "report.pdf".
        name = file.name.lower()

        if name.endswith(".pdf"):
            reader = PdfReader(file)
            # Defensive: treat a page that yields no text object as empty so
            # the concatenation below can never hit ``None + str``.
            pieces.extend(page.extract_text() or "" for page in reader.pages)
        elif name.endswith(".docx"):
            document = docx.Document(file)
            pieces.extend(paragraph.text for paragraph in document.paragraphs)
        elif name.endswith(".txt"):
            content = file.read()
            # Binary handles return bytes; text-mode handles already give str.
            if isinstance(content, bytes):
                content = content.decode("utf-8")
            pieces.append(content)

    # Every piece is newline-terminated, matching the original output format.
    return "".join(f"{piece}\n" for piece in pieces)