# Spaces: Sleeping
from PyPDF2 import PdfReader | |
import docx | |
def extract_text_from_files(files):
    """Concatenate the text content of PDF, DOCX, and TXT file objects.

    Args:
        files: Iterable of file-like objects. Each must expose a ``name``
            attribute, whose extension selects the parser: ``PdfReader``
            for ``.pdf``, ``docx.Document`` for ``.docx``, and ``read()``
            for ``.txt``. Extensions are matched case-insensitively.

    Returns:
        One string in which every extracted PDF page, DOCX paragraph, or
        TXT file body is followed by a newline, in input order. Files with
        any other extension are skipped. Returns ``""`` when nothing was
        extracted.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file yields bytes that are not
            valid UTF-8.
    """
    # Collect pieces and join once instead of growing a string with "+=".
    chunks = []
    for file in files:
        # Lower-case the name so "REPORT.PDF" is not silently skipped.
        name = file.name.lower()
        if name.endswith(".pdf"):
            reader = PdfReader(file)
            for page in reader.pages:
                # Defensive: treat a falsy extraction result (e.g. None for
                # a page without extractable text) as empty text rather
                # than failing on None + str.
                chunks.append((page.extract_text() or "") + "\n")
        elif name.endswith(".docx"):
            doc = docx.Document(file)
            for para in doc.paragraphs:
                chunks.append(para.text + "\n")
        elif name.endswith(".txt"):
            raw = file.read()
            # Binary handles yield bytes; text-mode handles already yield str.
            if isinstance(raw, (bytes, bytearray)):
                raw = raw.decode("utf-8")
            chunks.append(raw + "\n")
    return "".join(chunks)