chat22GV2 / utils.py
ramysaidagieb's picture
Upload 5 files
4254fda verified
raw
history blame
572 Bytes
from PyPDF2 import PdfReader
import docx
def extract_text_from_files(files):
    """Return the concatenated text of the given PDF, DOCX and TXT files.

    Each extracted unit (a PDF page, a DOCX paragraph, or the whole content
    of a TXT file) is followed by a single newline, in input order.

    Args:
        files: Iterable of file-like objects. Each must expose a ``name``
            attribute (used only to pick the parser from its extension) and
            be acceptable to ``PdfReader`` / ``docx.Document``, or provide
            ``read()`` for ``.txt`` files.

    Returns:
        A single ``str`` with all extracted text; ``""`` when ``files`` is
        empty or contains no supported file types.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file yields bytes that are not
            valid UTF-8.
    """
    chunks = []
    for file in files:
        # Match extensions case-insensitively so "REPORT.PDF" is not skipped.
        name = file.name.lower()
        if name.endswith(".pdf"):
            reader = PdfReader(file)
            for page in reader.pages:
                # Guard against extractors that yield None for pages without
                # a text layer (e.g. scanned images) instead of "".
                chunks.append(page.extract_text() or "")
        elif name.endswith(".docx"):
            doc = docx.Document(file)
            chunks.extend(para.text for para in doc.paragraphs)
        elif name.endswith(".txt"):
            data = file.read()
            # Binary handles return bytes; text-mode handles already return
            # str and must not be decoded again.
            if isinstance(data, (bytes, bytearray)):
                # "utf-8-sig" decodes plain UTF-8 identically but drops a
                # leading byte-order mark rather than leaking it as U+FEFF.
                data = data.decode("utf-8-sig")
            chunks.append(data)
        # Files with any other extension are intentionally ignored.
    # Single join instead of repeated "+=" keeps assembly linear in size.
    return "".join(chunk + "\n" for chunk in chunks)