File size: 572 Bytes
4254fda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from PyPDF2 import PdfReader
import docx

def extract_text_from_files(files):
    """Concatenate the text content of uploaded PDF, DOCX and TXT files.

    Each element of *files* must expose a ``name`` attribute (used only to
    pick a parser from its extension) and be acceptable to the matching
    reader: ``PdfReader`` for ``.pdf``, ``docx.Document`` for ``.docx``, and
    a ``read()`` method returning bytes for ``.txt``.

    Extensions are matched case-insensitively. Files with any other
    extension are skipped without error.

    Args:
        files: Iterable of file-like objects as described above.

    Returns:
        A single string in which every PDF page, DOCX paragraph and TXT file
        is followed by a newline. Returns ``""`` when nothing was extracted.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
    """
    # Collect pieces and join once instead of repeated string ``+=``.
    parts = []
    for file in files:
        # Lower-case so "REPORT.PDF" / "Notes.TXT" are not silently ignored.
        name = file.name.lower()
        if name.endswith(".pdf"):
            reader = PdfReader(file)
            for page in reader.pages:
                # Guard against a page yielding None (e.g. no extractable
                # text) so concatenation cannot raise TypeError.
                parts.append((page.extract_text() or "") + "\n")
        elif name.endswith(".docx"):
            doc = docx.Document(file)
            for para in doc.paragraphs:
                parts.append(para.text + "\n")
        elif name.endswith(".txt"):
            # "utf-8-sig" decodes plain UTF-8 identically but also drops a
            # leading byte-order mark instead of leaking "\ufeff" into the text.
            parts.append(file.read().decode("utf-8-sig") + "\n")
    return "".join(parts)