File size: 595 Bytes
d765c07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import pandas as pd
import pdfplumber

def _rows_to_strings(df):
    """Return one space-joined string per row of *df*, with cells cast to str."""
    # Row iteration is used instead of ``df.apply(" ".join, axis=1)`` because
    # ``apply`` on a frame with columns but no rows hands back a DataFrame
    # (which has no ``.tolist()``) rather than an empty Series.
    return [
        " ".join(row)
        for row in df.astype(str).itertuples(index=False, name=None)
    ]


def load_file(uploaded_file):
    """Load an uploaded PDF, CSV or XLSX file as a list of text chunks.

    Args:
        uploaded_file: A file-like object exposing a ``name`` attribute whose
            extension selects the parser. It is passed directly to
            ``pdfplumber.open``, ``pd.read_csv`` or ``pd.read_excel``.

    Returns:
        A list of strings. For PDFs, one entry per page that yielded
        non-empty text. For CSV/XLSX, one entry per data row with the cell
        values converted to ``str`` and joined by single spaces (an empty
        table yields an empty list).

    Raises:
        ValueError: If the file name has no extension or the extension is
            not ``pdf``, ``csv`` or ``xlsx`` (case-insensitive).
    """
    # rpartition distinguishes "no dot at all" from a real extension, so a
    # bare name such as "csv" is not mistaken for a CSV file.
    _, dot, suffix = uploaded_file.name.rpartition(".")
    ext = suffix.lower() if dot else ""

    if ext == "pdf":
        with pdfplumber.open(uploaded_file) as pdf:
            # Extract each page exactly once; the list is built while the
            # PDF is still open.
            page_texts = (page.extract_text() for page in pdf.pages)
            return [text for text in page_texts if text]
    elif ext == "csv":
        return _rows_to_strings(pd.read_csv(uploaded_file))
    elif ext == "xlsx":
        return _rows_to_strings(pd.read_excel(uploaded_file))
    else:
        raise ValueError("Unsupported file type")