"""Streamlit page: upload PDF files and display the text extracted from each."""

import pandas as pd
import streamlit as st
from PyPDF2 import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# The uploads are parsed with PdfReader below, so ask for PDFs (the previous
# label said "CSV") and let the widget reject other file extensions.
uploaded_files = st.file_uploader(
    "Choose a PDF file",
    type="pdf",
    accept_multiple_files=True,
)

for uploaded_file in uploaded_files:
    # Read the uploaded PDF file.
    pdf_reader = PdfReader(uploaded_file)

    # Extract the text of every page and concatenate it in page order.
    # Defensive: a page that yields no text is treated as an empty string so
    # the join cannot fail on a non-string value.
    text_data = "".join(
        page.extract_text() or "" for page in pdf_reader.pages
    )

    # Wrap the extracted text in a one-element Series labelled "Resume" and
    # display it. This stays inside the loop so that nothing is rendered
    # (and `text_data` is never referenced) when no file has been uploaded.
    data = pd.Series(text_data, index=["Resume"])
    st.dataframe(data)  # view the text data