nlpblogs committed on
Commit
040362f
·
verified ·
1 Parent(s): 0a5d201

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  from PyPDF2 import PdfReader
3
  import pandas as pd
4
  from sklearn.feature_extraction.text import TfidfVectorizer
 
 
5
 
6
  uploaded_files = st.file_uploader(
7
  "Choose a CSV file", accept_multiple_files=True
@@ -14,17 +16,14 @@ for uploaded_file in uploaded_files:
14
  text_data+= page.extract_text()
15
 
16
 
17
- data = pd.Series(text_data, index = ["Resume"])
18
-
19
- st.dataframe(data) # view the text data
20
-
21
- from sklearn.feature_extraction.text import TfidfVectorizer
22
- from sklearn.metrics.pairwise import cosine_similarity
23
- vec = TfidfVectorizer()
24
- tf_idf = vec.fit_transform(data[["Resume"]])
25
- st.dataframe(pd.DataFrame(tf_idf.toarray(), columns=vec.get_feature_names_out()))
26
- cosine_sim = cosine_similarity(tf_idf, tf_idf)
27
- st.write(cosine_sim)
28
 
29
 
30
 
 
2
  from PyPDF2 import PdfReader
3
  import pandas as pd
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
 
8
  uploaded_files = st.file_uploader(
9
  "Choose a CSV file", accept_multiple_files=True
 
16
  text_data+= page.extract_text()
17
 
18
 
19
+ for text in text_data:
20
+ data = pd.Series(text, index = ["Resume"])
21
+ st.dataframe(data) # view the text data
22
+ vec = TfidfVectorizer()
23
+ tf_idf = vec.fit_transform(data[["Resume"]])
24
+ st.dataframe(pd.DataFrame(tf_idf.toarray(), columns=vec.get_feature_names_out()))
25
+ cosine_sim = cosine_similarity(tf_idf, tf_idf)
26
+ st.write(cosine_sim)
 
 
 
27
 
28
 
29