Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
|
|
2 |
from PyPDF2 import PdfReader
|
3 |
import pandas as pd
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
|
|
5 |
|
6 |
uploaded_files = st.file_uploader(
|
7 |
"Choose a CSV file", accept_multiple_files=True
|
@@ -14,17 +16,14 @@ for uploaded_file in uploaded_files:
|
|
14 |
text_data+= page.extract_text()
|
15 |
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
st.dataframe(pd.DataFrame(tf_idf.toarray(), columns=vec.get_feature_names_out()))
|
26 |
-
cosine_sim = cosine_similarity(tf_idf, tf_idf)
|
27 |
-
st.write(cosine_sim)
|
28 |
|
29 |
|
30 |
|
|
|
2 |
from PyPDF2 import PdfReader
|
3 |
import pandas as pd
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
uploaded_files = st.file_uploader(
|
9 |
"Choose a CSV file", accept_multiple_files=True
|
|
|
16 |
text_data+= page.extract_text()
|
17 |
|
18 |
|
19 |
+
for text in text_data:
|
20 |
+
data = pd.Series(text, index = ["Resume"])
|
21 |
+
st.dataframe(data) # view the text data
|
22 |
+
vec = TfidfVectorizer()
|
23 |
+
tf_idf = vec.fit_transform(data[["Resume"]])
|
24 |
+
st.dataframe(pd.DataFrame(tf_idf.toarray(), columns=vec.get_feature_names_out()))
|
25 |
+
cosine_sim = cosine_similarity(tf_idf, tf_idf)
|
26 |
+
st.write(cosine_sim)
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
|