Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
|
|
2 |
from PyPDF2 import PdfReader
|
3 |
import pandas as pd
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
-
|
6 |
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
import streamlit as st
|
@@ -10,6 +10,8 @@ from PyPDF2 import PdfReader
|
|
10 |
import pandas as pd
|
11 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
12 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
13 |
|
14 |
uploaded_files = st.file_uploader(
|
15 |
"Choose a PDF file(s) and job description as pdf", accept_multiple_files=True, type = "pdf"
|
@@ -24,6 +26,9 @@ if uploaded_files:
|
|
24 |
text_data = ""
|
25 |
for page in pdf_reader.pages:
|
26 |
text_data += page.extract_text()
|
|
|
|
|
|
|
27 |
|
28 |
column_name = f"Candidate profile {i + 1}"
|
29 |
resumes = pd.Series({column_name: text_data})
|
@@ -35,19 +40,7 @@ if uploaded_files:
|
|
35 |
st.error(f"Error processing file {uploaded_file.name}: {e}")
|
36 |
|
37 |
|
38 |
-
if all_resumes:
|
39 |
-
# Initialize the TF-IDF vectorizer
|
40 |
-
vectorizer = TfidfVectorizer()
|
41 |
|
42 |
-
|
43 |
-
tfidf_matrix = vectorizer.fit_transform(all_resumes)
|
44 |
-
|
45 |
-
# Calculate the cosine similarity matrix
|
46 |
-
cosine_sim = cosine_similarity(tfidf_matrix)
|
47 |
-
|
48 |
-
st.subheader("Cosine Similarity Matrix")
|
49 |
-
st.dataframe(cosine_sim)
|
50 |
-
elif uploaded_files:
|
51 |
-
st.info("Please upload at least two PDF files to calculate cosine similarity.")
|
52 |
|
53 |
|
|
|
2 |
from PyPDF2 import PdfReader
|
3 |
import pandas as pd
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
|
6 |
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
import streamlit as st
|
|
|
10 |
import pandas as pd
|
11 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
12 |
from sklearn.metrics.pairwise import cosine_similarity
|
13 |
+
from gliner import GLiNER
|
14 |
+
|
15 |
|
16 |
uploaded_files = st.file_uploader(
|
17 |
"Choose a PDF file(s) and job description as pdf", accept_multiple_files=True, type = "pdf"
|
|
|
26 |
text_data = ""
|
27 |
for page in pdf_reader.pages:
|
28 |
text_data += page.extract_text()
|
29 |
+
model = GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")
|
30 |
+
labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
|
31 |
+
entities = model.predict_entities(text_data, labels)
|
32 |
|
33 |
column_name = f"Candidate profile {i + 1}"
|
34 |
resumes = pd.Series({column_name: text_data})
|
|
|
40 |
st.error(f"Error processing file {uploaded_file.name}: {e}")
|
41 |
|
42 |
|
|
|
|
|
|
|
43 |
|
44 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
|