nlpblogs committed on
Commit
d08e279
·
verified ·
1 Parent(s): 3323972

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -140
app.py CHANGED
@@ -41,153 +41,121 @@ with st.sidebar:
41
 
42
 
43
 
44
- st.subheader ("Job Description", divider = "orange")
 
 
 
 
 
 
 
 
 
45
 
46
- txt = st.text_area("Paste the job description and then press Ctrl + Enter", key = "text 1")
 
47
  job = pd.Series(txt, name="Text")
48
 
49
- st.subheader("Candidate Profile 1", divider = "red")
50
-
51
-
52
- if 'upload_count' not in st.session_state:
53
- st.session_state['upload_count'] = 0
54
-
55
- max_attempts = 3
56
-
57
- if st.session_state['upload_count'] < max_attempts:
58
- uploaded_files = st.file_uploader(
59
- "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
60
  )
61
-
62
- if uploaded_files:
63
- st.session_state['upload_count'] += 1
64
- for uploaded_file in uploaded_files:
65
- pdf_reader = PdfReader(uploaded_file)
66
- text_data = ""
67
- for page in pdf_reader.pages:
68
- text_data += page.extract_text()
69
- data = pd.Series(text_data, name = 'Text')
70
-
71
-
72
- frames = [job, data]
73
- result = pd.concat(frames)
74
-
75
-
76
- model = GLiNER.from_pretrained("urchade/gliner_base")
77
- labels = ["person", "country","organization", "role", "skills", "year"]
78
- entities = model.predict_entities(text_data, labels)
79
- df = pd.DataFrame(entities)
80
-
81
-
82
- st.title("Profile of candidate 1")
83
- fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
84
- values='score', color='label')
85
- fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
86
- st.plotly_chart(fig, key = "tv")
87
-
88
-
89
-
90
-
91
- vectorizer = TfidfVectorizer()
92
- tfidf_matrix = vectorizer.fit_transform(result)
93
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
94
-
95
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
96
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
97
-
98
-
99
-
100
- st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
101
- fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
102
- x=['Resume 1', 'Jon Description'],
103
- y=['Resume 1', 'Job Description'])
104
- st.plotly_chart(fig, key = "bar")
105
-
106
-
107
-
108
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
109
- st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
110
- st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
111
-
112
  else:
113
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
114
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
115
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
116
-
117
-
118
-
119
-
120
-
121
- st.subheader ("Candidate Profile 2", divider = "green")
122
-
123
-
124
-
125
- if 'upload_count' not in st.session_state:
126
- st.session_state['upload_count'] = 0
127
-
128
- max_attempts = 3
129
-
130
- if st.session_state['upload_count'] < max_attempts:
131
- uploaded_files = st.file_uploader(
132
- "Upload your resume in .pdf format", accept_multiple_files=True, type="pdf", key="candidate 2"
133
  )
134
-
135
- if uploaded_files:
136
- st.session_state['upload_count'] += 1
137
- for uploaded_file in uploaded_files:
138
- pdf_reader = PdfReader(uploaded_file)
139
- text_data = ""
140
- for page in pdf_reader.pages:
141
- text_data += page.extract_text()
142
- data = pd.Series(text_data, name = 'Text')
143
-
144
-
145
- frames = [job, data]
146
- result = pd.concat(frames)
147
-
148
-
149
- model = GLiNER.from_pretrained("urchade/gliner_base")
150
- labels = ["person", "country","organization", "date", "time", "role", "skills", "year"]
151
- entities = model.predict_entities(text_data, labels)
152
- df = pd.DataFrame(entities)
153
-
154
-
155
-
156
- fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
157
- values='score', color='label')
158
- fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
159
- st.plotly_chart(fig, key = "iris")
160
-
161
-
162
-
163
-
164
- vectorizer = TfidfVectorizer()
165
- tfidf_matrix = vectorizer.fit_transform(result)
166
- tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
167
-
168
- cosine_sim_matrix = cosine_similarity(tfidf_matrix)
169
- cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
170
-
171
-
172
-
173
-
174
- fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
175
- x=['Resume 2', 'Jon Description'],
176
- y=['Resume 2', 'Job Description'])
177
- st.plotly_chart(fig, key = "radio")
178
-
179
-
180
-
181
- for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
182
- st.write(f"Similarity with Candidate Profile. A score closer to 1 means higher similarity. {i + 1}: {similarity_score:.4f}")
183
-
184
  else:
185
- st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
186
- if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
187
- st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
188
-
189
-
190
-
191
 
192
 
193
 
 
41
 
42
 
43
 
44
+
45
+
46
+
47
+ import streamlit as st
48
+ import pandas as pd
49
+ from pypdf import PdfReader
50
+ from transformers import pipeline
51
+ from sklearn.feature_extraction.text import TfidfVectorizer
52
+ from sklearn.metrics.pairwise import cosine_similarity
53
+ import plotly.express as px
54
 
55
+ st.subheader("Job Description", divider="orange")
56
+ txt = st.text_area("Paste the job description and then press Ctrl + Enter", key="text_job_desc")
57
  job = pd.Series(txt, name="Text")
58
 
59
+ st.subheader("Candidate Profile 1", divider="red")
60
+ if 'upload_count_candidate1' not in st.session_state:
61
+ st.session_state['upload_count_candidate1'] = 0
62
+ max_attempts_candidate1 = 3
63
+ if st.session_state['upload_count_candidate1'] < max_attempts_candidate1:
64
+ uploaded_files_candidate1 = st.file_uploader(
65
+ "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate_1_upload"
 
 
 
 
66
  )
67
+ if uploaded_files_candidate1:
68
+ st.session_state['upload_count_candidate1'] += 1
69
+ for uploaded_file in uploaded_files_candidate1:
70
+ pdf_reader = PdfReader(uploaded_file)
71
+ text_data = ""
72
+ for page in pdf_reader.pages:
73
+ text_data += page.extract_text()
74
+ data_candidate1 = pd.Series(text_data, name='Text')
75
+ frames_candidate1 = [job, data_candidate1]
76
+ result_candidate1 = pd.concat(frames_candidate1)
77
+ model_candidate1 = GLiNER.from_pretrained("urchade/gliner_base")
78
+ labels_candidate1 = ["person", "country", "organization", "role", "skills", "year"]
79
+ entities_candidate1 = model_candidate1(text_data, labels=labels_candidate1)
80
+ df_candidate1 = pd.DataFrame(entities_candidate1)
81
+
82
+ st.title("Profile of candidate 1")
83
+ fig_entities_candidate1 = px.treemap(entities_candidate1, path=[px.Constant("all"), 'text', 'label'],
84
+ values='score', color='label')
85
+ fig_entities_candidate1.update_layout(margin=dict(t=50, l=25, r=25, b=25))
86
+ st.plotly_chart(fig_entities_candidate1, key="tv_candidate1")
87
+
88
+ vectorizer_candidate1 = TfidfVectorizer()
89
+ tfidf_matrix_candidate1 = vectorizer_candidate1.fit_transform(result_candidate1)
90
+ tfidf_df_candidate1 = pd.DataFrame(tfidf_matrix_candidate1.toarray(),
91
+ columns=vectorizer_candidate1.get_feature_names_out())
92
+ cosine_sim_matrix_candidate1 = cosine_similarity(tfidf_matrix_candidate1)
93
+ cosine_sim_df_candidate1 = pd.DataFrame(cosine_sim_matrix_candidate1)
94
+
95
+ st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
96
+ fig_similarity_candidate1 = px.imshow(cosine_sim_df_candidate1, text_auto=True,
97
+ labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
98
+ x=['Resume 1', 'Jon Description'],
99
+ y=['Resume 1', 'Job Description'])
100
+ st.plotly_chart(fig_similarity_candidate1, key="bar_candidate1")
101
+
102
+ for i, similarity_score in enumerate(cosine_sim_matrix_candidate1[0][1:]):
103
+ st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
104
+ st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  else:
106
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts_candidate1}) for Candidate Profile 1.")
107
+ if 'upload_count_candidate1' in st.session_state and st.session_state['upload_count_candidate1'] > 0:
108
+ st.info(f"Files uploaded for Candidate Profile 1: {st.session_state['upload_count_candidate1']} time(s).")
109
+
110
+ st.subheader("Candidate Profile 2", divider="green")
111
+ if 'upload_count_candidate2' not in st.session_state:
112
+ st.session_state['upload_count_candidate2'] = 0
113
+ max_attempts_candidate2 = 3
114
+ if st.session_state['upload_count_candidate2'] < max_attempts_candidate2:
115
+ uploaded_files_candidate2 = st.file_uploader(
116
+ "Upload your resume in .pdf format", accept_multiple_files=True, type="pdf", key="candidate_2_upload"
 
 
 
 
 
 
 
 
 
117
  )
118
+ if uploaded_files_candidate2:
119
+ st.session_state['upload_count_candidate2'] += 1
120
+ for uploaded_file in uploaded_files_candidate2:
121
+ pdf_reader = PdfReader(uploaded_file)
122
+ text_data = ""
123
+ for page in pdf_reader.pages:
124
+ text_data += page.extract_text()
125
+ data_candidate2 = pd.Series(text_data, name='Text')
126
+ frames_candidate2 = [job, data_candidate2]
127
+ result_candidate2 = pd.concat(frames_candidate2)
128
+ model_candidate2 = GLiNER.from_pretrained("urchade/gliner_base")
129
+ labels_candidate2 = ["person", "country", "organization", "date", "time", "role", "skills", "year"]
130
+ entities_candidate2 = model_candidate2(text_data, labels=labels_candidate2)
131
+ df_candidate2 = pd.DataFrame(entities_candidate2)
132
+
133
+ st.title("Profile of candidate 2")
134
+ fig_entities_candidate2 = px.treemap(entities_candidate2, path=[px.Constant("all"), 'text', 'label'],
135
+ values='score', color='label')
136
+ fig_entities_candidate2.update_layout(margin=dict(t=50, l=25, r=25, b=25))
137
+ st.plotly_chart(fig_entities_candidate2, key="iris_candidate2")
138
+
139
+ vectorizer_candidate2 = TfidfVectorizer()
140
+ tfidf_matrix_candidate2 = vectorizer_candidate2.fit_transform(result_candidate2)
141
+ tfidf_df_candidate2 = pd.DataFrame(tfidf_matrix_candidate2.toarray(),
142
+ columns=vectorizer_candidate2.get_feature_names_out())
143
+ cosine_sim_matrix_candidate2 = cosine_similarity(tfidf_matrix_candidate2)
144
+ cosine_sim_df_candidate2 = pd.DataFrame(cosine_sim_matrix_candidate2)
145
+
146
+ st.subheader("Measuring similarity between keywords of candidate profile 2 and job description")
147
+ fig_similarity_candidate2 = px.imshow(cosine_sim_df_candidate2, text_auto=True,
148
+ labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
149
+ x=['Resume 2', 'Jon Description'],
150
+ y=['Resume 2', 'Job Description'])
151
+ st.plotly_chart(fig_similarity_candidate2, key="radio_candidate2")
152
+
153
+ for i, similarity_score in enumerate(cosine_sim_matrix_candidate2[0][1:]):
154
+ st.write(f"Similarity with Candidate Profile 2. A score closer to 1 means higher similarity. {i + 1}: {similarity_score:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  else:
156
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts_candidate2}) for Candidate Profile 2.")
157
+ if 'upload_count_candidate2' in st.session_state and st.session_state['upload_count_candidate2'] > 0:
158
+ st.info(f"Files uploaded for Candidate Profile 2: {st.session_state['upload_count_candidate2']} time(s).")
 
 
 
159
 
160
 
161