nlpblogs committed
Commit 0b36569 · verified · 1 Parent(s): 49d9bd3

Update app.py

Files changed (1):
  app.py +139 -103
app.py CHANGED
@@ -1,3 +1,4 @@
 import streamlit as st
 from PyPDF2 import PdfReader
 import pandas as pd
@@ -41,117 +42,152 @@ with st.sidebar:
 
 
 
-
-
-
-
 
-st.subheader("Job Description", divider="orange")
-txt = st.text_area("Paste the job description and then press Ctrl + Enter", key="text_job_desc")
 job = pd.Series(txt, name="Text")
 
-st.subheader("Candidate Profile 1", divider="red")
-if 'upload_count_candidate1' not in st.session_state:
-    st.session_state['upload_count_candidate1'] = 0
-max_attempts_candidate1 = 3
-if st.session_state['upload_count_candidate1'] < max_attempts_candidate1:
-    uploaded_files_candidate1 = st.file_uploader(
-        "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate_1_upload"
     )
-    if uploaded_files_candidate1:
-        st.session_state['upload_count_candidate1'] += 1
-        for uploaded_file in uploaded_files_candidate1:
-            pdf_reader = PdfReader(uploaded_file)
-            text_data = ""
-            for page in pdf_reader.pages:
-                text_data += page.extract_text()
-            data_candidate1 = pd.Series(text_data, name='Text')
-            frames_candidate1 = [job, data_candidate1]
-            result_candidate1 = pd.concat(frames_candidate1)
-            model_candidate1 = GLiNER.from_pretrained("urchade/gliner_base")
-            labels_candidate1 = ["person", "country", "organization", "role", "skills", "year"]
-            entities_candidate1 = model_candidate1(text_data, labels=labels_candidate1)
-            df_candidate1 = pd.DataFrame(entities_candidate1)
-
-            st.title("Profile of candidate 1")
-            fig_entities_candidate1 = px.treemap(entities_candidate1, path=[px.Constant("all"), 'text', 'label'],
-                                                 values='score', color='label')
-            fig_entities_candidate1.update_layout(margin=dict(t=50, l=25, r=25, b=25))
-            st.plotly_chart(fig_entities_candidate1, key="tv_candidate1")
-
-            vectorizer_candidate1 = TfidfVectorizer()
-            tfidf_matrix_candidate1 = vectorizer_candidate1.fit_transform(result_candidate1)
-            tfidf_df_candidate1 = pd.DataFrame(tfidf_matrix_candidate1.toarray(),
-                                               columns=vectorizer_candidate1.get_feature_names_out())
-            cosine_sim_matrix_candidate1 = cosine_similarity(tfidf_matrix_candidate1)
-            cosine_sim_df_candidate1 = pd.DataFrame(cosine_sim_matrix_candidate1)
-
-            st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
-            fig_similarity_candidate1 = px.imshow(cosine_sim_df_candidate1, text_auto=True,
-                                                  labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
-                                                  x=['Resume 1', 'Jon Description'],
-                                                  y=['Resume 1', 'Job Description'])
-            st.plotly_chart(fig_similarity_candidate1, key="bar_candidate1")
-
-            for i, similarity_score in enumerate(cosine_sim_matrix_candidate1[0][1:]):
-                st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
-            st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
 else:
-    st.warning(f"You have reached the maximum upload attempts ({max_attempts_candidate1}) for Candidate Profile 1.")
-    if 'upload_count_candidate1' in st.session_state and st.session_state['upload_count_candidate1'] > 0:
-        st.info(f"Files uploaded for Candidate Profile 1: {st.session_state['upload_count_candidate1']} time(s).")
-st.subheader("Candidate Profile 2", divider="green")
-if 'upload_count_candidate2' not in st.session_state:
-    st.session_state['upload_count_candidate2'] = 0
-max_attempts_candidate2 = 3
-if st.session_state['upload_count_candidate2'] < max_attempts_candidate2:
-    uploaded_files_candidate2 = st.file_uploader(
-        "Upload your resume in .pdf format", accept_multiple_files=True, type="pdf", key="candidate_2_upload"
     )
-    if uploaded_files_candidate2:
-        st.session_state['upload_count_candidate2'] += 1
-        for uploaded_file in uploaded_files_candidate2:
-            pdf_reader = PdfReader(uploaded_file)
-            text_data = ""
-            for page in pdf_reader.pages:
-                text_data += page.extract_text()
-            data_candidate2 = pd.Series(text_data, name='Text')
-            frames_candidate2 = [job, data_candidate2]
-            result_candidate2 = pd.concat(frames_candidate2)
-            model_candidate2 = GLiNER.from_pretrained("urchade/gliner_base")
-            labels_candidate2 = ["person", "country", "organization", "date", "time", "role", "skills", "year"]
-            entities_candidate2 = model_candidate2(text_data, labels=labels_candidate2)
-            df_candidate2 = pd.DataFrame(entities_candidate2)
-
-            st.title("Profile of candidate 2")
-            fig_entities_candidate2 = px.treemap(entities_candidate2, path=[px.Constant("all"), 'text', 'label'],
-                                                 values='score', color='label')
-            fig_entities_candidate2.update_layout(margin=dict(t=50, l=25, r=25, b=25))
-            st.plotly_chart(fig_entities_candidate2, key="iris_candidate2")
-
-            vectorizer_candidate2 = TfidfVectorizer()
-            tfidf_matrix_candidate2 = vectorizer_candidate2.fit_transform(result_candidate2)
-            tfidf_df_candidate2 = pd.DataFrame(tfidf_matrix_candidate2.toarray(),
-                                               columns=vectorizer_candidate2.get_feature_names_out())
-            cosine_sim_matrix_candidate2 = cosine_similarity(tfidf_matrix_candidate2)
-            cosine_sim_df_candidate2 = pd.DataFrame(cosine_sim_matrix_candidate2)
-
-            st.subheader("Measuring similarity between keywords of candidate profile 2 and job description")
-            fig_similarity_candidate2 = px.imshow(cosine_sim_df_candidate2, text_auto=True,
-                                                  labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
-                                                  x=['Resume 2', 'Jon Description'],
-                                                  y=['Resume 2', 'Job Description'])
-            st.plotly_chart(fig_similarity_candidate2, key="radio_candidate2")
-
-            for i, similarity_score in enumerate(cosine_sim_matrix_candidate2[0][1:]):
-                st.write(f"Similarity with Candidate Profile 2. A score closer to 1 means higher similarity. {i + 1}: {similarity_score:.4f}")
 else:
-    st.warning(f"You have reached the maximum upload attempts ({max_attempts_candidate2}) for Candidate Profile 2.")
-    if 'upload_count_candidate2' in st.session_state and st.session_state['upload_count_candidate2'] > 0:
-        st.info(f"Files uploaded for Candidate Profile 2: {st.session_state['upload_count_candidate2']} time(s).")
 
-
 
 
@@ -1,3 +1,4 @@
+
 import streamlit as st
 from PyPDF2 import PdfReader
 import pandas as pd
 
@@ -41,117 +42,152 @@ with st.sidebar:
 
 
 
+st.subheader("Job Description", divider="orange")
 
+txt = st.text_area("Paste the job description and then press Ctrl + Enter", key="text 1")
 job = pd.Series(txt, name="Text")
 
+ st.subheader("Candidate Profile 1", divider = "red")
51
+
52
+
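+# Each candidate gets a capped number of upload attempts; the counter lives in
+# st.session_state so it survives Streamlit reruns.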
+if 'upload_count_cand1' not in st.session_state:
+    st.session_state['upload_count_cand1'] = 0
+
+max_attempts_cand1 = 1
+
+if st.session_state['upload_count_cand1'] < max_attempts_cand1:
+    uploaded_files = st.file_uploader(
+        "Upload your resume", accept_multiple_files=True, type="pdf", key="candidate 1"
     )
+
+    if uploaded_files:
+        st.session_state['upload_count_cand1'] += 1
+        for uploaded_file in uploaded_files:
+            pdf_reader = PdfReader(uploaded_file)
+            text_data = ""
+            for page in pdf_reader.pages:
+                text_data += page.extract_text()
+            data = pd.Series(text_data, name='Text')
+
+            frames = [job, data]
+            result = pd.concat(frames)
+
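+            # Zero-shot NER: GLiNER scores spans in the resume text against the label
+            # list and returns a list of dicts with 'text', 'label' and 'score' fields,
+            # which px.treemap below groups by span and label and sizes by score.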
+            model = GLiNER.from_pretrained("urchade/gliner_base")
+            labels = ["person", "country", "organization", "role", "skills", "year"]
+            entities = model.predict_entities(text_data, labels)
+            df = pd.DataFrame(entities)
+
+            st.title("Profile of candidate 1")
+            fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
+                             values='score', color='label')
+            fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
+            st.plotly_chart(fig, key="tv")
+
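+            # TF-IDF over the two concatenated documents (job description and resume),
+            # then cosine similarity; the off-diagonal entry of the 2x2 matrix is the
+            # resume-vs-job-description score plotted and printed below.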
+            vectorizer = TfidfVectorizer()
+            tfidf_matrix = vectorizer.fit_transform(result)
+            tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
+
+            cosine_sim_matrix = cosine_similarity(tfidf_matrix)
+            cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
+
+            st.subheader("Measuring similarity between keywords of candidate profile 1 and job description")
+            fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
+                            x=['Resume 1', 'Job Description'],
+                            y=['Resume 1', 'Job Description'])
+            st.plotly_chart(fig, key="bar")
+
+            for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
+                st.write(f"Similarity of job description with candidate profile 1. {i + 1}: {similarity_score:.4f}")
+            st.info("A score closer to 1 (0.80, 0.90) means higher similarity between candidate profile 1 and job description. A score closer to 0 (0.20, 0.30) means lower similarity between candidate profile 1 and job description.")
+
 else:
+    st.warning(f"You have reached the maximum upload attempts ({max_attempts_cand1}).")
+    if 'upload_count_cand1' in st.session_state and st.session_state['upload_count_cand1'] > 0:
+        st.info(f"Files uploaded {st.session_state['upload_count_cand1']} time(s).")
+
+
+st.subheader("Candidate Profile 2", divider="green")
+
+if 'upload_count_cand2' not in st.session_state:
+    st.session_state['upload_count_cand2'] = 0
+
+max_attempts_cand2 = 3
+
+if st.session_state['upload_count_cand2'] < max_attempts_cand2:
+    uploaded_files = st.file_uploader(
+        "Upload your resume in .pdf format", accept_multiple_files=True, type="pdf", key="candidate 2"
     )
+
+    if uploaded_files:
+        st.session_state['upload_count_cand2'] += 1
+        for uploaded_file in uploaded_files:
+            pdf_reader = PdfReader(uploaded_file)
+            text_data = ""
+            for page in pdf_reader.pages:
+                text_data += page.extract_text()
+            data = pd.Series(text_data, name='Text')
+
+            frames = [job, data]
+            result = pd.concat(frames)
+
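+            # Same pipeline as candidate profile 1; the label set below additionally
+            # asks GLiNER for "date" and "time" entities.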
+            model = GLiNER.from_pretrained("urchade/gliner_base")
+            labels = ["person", "country", "organization", "date", "time", "role", "skills", "year"]
+            entities = model.predict_entities(text_data, labels)
+            df = pd.DataFrame(entities)
+
+            fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
+                             values='score', color='label')
+            fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
+            st.plotly_chart(fig, key="iris")
+
+            vectorizer = TfidfVectorizer()
+            tfidf_matrix = vectorizer.fit_transform(result)
+            tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
+
+            cosine_sim_matrix = cosine_similarity(tfidf_matrix)
+            cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
+
+            fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Keyword similarity", y="Resumes", color="Productivity"),
+                            x=['Resume 2', 'Job Description'],
+                            y=['Resume 2', 'Job Description'])
+            st.plotly_chart(fig, key="radio")
+
+            for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
+                st.write(f"Similarity with Candidate Profile 2. A score closer to 1 means higher similarity. {i + 1}: {similarity_score:.4f}")
+
 else:
+    st.warning(f"You have reached the maximum upload attempts ({max_attempts_cand2}).")
+    if 'upload_count_cand2' in st.session_state and st.session_state['upload_count_cand2'] > 0:
+        st.info(f"Files uploaded {st.session_state['upload_count_cand2']} time(s).")
+
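
For reference, a minimal standalone sketch of the pipeline this commit switches the app to (GLiNER zero-shot NER via predict_entities, then TF-IDF plus cosine similarity against the job description). It runs outside Streamlit; the sample job_text and resume_text strings are illustrative placeholders, not values from app.py.

import pandas as pd
from gliner import GLiNER
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Illustrative inputs; in app.py these come from st.text_area and the uploaded PDF.
job_text = "Looking for a data analyst with Python and SQL skills."
resume_text = "Data analyst with five years of Python, SQL and Tableau experience."

# Zero-shot NER over the resume text with the same labels used for candidate 1.
model = GLiNER.from_pretrained("urchade/gliner_base")
labels = ["person", "country", "organization", "role", "skills", "year"]
entities = model.predict_entities(resume_text, labels)
print(pd.DataFrame(entities))  # one row per detected span: text, label, score, ...

# Vectorize both documents together so they share a vocabulary, then compare them.
tfidf_matrix = TfidfVectorizer().fit_transform([job_text, resume_text])
similarity = cosine_similarity(tfidf_matrix)[0][1]
print(f"Job description vs. resume similarity: {similarity:.4f}")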