nlpblogs committed on
Commit
98209de
·
verified ·
1 Parent(s): 0f7fbb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py CHANGED
@@ -129,6 +129,70 @@ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
129
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
130
 
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
 
134
 
 
129
  st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
130
 
131
 
132
+ st.subheader("Candidate profile 2", divider = "green")
133
+ txt = st.text_area("Job description", key = "text 2")
134
+ job = pd.Series(txt, name="Text")
135
+ st.dataframe(job)
136
+
137
+ if 'upload_count' not in st.session_state:
138
+ st.session_state['upload_count'] = 0
139
+
140
+ max_attempts = 2
141
+
142
+ if st.session_state['upload_count'] < max_attempts:
143
+ uploaded_files = st.file_uploader(
144
+ "Choose a PDF file", accept_multiple_files=True, type="pdf", key="candidate_upload"
145
+ )
146
+ if uploaded_files:
147
+ st.session_state['upload_count'] += 1
148
+ for uploaded_file in uploaded_files:
149
+ pdf_reader = PdfReader(uploaded_file)
150
+ text_data = ""
151
+ for page in pdf_reader.pages:
152
+ text_data += page.extract_text()
153
+ data = pd.Series(text_data, name = 'Text')
154
+ st.dataframe(data)
155
+ frames = [job, data]
156
+ result = pd.concat(frames)
157
+ st.dataframe(result)
158
+ model = GLiNER.from_pretrained("urchade/gliner_base")
159
+ labels = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]
160
+ entities = model.predict_entities(text_data, labels)
161
+ df = pd.DataFrame(entities)
162
+ st.dataframe(entities)
163
+ st.dataframe(df)
164
+ fig = px.treemap(entities, path=[px.Constant("all"), 'text', 'label'],
165
+ values='score', color='label')
166
+ fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
167
+ st.plotly_chart(fig, key = "fig1")
168
+ vectorizer = TfidfVectorizer()
169
+ tfidf_matrix = vectorizer.fit_transform(result)
170
+ tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
171
+ st.subheader("TF-IDF Values:")
172
+ st.dataframe(tfidf_df)
173
+
174
+ cosine_sim_matrix = cosine_similarity(tfidf_matrix)
175
+ cosine_sim_df = pd.DataFrame(cosine_sim_matrix)
176
+ st.subheader("Cosine Similarity Matrix:")
177
+ st.dataframe(cosine_sim_df)
178
+
179
+
180
+ st.subheader("A score closer to 1 means closer match")
181
+
182
+ fig = px.imshow(cosine_sim_df, text_auto=True, labels=dict(x="Cosine similarity", y="Text", color="Productivity"),
183
+ x=['text1', 'Jon Description'],
184
+ y=['text1', 'Job Description'])
185
+ st.plotly_chart(fig)
186
+
187
+ st.subheader("Cosine Similarity Scores (Job Description vs. Resumes):")
188
+ for i, similarity_score in enumerate(cosine_sim_matrix[0][1:]):
189
+ st.write(f"Similarity with Candidate Profile {i + 1}: {similarity_score:.4f}")
190
+
191
+ else:
192
+ st.warning(f"You have reached the maximum upload attempts ({max_attempts}). Please refresh to upload more files.")
193
+
194
+ if 'upload_count' in st.session_state and st.session_state['upload_count'] > 0:
195
+ st.info(f"Files uploaded {st.session_state['upload_count']} time(s).")
196
 
197
 
198