nlpblogs committed
Commit 00fa330 · verified · 1 Parent(s): ed09cf1

Update app.py

Files changed (1)
  1. app.py +32 -23
app.py CHANGED
@@ -8,15 +8,28 @@ from nltk.corpus import stopwords
 nltk.download('stopwords')
 from nltk.tokenize import word_tokenize
 
+from sentence_transformers import SentenceTransformer
+import streamlit as st
+import pandas as pd
+from PyPDF2 import PdfReader
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+
+nltk.download('punkt')
+nltk.download('stopwords')
+stop_words = set(stopwords.words('english'))
+
 model = SentenceTransformer("all-mpnet-base-v2")
 st.title("AI Resume Analysis based on Keywords App")
 st.divider()
-job_desc = st.text_area("Paste the job description and then press Ctrl + Enter", key="job_desc")
-text_tokens = []
-for sentence in job_desc:
-    text_tokens.extend(word_tokenize(job_desc))
-job_desc = [word for word in text_tokens if not word in stopwords.words()]
-st.write(job_desc)
+job_desc_raw = st.text_area("Paste the job description and then press Ctrl + Enter", key="job_desc")
+
+# Process job description for stop words
+job_desc_tokens = word_tokenize(job_desc_raw.lower())
+job_desc_filtered = [word for word in job_desc_tokens if not word in stop_words]
+job_desc_processed = " ".join(job_desc_filtered)
+st.write("Processed Job Description:", job_desc_processed)
 
 if 'applicant_data' not in st.session_state:
     st.session_state['applicant_data'] = {}
@@ -36,24 +49,23 @@ for i in range(1, 51): # Looping for 50 applicants
     if st.session_state['applicant_data'][applicant_key]['uploaded_file'] and not st.session_state['applicant_data'][applicant_key]['analysis_done']:
         try:
             pdf_reader = PdfReader(st.session_state['applicant_data'][applicant_key]['uploaded_file'])
-            text_data = ""
+            text_data_raw = ""
             for page in pdf_reader.pages:
-                text_data += page.extract_text()
+                text_data_raw += page.extract_text()
             with st.expander(f"See Applicant's {i} resume"):
-                text_tokens = []
-                for sentence in text_data:
-                    text_tokens.extend(word_tokenize(text_data))
-                text_data = [word for word in text_tokens if not word in stopwords.words()]
-                st.write(text_data)
-
-
-            # Encode the job description and resume text separately
-            job_embedding = model.encode([job_desc])
-            resume_embedding = model.encode([text_data])
+                st.write(text_data_raw)
 
+            # Process resume text for stop words
+            text_tokens = word_tokenize(text_data_raw.lower())
+            text_data_filtered = [word for word in text_tokens if not word in stop_words]
+            text_data_processed = " ".join(text_data_filtered)
+            st.write("Processed Resume:", text_data_processed)
+
+            # Encode the processed job description and resume text
+            job_embedding = model.encode([job_desc_processed])
+            resume_embedding = model.encode([text_data_processed])
             # Calculate the cosine similarity between the two embeddings
             similarity_score = model.similarity(job_embedding, resume_embedding)[0][0]
-
             with st.popover(f"See Result for Applicant {i}"):
                 st.write(f"Similarity between Applicant's resume and job description based on keywords: {similarity_score:.2f}")
                 st.info(
@@ -64,7 +76,4 @@ for i in range(1, 51): # Looping for 50 applicants
     else:
         st.warning(f"Maximum upload attempts has been reached ({max_attempts}).")
         if st.session_state['applicant_data'][applicant_key]['upload_count'] > 0:
-            st.info(f"Files uploaded for Applicant {i}: {st.session_state['applicant_data'][applicant_key]['upload_count']} time(s).")
-
-
-
+            st.info(f"Files uploaded for Applicant {i}: {st.session_state['applicant_data'][applicant_key]['upload_count']} time(s).")
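
Why the change: the deleted code looped with `for sentence in job_desc:`, which iterates over the characters of a string and re-tokenized the full text once per character; it called `stopwords.words()` with no language argument, which pulls in the stop-word lists for every language NLTK ships; and it passed a list of tokens to `model.encode([...])`, which expects strings. The new code tokenizes once, filters against a prebuilt English `stop_words` set, and re-joins the tokens into a single string before encoding.

Below is a minimal standalone sketch of the pipeline this commit introduces (lowercase, tokenize, drop stop words, embed, cosine similarity), for trying the logic outside Streamlit. The sample job-description and resume strings are invented for illustration, and it assumes sentence-transformers 3.x, where SentenceTransformer.similarity() is available:

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer

nltk.download('punkt')        # newer NLTK releases may also need 'punkt_tab'
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def strip_stop_words(text):
    # Lowercase, tokenize, drop English stop words, and re-join into a
    # single string, mirroring what app.py now does to both inputs.
    return " ".join(w for w in word_tokenize(text.lower()) if w not in stop_words)

model = SentenceTransformer("all-mpnet-base-v2")

# Hypothetical inputs, for illustration only.
job_desc = "We are hiring a data scientist with Python and NLP experience."
resume = "Python developer with a strong background in NLP and data science."

job_embedding = model.encode([strip_stop_words(job_desc)])
resume_embedding = model.encode([strip_stop_words(resume)])

# similarity() returns an (n, m) matrix; [0][0] is the score for this pair.
score = model.similarity(job_embedding, resume_embedding)[0][0]
print(f"Similarity: {float(score):.2f}")

For identical inputs, the same score should appear in the app's "See Result for Applicant {i}" popover.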