import streamlit as st
from PyPDF2 import PdfReader
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer

# Fetch the NLTK tokenizer tables and stop-word list (no-ops once downloaded).
nltk.download('punkt_tab')
nltk.download('stopwords')
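# NLP setup: an English stop-word set for keyword filtering, plus a
# sentence-transformers encoder (all-mpnet-base-v2) whose embeddings
# support cosine-similarity comparison between texts.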
stop_words = set(stopwords.words('english'))
model = SentenceTransformer("all-mpnet-base-v2")
st.title("AI Resume Analysis based on Keywords App")
st.divider()
job_desc_raw = st.text_area("Paste the job description and then press Ctrl + Enter", key="job_desc")
# Remove English stop words from the job description so the embedding
# focuses on meaningful keywords
job_desc_tokens = word_tokenize(job_desc_raw.lower())
job_desc_filtered = [word for word in job_desc_tokens if word not in stop_words]
job_desc_processed = " ".join(job_desc_filtered)
st.write("Processed Job Description:", job_desc_processed)
if 'applicant_data' not in st.session_state:
    st.session_state['applicant_data'] = {}
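# Limit each applicant to a single resume upload per session.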
max_attempts = 1
for i in range(1, 51):  # Loop over up to 50 applicants
    st.subheader(f"Applicant {i} Resume", divider="green")
    applicant_key = f"applicant_{i}"
    upload_key = f"candidate_{i}"
    if applicant_key not in st.session_state['applicant_data']:
        st.session_state['applicant_data'][applicant_key] = {'upload_count': 0, 'uploaded_file': None, 'analysis_done': False}
    if st.session_state['applicant_data'][applicant_key]['upload_count'] < max_attempts:
        uploaded_file = st.file_uploader(f"Upload Applicant {i}'s resume", type="pdf", key=upload_key)
        if uploaded_file:
            st.session_state['applicant_data'][applicant_key]['uploaded_file'] = uploaded_file
            st.session_state['applicant_data'][applicant_key]['upload_count'] += 1
            st.session_state['applicant_data'][applicant_key]['analysis_done'] = False  # Re-run analysis for the new file
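        # Analyze each uploaded resume once; the analysis_done flag keeps
        # the embedding step from re-running on every Streamlit rerun.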
        if st.session_state['applicant_data'][applicant_key]['uploaded_file'] and not st.session_state['applicant_data'][applicant_key]['analysis_done']:
            try:
                pdf_reader = PdfReader(st.session_state['applicant_data'][applicant_key]['uploaded_file'])
                text_data_raw = ""
                for page in pdf_reader.pages:
                    # extract_text() can return None for image-only pages
                    text_data_raw += page.extract_text() or ""
                with st.expander(f"See Applicant {i}'s resume"):
                    st.write(text_data_raw)
                # Remove stop words from the resume text, mirroring the
                # job-description preprocessing above
                text_tokens = word_tokenize(text_data_raw.lower())
                text_data_filtered = [word for word in text_tokens if word not in stop_words]
                text_data_processed = " ".join(text_data_filtered)
                st.write("Processed Resume:", text_data_processed)
                # Encode the processed job description and resume text
                job_embedding = model.encode([job_desc_processed])
                resume_embedding = model.encode([text_data_processed])
                # Cosine similarity between the two embeddings
                similarity_score = float(model.similarity(job_embedding, resume_embedding)[0][0])
                with st.popover(f"See Result for Applicant {i}"):
                    st.write(f"Similarity between Applicant {i}'s resume and the job description based on keywords: {similarity_score:.2f}")
                    st.info(
                        f"A score closer to 1 (e.g., 0.80, 0.90) means Applicant {i}'s resume closely matches the job description; a score closer to 0 (e.g., 0.20, 0.30) means it does not.")
                st.session_state['applicant_data'][applicant_key]['analysis_done'] = True
            except Exception as e:
                st.error(f"An error occurred while processing Applicant {i}'s resume: {e}")
    else:
        st.warning(f"The maximum number of upload attempts ({max_attempts}) has been reached.")
    if st.session_state['applicant_data'][applicant_key]['upload_count'] > 0:
        st.info(f"Files uploaded for Applicant {i}: {st.session_state['applicant_data'][applicant_key]['upload_count']} time(s).")