nlpblogs committed
Commit 00fa330 · verified · 1 Parent(s): ed09cf1

Update app.py

Files changed (1)
  1. app.py +32 -23
app.py CHANGED
@@ -8,15 +8,28 @@ from nltk.corpus import stopwords
 nltk.download('stopwords')
 from nltk.tokenize import word_tokenize
 
+from sentence_transformers import SentenceTransformer
+import streamlit as st
+import pandas as pd
+from PyPDF2 import PdfReader
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+
+nltk.download('punkt')
+nltk.download('stopwords')
+stop_words = set(stopwords.words('english'))
+
 model = SentenceTransformer("all-mpnet-base-v2")
 st.title("AI Resume Analysis based on Keywords App")
 st.divider()
-job_desc = st.text_area("Paste the job description and then press Ctrl + Enter", key="job_desc")
-text_tokens = []
-for sentence in job_desc:
-    text_tokens.extend(word_tokenize(job_desc))
-job_desc = [word for word in text_tokens if not word in stopwords.words()]
-st.write(job_desc)
+job_desc_raw = st.text_area("Paste the job description and then press Ctrl + Enter", key="job_desc")
+
+# Process job description for stop words
+job_desc_tokens = word_tokenize(job_desc_raw.lower())
+job_desc_filtered = [word for word in job_desc_tokens if not word in stop_words]
+job_desc_processed = " ".join(job_desc_filtered)
+st.write("Processed Job Description:", job_desc_processed)
 
 if 'applicant_data' not in st.session_state:
     st.session_state['applicant_data'] = {}
@@ -36,24 +49,23 @@ for i in range(1, 51): # Looping for 50 applicants
     if st.session_state['applicant_data'][applicant_key]['uploaded_file'] and not st.session_state['applicant_data'][applicant_key]['analysis_done']:
         try:
             pdf_reader = PdfReader(st.session_state['applicant_data'][applicant_key]['uploaded_file'])
-            text_data = ""
+            text_data_raw = ""
             for page in pdf_reader.pages:
-                text_data += page.extract_text()
+                text_data_raw += page.extract_text()
             with st.expander(f"See Applicant's {i} resume"):
-                text_tokens = []
-                for sentence in text_data:
-                    text_tokens.extend(word_tokenize(text_data))
-                text_data = [word for word in text_tokens if not word in stopwords.words()]
-                st.write(text_data)
-
-
-            # Encode the job description and resume text separately
-            job_embedding = model.encode([job_desc])
-            resume_embedding = model.encode([text_data])
+                st.write(text_data_raw)
 
+            # Process resume text for stop words
+            text_tokens = word_tokenize(text_data_raw.lower())
+            text_data_filtered = [word for word in text_tokens if not word in stop_words]
+            text_data_processed = " ".join(text_data_filtered)
+            st.write("Processed Resume:", text_data_processed)
+
+            # Encode the processed job description and resume text
+            job_embedding = model.encode([job_desc_processed])
+            resume_embedding = model.encode([text_data_processed])
             # Calculate the cosine similarity between the two embeddings
             similarity_score = model.similarity(job_embedding, resume_embedding)[0][0]
-
             with st.popover(f"See Result for Applicant {i}"):
                 st.write(f"Similarity between Applicant's resume and job description based on keywords: {similarity_score:.2f}")
                 st.info(
@@ -64,7 +76,4 @@ for i in range(1, 51): # Looping for 50 applicants
     else:
         st.warning(f"Maximum upload attempts has been reached ({max_attempts}).")
         if st.session_state['applicant_data'][applicant_key]['upload_count'] > 0:
-            st.info(f"Files uploaded for Applicant {i}: {st.session_state['applicant_data'][applicant_key]['upload_count']} time(s).")
-
-
-
+            st.info(f"Files uploaded for Applicant {i}: {st.session_state['applicant_data'][applicant_key]['upload_count']} time(s).")
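
Why the change: the deleted code looped with `for sentence in job_desc:`, which iterates over the characters of a string and re-tokenized the full text once per character; it called `stopwords.words()` with no language argument, which pulls in the stop-word lists for every language NLTK ships; and it passed a list of tokens to `model.encode([...])`, which expects strings. The new code tokenizes once, filters against a prebuilt English `stop_words` set, and re-joins the tokens into a single string before encoding.

Below is a minimal standalone sketch of the pipeline this commit introduces (lowercase, tokenize, drop stop words, embed, cosine similarity), for trying the logic outside Streamlit. The sample job-description and resume strings are invented for illustration, and it assumes sentence-transformers 3.x, where SentenceTransformer.similarity() is available:

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer

nltk.download('punkt')        # newer NLTK releases may also need 'punkt_tab'
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def strip_stop_words(text):
    # Lowercase, tokenize, drop English stop words, and re-join into a
    # single string, mirroring what app.py now does to both inputs.
    return " ".join(w for w in word_tokenize(text.lower()) if w not in stop_words)

model = SentenceTransformer("all-mpnet-base-v2")

# Hypothetical inputs, for illustration only.
job_desc = "We are hiring a data scientist with Python and NLP experience."
resume = "Python developer with a strong background in NLP and data science."

job_embedding = model.encode([strip_stop_words(job_desc)])
resume_embedding = model.encode([strip_stop_words(resume)])

# similarity() returns an (n, m) matrix; [0][0] is the score for this pair.
score = model.similarity(job_embedding, resume_embedding)[0][0]
print(f"Similarity: {float(score):.2f}")

For identical inputs, the same score should appear in the app's "See Result for Applicant {i}" popover.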