# DeepSoft-Tech's picture
# Upload 4 files
# 1cf3592 verified
import streamlit as st
import os
import fitz # PyMuPDF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from spellchecker import SpellChecker # Import the SpellChecker class from pyspellchecker
import tempfile
import pandas as pd
st.title("RESUME RANKER")

# --- Input widgets ----------------------------------------------------------
# Skills to look for, typed as a single comma-separated string.
skills = st.text_input("Enter Skills (comma-separated):")

# Free-text description of the job.
job_description = st.text_area("Enter Job Description:")

# One or more resumes; the uploader only accepts files with a .pdf extension.
pdf_resumes = st.file_uploader(
    "Upload Resumes/CVs",
    type=["pdf"],
    accept_multiple_files=True,
)
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    The document is opened with PyMuPDF (``fitz``) and closed again when the
    ``with`` block exits.
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def _read_uploaded_pdf(uploaded_file):
    """Return the lowercased text of one uploaded PDF.

    The upload is spooled to a temporary file so it can be opened by path.
    The temporary file is removed even when text extraction raises.
    """
    with tempfile.NamedTemporaryFile(delete=False) as temp_pdf:
        temp_pdf.write(uploaded_file.read())
        temp_pdf_name = temp_pdf.name
    try:
        return extract_text_from_pdf(temp_pdf_name).lower()
    finally:
        os.remove(temp_pdf_name)


if st.button("Rank Resumes"):
    if not pdf_resumes:
        st.warning("Please upload PDF resumes.")
    else:
        # Drop blank entries: "".split(",") yields [""], and the empty string
        # is a substring of every resume, which would count as a match.
        skills = [skill.strip() for skill in skills.split(",") if skill.strip()]
        job_description = job_description.lower()

        # Extract the text of every uploaded PDF as (file name, text) pairs.
        resume_data = []
        for pdf_resume in pdf_resumes:
            if pdf_resume.type != "application/pdf":
                continue
            try:
                pdf_text = _read_uploaded_pdf(pdf_resume)
            except RuntimeError as exc:
                # PyMuPDF reports unreadable/corrupt documents with
                # RuntimeError subclasses; skip that file, keep the rest.
                st.warning(f"Could not read {pdf_resume.name}: {exc}")
                continue
            resume_data.append((pdf_resume.name, pdf_text))

        if not resume_data:
            st.warning("No PDF resumes found in the uploaded files.")
        else:
            # The job description is the same for every resume, so fit the
            # TF-IDF vocabulary once instead of once per resume.
            tfidf_vectorizer = TfidfVectorizer()
            try:
                job_description_matrix = tfidf_vectorizer.fit_transform([job_description])
            except ValueError:
                # Raised when the description contains no usable tokens
                # (e.g. it was left empty); similarity is reported as 0.
                job_description_matrix = None

            resume_rankings = []
            for resume_name, resume_text in resume_data:
                # Skill matching is a case-insensitive substring test.
                missing_skills = [skill for skill in skills if skill.lower() not in resume_text]
                if skills:
                    matched = len(skills) - len(missing_skills)
                    similarity_score = matched / len(skills)
                else:
                    similarity_score = 0.0  # no skills entered: nothing to match

                # Cosine similarity between job description and resume.
                if job_description_matrix is None:
                    job_description_similarity = 0.0
                else:
                    resume_matrix = tfidf_vectorizer.transform([resume_text])
                    job_description_similarity = cosine_similarity(
                        job_description_matrix, resume_matrix
                    )[0][0]

                resume_rankings.append(
                    (
                        resume_name,
                        round(similarity_score * 100, 2),
                        round(job_description_similarity * 100, 2),
                        missing_skills,
                    )
                )

            # Sort on the numeric skills score (descending). Sorting the
            # formatted "xx.x%" strings would compare them lexicographically
            # and place "50.0%" above "100.0%".
            resume_rankings.sort(key=lambda row: row[1], reverse=True)

            # Format the percentages only for display, after sorting.
            display_rows = [
                (name, f"{skills_pct}%", f"{description_pct}%", missing)
                for name, skills_pct, description_pct, missing in resume_rankings
            ]
            df = pd.DataFrame(
                display_rows,
                columns=["File Name", "Skills Match ", "Job Description Match ", "Missing Skills"],
            )
            st.subheader("Ranked Resumes:")
            st.dataframe(df)