|
import streamlit as st |
|
import os |
|
import fitz |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from spellchecker import SpellChecker |
|
import tempfile |
|
import pandas as pd |
|
|
|
# --- Page header -----------------------------------------------------------
st.title("RESUME RANKER")

# --- Recruiter inputs ------------------------------------------------------
# Skills are typed as a single comma-separated string; the job description
# is free-form text.
skills = st.text_input(label="Enter Skills (comma-separated):")
job_description = st.text_area(label="Enter Job Description:")

# --- Resume upload ---------------------------------------------------------
# The uploader is restricted to the "pdf" type and allows selecting several
# files at once.
pdf_resumes = st.file_uploader(
    label="Upload Resumes/CVs",
    accept_multiple_files=True,
    type=["pdf"],
)
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    Args:
        pdf_path: Filesystem path of the PDF document to open with ``fitz``.

    Returns:
        The page texts joined in page order (empty string for no pages).
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def _parse_skills(raw_skills):
    """Split a comma-separated string into stripped, non-empty skill names."""
    stripped = (skill.strip() for skill in raw_skills.split(","))
    # Drop empty entries (blank input, trailing or doubled commas): an empty
    # string is a substring of every resume and would count as a match.
    return [skill for skill in stripped if skill]


def _read_resume_text(pdf_resume):
    """Return the lower-cased text of one uploaded PDF resume.

    The upload is spilled to a temporary file so it can be opened by path;
    the file is removed even when text extraction raises.
    """
    temp_pdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        with temp_pdf:
            # getvalue() returns the whole buffer regardless of the current
            # read position of the uploaded file object.
            temp_pdf.write(pdf_resume.getvalue())
        return extract_text_from_pdf(temp_pdf.name).lower()
    finally:
        os.remove(temp_pdf.name)


if st.button("Rank Resumes"):
    if not pdf_resumes:
        st.warning("Please upload PDF resumes.")
    else:
        skills = _parse_skills(skills)
        job_description = job_description.lower()

        # (file name, lower-cased text) for every upload reported as a PDF.
        resume_data = [
            (pdf_resume.name, _read_resume_text(pdf_resume))
            for pdf_resume in pdf_resumes
            if pdf_resume.type == "application/pdf"
        ]

        if not resume_data:
            st.warning("No PDF resumes found in the uploaded files.")
        else:
            # The vocabulary comes from the job description only, so the
            # vectorizer is fitted once instead of once per resume.
            tfidf_vectorizer = TfidfVectorizer()
            try:
                job_description_matrix = tfidf_vectorizer.fit_transform(
                    [job_description]
                )
            except ValueError:
                # fit_transform raises ValueError when the text yields no
                # tokens (e.g. an empty description).
                job_description_matrix = None
                st.warning(
                    "Job description has no usable words; "
                    "job description match is reported as 0%."
                )

            # Rows hold numeric percentages so they can be sorted correctly;
            # the "%" suffix is only added when building the table.
            resume_rankings = []
            for resume_name, resume_text in resume_data:
                missing_skills = [
                    skill for skill in skills if skill.lower() not in resume_text
                ]
                matched_count = len(skills) - len(missing_skills)
                # No skills entered -> nothing to match, score 0 (avoids
                # dividing by zero).
                similarity_score = matched_count / len(skills) if skills else 0.0

                if job_description_matrix is None:
                    job_description_similarity = 0.0
                else:
                    resume_matrix = tfidf_vectorizer.transform([resume_text])
                    job_description_similarity = cosine_similarity(
                        job_description_matrix, resume_matrix
                    )[0][0]

                resume_rankings.append(
                    (
                        resume_name,
                        round(similarity_score * 100, 2),
                        round(job_description_similarity * 100, 2),
                        missing_skills,
                    )
                )

            # Best skills match first; the job-description match breaks ties.
            # Sorting the numbers (not the "NN.N%" strings) keeps 100.0 above
            # 9.0, which a string comparison would get wrong.
            resume_rankings.sort(key=lambda row: (row[1], row[2]), reverse=True)

            df = pd.DataFrame(
                [
                    (name, f"{skills_pct}%", f"{description_pct}%", missing)
                    for name, skills_pct, description_pct, missing in resume_rankings
                ],
                columns=[
                    "File Name",
                    "Skills Match ",
                    "Job Description Match ",
                    "Missing Skills",
                ],
            )

            st.subheader("Ranked Resumes:")
            st.dataframe(df)