# Page metadata: file size 3,307 bytes; ref da06e55.
from sentence_transformers import util
import numpy as np
# Weights for each aligned JD section
# Multiplier applied to each job-description section's similarity before the
# per-section results are summed in calculate_match_score.
# NOTE(review): the two weights add up to 1.4, so a weighted total can exceed
# 1.0 (i.e. print as more than 100%) -- confirm this scale is intended.
weights = dict(
    responsibilities=0.7,
    qualifications=0.7,
)
# Function to compute cosine similarity with fallback
def safe_cos_sim(vec1, vec2):
    """Return the cosine similarity of two embeddings as a plain float.

    Falls back to ``0.0`` when either embedding is ``None`` so a missing
    section contributes nothing instead of raising.
    """
    both_present = vec1 is not None and vec2 is not None
    if not both_present:
        return 0.0
    similarity = util.cos_sim(vec1, vec2)
    # ``.item()`` extracts the scalar from the similarity result.
    return float(similarity.item())
# Enhanced explanation with match levels
def interpret_match(label, score):
    """Describe how well one section matched, as a one-line message.

    Args:
        label: Human-readable section name inserted into the message.
        score: Similarity score. Values of 0.75 and above read as strong,
            0.5 and above as partial, and anything lower as weak.

    Returns:
        A single-line string made of a status marker, the match level, the
        label, and the score as a percentage rounded to one decimal place.
    """
    percent = round(score * 100, 1)
    if score >= 0.75:
        return f"✅ Strong alignment in {label}: {percent}%"
    if score >= 0.5:
        return f"⚠️ Partial alignment in {label}: {percent}%"
    return f"❌ Weak alignment in {label}: {percent}%"
# Matching logic
def calculate_match_score(jd_embeddings, resume_embeddings):
    """Score one resume against a job description, section by section.

    Each job-description section is compared with the average of the resume
    sections paired with it: responsibilities against experience and
    projects, qualifications against education, certifications and skills.

    Args:
        jd_embeddings: Mapping with optional "responsibilities" and
            "qualifications" entries.
        resume_embeddings: Mapping with optional "experience", "projects",
            "education", "certifications" and "skills" entries.

    Returns:
        A ``(score, explanation)`` tuple: the weighted sum of the two section
        similarities rounded to three decimals, and one message per section
        with responsibilities first.
    """
    # (display label, JD section key, resume section keys pooled against it)
    section_plan = (
        ("Responsibilities", "responsibilities", ("experience", "projects")),
        (
            "Qualifications",
            "qualifications",
            ("education", "certifications", "skills"),
        ),
    )

    weighted_total = 0.0
    messages = []
    for label, jd_key, resume_keys in section_plan:
        jd_vector = jd_embeddings.get(jd_key)
        pooled = _combine_embeddings(
            [resume_embeddings.get(key) for key in resume_keys]
        )
        similarity = safe_cos_sim(jd_vector, pooled)
        weighted_total += similarity * weights[jd_key]
        messages.append(interpret_match(label, similarity))

    return round(weighted_total, 3), messages
# Combine multiple numpy vectors into one
def _combine_embeddings(embeddings_list):
    """Average the available embeddings into a single vector.

    ``None`` entries are skipped. Returns ``None`` when no entry is left to
    average; otherwise returns the element-wise mean along the first axis.
    """
    present = []
    for candidate in embeddings_list:
        if candidate is not None:
            present.append(candidate)
    if len(present) == 0:
        return None
    return np.mean(present, axis=0)
# Main matcher
def match_all_resumes(jd_title, jd_embeddings, resume_data, threshold=0.8):
    """Score every resume against a job description and report the results.

    A header, each candidate's score, the per-section explanation lines and
    the shortlist verdict are printed to standard output as a side effect.

    Args:
        jd_title: Job title shown in the printed header.
        jd_embeddings: Job-description section embeddings, passed unchanged
            to calculate_match_score.
        resume_data: Mapping of filename to a per-resume dict. The "parsed",
            "embedding" and "id" entries are read; a missing or ``None``
            "parsed"/"embedding" entry is treated as empty.
        threshold: Minimum score (inclusive) for a resume to be shortlisted.

    Returns:
        A list with one dict per resume, in iteration order, holding "name",
        "score", "reasoning", "resume_id" and "is_match".
    """
    all_candidates = []
    print(f"\n🔍 Matching resumes against JD: **{jd_title}**\n")

    for filename, data in resume_data.items():
        # ``or {}`` also covers keys that are present but explicitly None.
        parsed = data.get("parsed") or {}
        embeddings = data.get("embedding") or {}

        name = _extract_name(parsed, fallback=filename)
        score, explanation = calculate_match_score(jd_embeddings, embeddings)
        # Decide once so the printed verdict and the stored flag cannot differ.
        is_match = score >= threshold

        print(f"📄 {name} — Score: {round(score * 100, 1)}%")
        for line in explanation:
            print(" •", line)
        print("✅ Shortlisted\n" if is_match else "❌ Not shortlisted\n")

        all_candidates.append({
            "name": name,
            "score": score,
            "reasoning": explanation,
            "resume_id": data.get("id"),  # None when the record has no "id"
            "is_match": is_match,
        })

    return all_candidates
# Name extractor fallback
def _extract_name(parsed, fallback="Unknown"):
    """Pick the candidate's name out of a parsed resume.

    Args:
        parsed: Mapping whose "name" entry is either a single string or an
            iterable of candidate lines. May be empty or ``None``.
        fallback: Value returned when no usable name is found.

    Returns:
        The first candidate line that contains at least one alphabetic
        character, with surrounding whitespace stripped; otherwise
        ``fallback``.
    """
    name_lines = parsed.get("name") if parsed else None
    if name_lines is None:
        return fallback
    if isinstance(name_lines, str):
        # A bare string would otherwise be walked character by character and
        # only its first letter would be returned.
        name_lines = [name_lines]
    for line in name_lines:
        # Non-string entries cannot be a name; skip them rather than raise.
        if isinstance(line, str) and any(c.isalpha() for c in line):
            return line.strip()
    return fallback
# End of module.