Spaces:
Sleeping
Sleeping
File size: 4,459 Bytes
d46f9de cac6615 d46f9de cac6615 2021cd6 cac6615 d46f9de cac6615 d46f9de |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import uvicorn
from fastapi import FastAPI
from sentence_transformers import SentenceTransformer, util
from .models import *
from .DTOs import *
# Hugging Face model id used to embed skill names.
# A possible alternative noted by the original author (not evaluated here):
# https://huggingface.co/burakkececi/bert-software-engineering
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

app = FastAPI(title="Skill Embedding API")

# Load the model once at import time so that every request handler below
# reuses the same instance instead of reloading it per call.
model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
# --- Endpoints ---
@app.post("/similarity")
def similarity(pair: SkillPairInput):
    """Return the cosine similarity between the two skills in ``pair``.

    Both ``pair.skill1`` and ``pair.skill2`` are embedded with the
    module-level ``model`` and compared with ``util.cos_sim``.

    Returns:
        A dict echoing both input skills under ``"skill1"`` / ``"skill2"``
        plus the score, extracted with ``.item()``, under ``"similarity"``.
    """
    first, second = pair.skill1, pair.skill2

    first_vec = model.encode(first, convert_to_tensor=True)
    second_vec = model.encode(second, convert_to_tensor=True)

    # cos_sim yields a tensor; .item() unwraps its single value.
    score = util.cos_sim(first_vec, second_vec).item()

    return {"skill1": first, "skill2": second, "similarity": score}
@app.post("/match-skills", response_model=MatchingSkillsResponse)
def match_skills(req: SkillsMatchingRequest):
    """Pair every job skill with its most similar user skill.

    For each entry in ``req.jobSkills`` the user skill with the highest
    cosine similarity is selected; the pair is reported as matched when that
    score is at least ``req.similarityThreshold``. Several job skills may be
    matched to the same user skill, because a user skill is not removed from
    consideration once it has been used.

    Returns:
        A ``MatchingSkillsResponse`` with the matched pairs plus the job and
        user skills that took part in no match. Both unmatched lists keep
        the order in which the skills appeared in the request.
    """
    job_skills = req.jobSkills
    user_skills = req.userSkills
    threshold = req.similarityThreshold

    if not job_skills or not user_skills:
        # Nothing can be compared. The guard also lets a missing (None) list
        # through, so normalise to [] and always hand lists to the response.
        return MatchingSkillsResponse(
            matchedSkills=[],
            unmatchedJobSkills=job_skills or [],
            unmatchedUserSkills=user_skills or [],
        )

    job_texts = [j.skill for j in job_skills]
    user_texts = [u.skill for u in user_skills]

    job_embeddings = model.encode(job_texts, convert_to_tensor=True)
    user_embeddings = model.encode(user_texts, convert_to_tensor=True)

    # Row i holds the similarity of job skill i against every user skill.
    similarity_matrix = util.cos_sim(job_embeddings, user_embeddings)

    matched = []
    matched_job_indices = set()
    matched_user_indices = set()

    for i, job_row in enumerate(similarity_matrix):
        best_idx = int(job_row.argmax())
        best_score = float(job_row[best_idx])

        if best_score >= threshold:
            matched.append(MatchedSkill(
                jobSkill=job_skills[i],
                userSkill=user_skills[best_idx],
                similarity=best_score,
            ))
            matched_job_indices.add(i)
            matched_user_indices.add(best_idx)

    # Walk the original lists (rather than iterating a set of indices) so the
    # response order is the request order and never depends on set ordering.
    unmatched_jobs = [
        skill for i, skill in enumerate(job_skills)
        if i not in matched_job_indices
    ]
    unmatched_users = [
        skill for i, skill in enumerate(user_skills)
        if i not in matched_user_indices
    ]

    return MatchingSkillsResponse(
        matchedSkills=matched,
        unmatchedJobSkills=unmatched_jobs,
        unmatchedUserSkills=unmatched_users,
    )
@app.post("/match-projects-skills", response_model=MatchingProjectsResponse)
def match_projects_skills(req: ProjectsMatchingRequest):
    """Report, for every project, which job skills it covers.

    Each job skill is compared against the skills of one project at a time.
    The project's most similar skill is recorded as a match when its cosine
    similarity is at least ``req.similarityThreshold``.

    Returns:
        A ``MatchingProjectsResponse`` with one ``MatchedProject`` per entry
        of ``req.projects``, in request order. Projects without skills, and
        all projects when no job skills were supplied, carry an empty match
        list and a count of 0.
    """
    wanted = req.jobSkills
    candidates = req.projects
    min_score = req.similarityThreshold

    if not wanted:
        # No job skills to look for: every project is reported with no matches.
        return MatchingProjectsResponse(allAnalyzedProjects=[
            MatchedProject(project=entry, matchedSkills=[], matchedSkillsCount=0)
            for entry in candidates
        ])
    if not candidates:
        return MatchingProjectsResponse(allAnalyzedProjects=[])

    # Job skills are the same for all projects, so embed them only once.
    wanted_vectors = model.encode(
        [item.skill for item in wanted],
        convert_to_tensor=True,
    )

    analyzed = []
    for candidate in candidates:
        hits = []

        if candidate.skills:
            candidate_vectors = model.encode(
                [item.skill for item in candidate.skills],
                convert_to_tensor=True,
            )
            # One row per job skill, one column per skill of this project.
            scores = util.cos_sim(wanted_vectors, candidate_vectors)

            for wanted_skill, row in zip(wanted, scores):
                top = int(row.argmax())
                top_score = float(row[top])
                if top_score >= min_score:
                    hits.append(MatchedProjectSkill(
                        jobSkill=wanted_skill,
                        projectSkill=candidate.skills[top],
                        similarity=top_score,
                    ))

        analyzed.append(MatchedProject(
            project=candidate,
            matchedSkills=hits,
            matchedSkillsCount=len(hits),
        ))

    return MatchingProjectsResponse(allAnalyzedProjects=analyzed)
# Run with: uvicorn semantic_similarity:app --host 0.0.0.0 --port 8001
# if __name__ == "__main__":
# uvicorn.run(app, host="0.0.0.0", port=8001, reload=False)
|