File size: 4,459 Bytes
d46f9de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cac6615
 
 
 
 
 
 
d46f9de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cac6615
 
 
 
 
2021cd6
cac6615
d46f9de
 
 
 
 
 
cac6615
 
 
 
 
 
 
d46f9de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import uvicorn
from fastapi import FastAPI
from sentence_transformers import SentenceTransformer, util
from .models import *
from .DTOs import *

# FastAPI application exposing skill-embedding similarity endpoints.
app = FastAPI(title="Skill Embedding API")

# Load model once at startup (module import time) so every request reuses
# the same in-memory encoder instead of reloading weights per call.
# You can replace this with, https://huggingface.co/burakkececi/bert-software-engineering ?
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


# --- Endpoints ---

@app.post("/similarity")
def similarity(pair: SkillPairInput):
    """Return the cosine similarity between two skill phrases.

    Each phrase is embedded independently with the shared
    sentence-transformer model, then scored with cosine similarity.
    """
    first_embedding = model.encode(pair.skill1, convert_to_tensor=True)
    second_embedding = model.encode(pair.skill2, convert_to_tensor=True)
    score = util.cos_sim(first_embedding, second_embedding).item()
    return {
        "skill1": pair.skill1,
        "skill2": pair.skill2,
        "similarity": score,
    }


@app.post("/match-skills", response_model=MatchingSkillsResponse)
def match_skills(req: SkillsMatchingRequest):
    """Greedily pair each job skill with its most similar user skill.

    Every job skill is compared against every user skill; a pair is kept
    only when its cosine similarity meets the request threshold.  Note a
    single user skill may be claimed by more than one job skill.
    """
    jobs = req.jobSkills
    users = req.userSkills
    min_score = req.similarityThreshold

    # Guard: nothing to compare — echo both lists back as unmatched.
    if not jobs or not users:
        return MatchingSkillsResponse(
            matchedSkills=[],
            unmatchedJobSkills=jobs,
            unmatchedUserSkills=users,
        )

    job_vectors = model.encode([j.skill for j in jobs], convert_to_tensor=True)
    user_vectors = model.encode([u.skill for u in users], convert_to_tensor=True)

    # scores[i][k] = cosine similarity of job skill i vs user skill k.
    scores = util.cos_sim(job_vectors, user_vectors)

    pairs = []
    leftover_jobs = set(range(len(jobs)))
    leftover_users = set(range(len(users)))

    for job_idx, row in enumerate(scores):
        user_idx = int(row.argmax())
        top = float(row[user_idx])
        if top < min_score:
            continue
        pairs.append(MatchedSkill(
            jobSkill=jobs[job_idx],
            userSkill=users[user_idx],
            similarity=top,
        ))
        leftover_jobs.discard(job_idx)
        leftover_users.discard(user_idx)

    return MatchingSkillsResponse(
        matchedSkills=pairs,
        unmatchedJobSkills=[jobs[i] for i in sorted(leftover_jobs)],
        unmatchedUserSkills=[users[i] for i in sorted(leftover_users)],
    )


@app.post("/match-projects-skills", response_model=MatchingProjectsResponse)
def match_projects_skills(req: ProjectsMatchingRequest):
    """Score each project's skills against the job's required skills.

    For every project, each job skill is matched to that project's most
    similar skill; matches below the threshold are dropped.  Every project
    appears in the response with its match count.
    """
    jobs = req.jobSkills
    projects = req.projects
    min_score = req.similarityThreshold

    # Guard: with no job skills every project trivially has zero matches.
    if not jobs:
        analyzed = [
            MatchedProject(project=p, matchedSkills=[], matchedSkillsCount=0)
            for p in projects
        ]
        return MatchingProjectsResponse(allAnalyzedProjects=analyzed)
    if not projects:
        return MatchingProjectsResponse(allAnalyzedProjects=[])

    job_vectors = model.encode([j.skill for j in jobs], convert_to_tensor=True)

    analyzed = []
    for project in projects:
        skills = project.skills
        if not skills:
            # A project without listed skills cannot match anything.
            analyzed.append(MatchedProject(
                project=project,
                matchedSkills=[],
                matchedSkillsCount=0,
            ))
            continue

        project_vectors = model.encode(
            [s.skill for s in skills], convert_to_tensor=True
        )
        # scores[i][k] = cosine similarity of job skill i vs project skill k.
        scores = util.cos_sim(job_vectors, project_vectors)

        hits = []
        for job_idx, row in enumerate(scores):
            col = int(row.argmax())
            best = float(row[col])
            if best >= min_score:
                hits.append(MatchedProjectSkill(
                    jobSkill=jobs[job_idx],
                    projectSkill=skills[col],
                    similarity=best,
                ))

        analyzed.append(MatchedProject(
            project=project,
            matchedSkills=hits,
            matchedSkillsCount=len(hits),
        ))

    return MatchingProjectsResponse(allAnalyzedProjects=analyzed)

# uvicorn semantic_similarity:app --host 0.0.0.0 --port 8001
# if __name__ == "__main__":
#     uvicorn.run(app, host="0.0.0.0", port=8001, reload=False)