Spaces:
Runtime error
Runtime error
Commit
·
d46f9de
1
Parent(s):
0c1528f
Add application file
Browse files- Dockerfile +17 -0
- main.py +9 -0
- requirements.txt +7 -0
- semantic_similarity/DTOs.py +52 -0
- semantic_similarity/__init__.py +0 -0
- semantic_similarity/__pycache__/DTOs.cpython-39.pyc +0 -0
- semantic_similarity/__pycache__/__init__.cpython-39.pyc +0 -0
- semantic_similarity/__pycache__/models.cpython-39.pyc +0 -0
- semantic_similarity/__pycache__/semantic_similarity.cpython-39.pyc +0 -0
- semantic_similarity/models.py +24 -0
- semantic_similarity/semantic_similarity.py +111 -0
- skills_extraction/__pycache__/skills_extraction.cpython-39.pyc +0 -0
- skills_extraction/skills_extraction.py +59 -0
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.9
|
| 2 |
+
|
| 3 |
+
RUN useradd -m -u 1000 user
|
| 4 |
+
USER user
|
| 5 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 6 |
+
|
| 7 |
+
WORKDIR /app
|
| 8 |
+
|
| 9 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
| 10 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 11 |
+
|
| 12 |
+
COPY --chown=user . /app
|
| 13 |
+
|
| 14 |
+
ENV PORT=7860
|
| 15 |
+
|
| 16 |
+
# Start the combined FastAPI application exposed by main.py
|
| 17 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
main.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from skills_extraction.skills_extraction import app as skills_app
|
| 3 |
+
from semantic_similarity.semantic_similarity import app as similarity_app
|
| 4 |
+
|
| 5 |
+
# Combined ASGI application. It must be reachable as ``app`` because the
# container starts the server with ``uvicorn main:app``; ``main_app`` is kept
# as an alias so existing references to the old name keep working.
app = FastAPI()
main_app = app

# Mount the two apps under different routes
app.mount("/skills", skills_app)
app.mount("/similarity", similarity_app)
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
transformers
|
| 4 |
+
torch
|
| 5 |
+
pydantic
|
| 6 |
+
numpy
|
| 7 |
+
sentence_transformers
|
semantic_similarity/DTOs.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional, List
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from .models import *
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# Needed for testing
|
| 7 |
+
class SkillPairInput(BaseModel):
    # Request body of the /similarity endpoint: the two skill strings to compare.
    skill1: str
    skill2: str
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Skill Matching
|
| 13 |
+
class SkillsMatchingRequest(BaseModel):
    # Request body of the /match-skills endpoint.
    jobSkills: List[JobSkill]
    userSkills: List[UserSkill]
    # Minimum cosine similarity for a job/user pair to count as a match.
    similarityThreshold: Optional[float] = 0.7
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class MatchedSkill(BaseModel):
    # One job skill paired with the user skill it matched and their similarity score.
    jobSkill: JobSkill
    userSkill: UserSkill
    similarity: float
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class MatchingSkillsResponse(BaseModel):
    # Response body of the /match-skills endpoint: matched pairs plus the
    # skills on either side that were left without a match.
    matchedSkills: List[MatchedSkill]
    unmatchedJobSkills: List[JobSkill]
    unmatchedUserSkills: List[UserSkill]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# Project Matching
|
| 32 |
+
|
| 33 |
+
class ProjectsMatchingRequest(BaseModel):
    # Request body of the /match-projects-skills endpoint.
    jobSkills: List[JobSkill]
    projects: List[Project]
    # Minimum cosine similarity for a job/project skill pair to count as a match.
    similarityThreshold: Optional[float] = 0.7
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class MatchedProjectSkill(BaseModel):
    # One job skill paired with the project skill it matched and their similarity score.
    jobSkill: JobSkill
    projectSkill: ProjectSkill
    similarity: float
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class MatchedProject(BaseModel):
    # Per-project result: the analysed project, its matched skills and how many there are.
    project: Project
    matchedSkills: List[MatchedProjectSkill]
    matchedSkillsCount: int
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class MatchingProjectsResponse(BaseModel):
    # Response body of the /match-projects-skills endpoint: one entry per submitted project.
    allAnalyzedProjects: List[MatchedProject]
|
semantic_similarity/__init__.py
ADDED
|
File without changes
|
semantic_similarity/__pycache__/DTOs.cpython-39.pyc
ADDED
|
Binary file (2.23 kB). View file
|
|
|
semantic_similarity/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (171 Bytes). View file
|
|
|
semantic_similarity/__pycache__/models.cpython-39.pyc
ADDED
|
Binary file (1.04 kB). View file
|
|
|
semantic_similarity/__pycache__/semantic_similarity.cpython-39.pyc
ADDED
|
Binary file (3.33 kB). View file
|
|
|
semantic_similarity/models.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class BaseSkill(BaseModel):
    # Fields shared by every skill model: a numeric identifier and the skill text.
    id: int
    skill: str
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class UserSkill(BaseSkill):
    # Skill belonging to a user; adds no fields beyond BaseSkill.
    pass
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ProjectSkill(BaseSkill):
    # Skill attached to a project; adds no fields beyond BaseSkill.
    pass
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class JobSkill(BaseSkill):
    # Skill required by a job; adds no fields beyond BaseSkill.
    pass
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class Project(BaseModel):
    # A project identified by ``id`` together with the skills it lists.
    id: int
    skills: List[ProjectSkill]
|
semantic_similarity/semantic_similarity.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uvicorn
|
| 2 |
+
from fastapi import FastAPI
|
| 3 |
+
from sentence_transformers import SentenceTransformer, util
|
| 4 |
+
from .models import *
|
| 5 |
+
from .DTOs import *
|
| 6 |
+
|
| 7 |
+
# FastAPI application that carries the similarity endpoints defined below.
app = FastAPI(title="Skill Embedding API")

# The embedding model is loaded once, at import time, and reused by every request.
# Possible alternative checkpoint: https://huggingface.co/burakkececi/bert-software-engineering
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# --- Endpoints ---
|
| 15 |
+
|
| 16 |
+
@app.post("/similarity")
def similarity(pair: SkillPairInput):
    # Embed both skill strings and report their cosine similarity alongside
    # the original inputs.
    first, second = pair.skill1, pair.skill2
    first_vec = model.encode(first, convert_to_tensor=True)
    second_vec = model.encode(second, convert_to_tensor=True)
    score = util.cos_sim(first_vec, second_vec).item()
    return dict(skill1=first, skill2=second, similarity=score)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@app.post("/match-skills", response_model=MatchingSkillsResponse)
def match_skills(req: SkillsMatchingRequest):
    """Pair each job skill with its most similar user skill.

    For every job skill the user skill with the highest cosine similarity is
    selected; the pair is reported as matched when that similarity reaches the
    request's threshold. Skills on either side that took part in no match are
    returned as unmatched, in request order.
    """
    job_skills = req.jobSkills
    user_skills = req.userSkills
    # The field is Optional, so an explicit null must fall back to the
    # declared default instead of breaking the ``>=`` comparison below.
    threshold = 0.7 if req.similarityThreshold is None else req.similarityThreshold

    # With an empty side there is nothing to score, and argmax over an empty
    # similarity row raises, so answer directly.
    if not job_skills or not user_skills:
        return MatchingSkillsResponse(
            matchedSkills=[],
            unmatchedJobSkills=list(job_skills),
            unmatchedUserSkills=list(user_skills),
        )

    job_embeddings = model.encode(
        [j.skill for j in job_skills], convert_to_tensor=True
    )
    user_embeddings = model.encode(
        [u.skill for u in user_skills], convert_to_tensor=True
    )

    # Rows correspond to job skills, columns to user skills.
    similarity_matrix = util.cos_sim(job_embeddings, user_embeddings)

    matched = []
    matched_job_indices = set()
    matched_user_indices = set()

    for i, job_row in enumerate(similarity_matrix):
        best_idx = int(job_row.argmax())
        best_score = float(job_row[best_idx])

        if best_score >= threshold:
            matched.append(MatchedSkill(
                jobSkill=job_skills[i],
                userSkill=user_skills[best_idx],
                similarity=best_score,
            ))
            matched_job_indices.add(i)
            matched_user_indices.add(best_idx)

    # Filtering the original lists keeps the unmatched skills in request
    # order rather than relying on set iteration order.
    unmatched_jobs = [
        skill for i, skill in enumerate(job_skills)
        if i not in matched_job_indices
    ]
    unmatched_users = [
        skill for i, skill in enumerate(user_skills)
        if i not in matched_user_indices
    ]

    return MatchingSkillsResponse(
        matchedSkills=matched,
        unmatchedJobSkills=unmatched_jobs,
        unmatchedUserSkills=unmatched_users,
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@app.post("/match-projects-skills", response_model=MatchingProjectsResponse)
def match_projects_skills(req: ProjectsMatchingRequest):
    """Match the job skills against the skills of every submitted project.

    For each project, every job skill is paired with the project skill of
    highest cosine similarity, and the pair is kept when that similarity
    reaches the request's threshold. Every project appears in the response,
    including those with no matched skills.
    """
    job_skills = req.jobSkills
    # The field is Optional, so an explicit null must fall back to the
    # declared default instead of breaking the ``>=`` comparison below.
    threshold = 0.7 if req.similarityThreshold is None else req.similarityThreshold

    # Job skills are embedded once and reused for every project.
    job_embeddings = None
    if job_skills:
        job_embeddings = model.encode(
            [job.skill for job in job_skills], convert_to_tensor=True
        )

    matched_projects = []

    for project in req.projects:
        matched_skills = []

        # Without job skills or project skills there is nothing to score, and
        # argmax over an empty similarity row raises; report zero matches.
        if job_embeddings is not None and project.skills:
            project_embeddings = model.encode(
                [ps.skill for ps in project.skills], convert_to_tensor=True
            )
            # Rows correspond to job skills, columns to this project's skills.
            similarity_matrix = util.cos_sim(job_embeddings, project_embeddings)

            for job_skill, job_row in zip(job_skills, similarity_matrix):
                best_idx = int(job_row.argmax())
                best_score = float(job_row[best_idx])

                if best_score >= threshold:
                    matched_skills.append(MatchedProjectSkill(
                        jobSkill=job_skill,
                        projectSkill=project.skills[best_idx],
                        similarity=best_score,
                    ))

        matched_projects.append(MatchedProject(
            project=project,
            matchedSkills=matched_skills,
            matchedSkillsCount=len(matched_skills),
        ))

    return MatchingProjectsResponse(allAnalyzedProjects=matched_projects)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
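# Run standalone from the repository root (the relative imports need the package path): uvicorn semantic_similarity.semantic_similarity:app --host 0.0.0.0 --port 8001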
|
| 110 |
+
# if __name__ == "__main__":
|
| 111 |
+
# uvicorn.run(app, host="0.0.0.0", port=8001, reload=False)
|
skills_extraction/__pycache__/skills_extraction.cpython-39.pyc
ADDED
|
Binary file (1.85 kB). View file
|
|
|
skills_extraction/skills_extraction.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from fastapi import FastAPI
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from transformers import pipeline
|
| 7 |
+
|
| 8 |
+
# Both extraction pipelines are created once, at import time, and shared by
# all requests: one for knowledge items, one for skills.
knowledge_nlp = pipeline(
    model="jjzha/jobbert_knowledge_extraction",
    aggregation_strategy="first",
)
skill_nlp = pipeline(
    model="jjzha/jobbert_skill_extraction",
    aggregation_strategy="first",
)

# FastAPI application that carries the prediction endpoints defined below.
app = FastAPI()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class TextInput(BaseModel):
    # Request body of the prediction endpoints: the job description text to analyse.
    jobDescription: str
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def convert_from_numpy(predictions):
    """Replace NumPy numeric scalars in prediction dicts with Python floats.

    Each dict is updated in place; values that are not NumPy floating-point
    or integer scalars are left untouched.

    Args:
        predictions: Iterable of dicts, such as the output of one of the
            extraction pipelines.

    Returns:
        The same ``predictions`` object that was passed in.
    """
    for pred in predictions:
        for key, value in pred.items():
            # The abstract bases cover every float and integer width
            # (float16, float64, int8, uint8, ...), not only float32, int32
            # and int64. Integers are still converted to float, as before.
            if isinstance(value, (np.floating, np.integer)):
                pred[key] = float(value)
    return predictions
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def merge_BI_and_get_results(predictions):
    """Merge B/I-tagged fragments into whole entities with averaged confidence.

    A prediction whose ``entity_group`` is ``'B'`` starts a new entity; any
    other prediction is appended to the entity currently being built. Words
    are joined with single spaces, so a sequence that starts with a non-``'B'``
    fragment no longer yields a name with a leading space.

    Args:
        predictions: Iterable of dicts with ``entity_group``, ``word`` and
            ``score`` keys.

    Returns:
        A list of ``{"name": str, "confidence": float}`` dicts, where
        ``confidence`` is the mean score of the merged fragments.
    """
    results = []
    words, score_total = [], 0

    def finish_entity():
        # Emit the entity collected so far, if there is one.
        if words:
            results.append({
                "name": " ".join(words),
                "confidence": score_total / len(words),
            })

    for pred in predictions:
        if pred['entity_group'] == 'B':
            finish_entity()
            words, score_total = [pred['word']], pred['score']
        else:
            words.append(pred['word'])
            score_total += pred['score']

    finish_entity()
    return results
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@app.post("/predict_knowledge")
def predict_knowledge(input_data: TextInput):
    # Run the knowledge pipeline on the job description, normalise NumPy
    # scalars, then merge the B/I fragments into whole entities.
    raw_predictions = knowledge_nlp(input_data.jobDescription)
    merged = merge_BI_and_get_results(convert_from_numpy(raw_predictions))
    return {"knowledge_predictions": merged}
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@app.post("/predict_skills")
def predict_skills(input_data: TextInput):
    # Run the skill pipeline on the job description, normalise NumPy
    # scalars, then merge the B/I fragments into whole entities.
    raw_predictions = skill_nlp(input_data.jobDescription)
    merged = merge_BI_and_get_results(convert_from_numpy(raw_predictions))
    return {"skills_predictions": merged}
|
| 57 |
+
|
| 58 |
+
# Run with:
|
| 59 |
+
# uvicorn skills_extraction.skills_extraction:app --host 0.0.0.0 --port 8000
|