uhith committed on
Commit
adbf293
·
verified ·
1 Parent(s): ee33055

sbert based roles matching

Browse files
Files changed (3) hide show
  1. app.py +99 -0
  2. requirements.txt +6 -0
  3. roles_list.py +71 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # imports
2
+ import json
3
+ import time
4
+
5
+ import gradio as gr
6
+ from transformers import AutoTokenizer, AutoModel
7
+ import openai
8
+ # pytorch library
9
+ import torch
10
+ import torch.nn.functional as f
11
+
12
+ from fuzzywuzzy import process
13
+
14
+ from roles_list import roles
15
+ from openai import OpenAI
16
# Load the pretrained sentence-embedding model and its tokenizer by
# checkpoint name.
embed_store = {}  # role title -> precomputed, L2-normalised embedding
model = 'sentence-transformers/all-MiniLM-L12-v2'
sbert_model = AutoModel.from_pretrained(model)
sbert_tokenizer = AutoTokenizer.from_pretrained(model)

# SECURITY: the API key must never be committed to source control. The client
# falls back to the OPENAI_API_KEY environment variable when no key is passed,
# so export that variable in the deployment environment instead.
# The key that was previously hard-coded here is public in the repository
# history and must be revoked.
# NOTE(review): `client` is not referenced anywhere else in this file --
# confirm whether it is still needed.
client = OpenAI()
26
+
27
+
28
# How many of the best-scoring roles get_role_from_sbert reports.
TOP_K = 3


def _embed(text):
    """Return the L2-normalised sentence embedding of *text*.

    Token embeddings are averaged under the attention mask (mean pooling),
    which is the pooling the sentence-transformers checkpoints are trained
    with; the raw BERT ``pooler_output`` is not tuned for similarity search.

    Parameters:
    - text: The string to embed.

    Returns:
    A tensor with one row (a batch of one) holding the unit-length embedding.
    """
    encoding = sbert_tokenizer(text,
                               padding=True,
                               truncation=True,
                               return_tensors='pt')
    with torch.no_grad():
        token_embeddings = sbert_model(**encoding).last_hidden_state
    # Broadcast the mask over the hidden dimension so padded positions
    # contribute nothing to the average.
    mask = encoding['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
    return f.normalize(summed / counts, p=2, dim=1)


# Precompute one embedding per known role so each request only has to embed
# the incoming title.
for role in roles:
    embed_store[role] = _embed(role)
print("Model is ready for inference")


def get_role_from_sbert(title):
    """Rank the known roles by embedding similarity to *title*.

    Parameters:
    - title: The free-text job title to classify. ``None`` is treated as an
      empty string.

    Returns:
    A string with one ``"<role>: <score>"`` line for each of the ``TOP_K``
    most similar roles (best first, cosine similarity rounded to 3 decimals),
    followed by a line reporting the execution time in seconds.
    """
    start_time = time.time()

    # Embed the query once; every role is compared against this same tensor.
    query = _embed(title or "")

    store_cos = {
        role: round(f.cosine_similarity(query, role_embed).item(), 3)
        for role, role_embed in embed_store.items()
    }

    # Keep only the TOP_K highest-scoring roles.
    top_matches = sorted(store_cos.items(),
                         key=lambda item: item[1],
                         reverse=True)[:TOP_K]
    job_scores_str = '\n'.join(f"{job}: {score}" for job, score in top_matches)

    execution_time = time.time() - start_time
    return job_scores_str + f" \nExecution time: {str(execution_time)}"
66
+
67
+
68
def fuzzy_match(title, limit=3):
    """
    Find the best matches for a job title among the known roles using fuzzy matching.

    Parameters:
    - title: The job title to search for.
    - limit: The maximum number of matches to return (defaults to 3).

    Returns:
    A list of tuples with the matched role and its score, as produced by
    ``process.extract``. Higher score means closer match.
    """
    return process.extract(title, roles, limit=limit)
82
+
83
+
84
def fuzzy_match_sbert(title):
    """Run both matchers on *title* and return their results side by side.

    Returns:
    A two-element list: the result of ``fuzzy_match`` first, then the
    result of ``get_role_from_sbert``.
    """
    return [fuzzy_match(title), get_role_from_sbert(title)]
90
+
91
+
92
# Web UI: one text box for the job title, one text box for the ranked roles,
# backed by the embedding-based matcher.
_title_box = gr.Textbox(label="Job Title")
_role_box = gr.Textbox(label="Role")
demo = gr.Interface(
    fn=get_role_from_sbert,
    inputs=_title_box,
    outputs=_role_box,
    title="HackerRank Role Classifier",
)


# Close any Gradio apps that are still running before starting this one
# (presumably to free the port -- confirm), then serve on all network
# interfaces at port 8081 with sharing enabled.
gr.close_all()
demo.launch(
    server_name='0.0.0.0',
    server_port=8081,
    share=True,
)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
torch==1.13.1
torchvision==0.14.1
transformers==4.26.1
gradio==3.18.0
openai
# Required by app.py (`from fuzzywuzzy import process`).
fuzzywuzzy
6
+
roles_list.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Canonical role titles that incoming job titles are matched against.
# Entries are grouped by discipline; most titles also appear with a
# "Sr. " prefix for the senior variant.
roles = [
    # Machine learning
    'Machine Learning Engineer',
    'Sr. Machine Learning Engineer',
    # Cloud and site reliability
    'Cloud Engineer',
    'Cloud Engineer (AWS)',
    'Cloud Security Engineer',
    'Site Reliability Engineer',
    'Sr. Cloud Engineer',
    'Sr. Cloud Engineer (AWS)',
    'Sr. Cloud Security Engineer',
    'Sr. Site Reliability Engineer',
    # Cybersecurity
    'Cybersecurity Engineer',
    'Sr. Cybersecurity Engineer',
    # Data engineering
    'Data Engineer',
    'Data Engineer (Java Spark)',
    'Data Engineer (PySpark)',
    'Data Engineer (Scala Spark)',
    'Sr. Data Engineer',
    'Sr. Data Engineer (Java Spark)',
    'Sr. Data Engineer (PySpark)',
    'Sr. Data Engineer (Scala Spark)',
    # Data analysis and data science
    'Data Analyst',
    'Data Analyst (Python)',
    'Data Analyst (R)',
    'Data Scientist',
    'Sr. Data Analyst',
    'Sr. Data Analyst (Python)',
    'Sr. Data Analyst (R)',
    'Sr. Data Scientist',
    # Mobile development
    'Mobile Applications Developer (Android - Java)',
    'Mobile Applications Developer (Android - Kotlin)',
    'Mobile Applications Developer (React Native)',
    'Sr. Mobile Applications Developer (Android - Java)',
    'Sr. Mobile Applications Developer (Android - Kotlin)',
    'Sr. Mobile Applications Developer (React Native)',
    # Quality assurance
    'QA Engineer (Selenium)',
    'Quality Assurance Engineer',
    'Quality Assurance Engineer (Mobile)',
    'Sr. QA Engineer (Selenium)',
    'Sr. Quality Assurance Engineer',
    'Sr. Quality Assurance Engineer (Mobile)',
    # General software engineering
    'Software Engineer',
    'Software Engineer Intern',
    'Sr. Software Engineer',
    # Web development: back-end, front-end, full-stack
    'Back-End Developer',
    'Back-End Developer (.NET)',
    'Back-End Developer (Django)',
    'Back-End Developer (Laravel)',
    'Back-End Developer (Node)',
    'Back-End Developer (Rails)',
    'Back-End Developer (Spring Boot)',
    'Front-End Developer',
    'Front-End Developer (Angular)',
    'Front-End Developer (React)',
    'Front-End Developer (Vue.js)',
    'Full-Stack Engineer (Angular & Node)',
    'Full-Stack Engineer (React & Node)',
    # Web development: senior variants
    'Sr. Back-End Developer',
    'Sr. Back-End Developer (.NET)',
    'Sr. Back-End Developer (Django)',
    'Sr. Back-End Developer (Laravel)',
    'Sr. Back-End Developer (Node)',
    'Sr. Back-End Developer (Rails)',
    'Sr. Back-End Developer (Spring Boot)',
    'Sr. Front-End Developer',
    'Sr. Front-End Developer (Angular)',
    'Sr. Front-End Developer (React)',
    'Sr. Front-End Developer (Vue.js)',
    'Sr. Full-Stack Engineer (Angular & Node)',
    'Sr. Full-Stack Engineer (React & Node)'
]