File size: 2,290 Bytes
adbf293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed359cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# imports
import json
import time

import gradio as gr
from transformers import AutoTokenizer, AutoModel
# pytorch library
import torch
import torch.nn.functional as f


from roles_list import roles
# Load the pretrained sentence-embedding model and its tokenizer once, at
# import time, by model identifier.
embed_store = {}
model = 'sentence-transformers/all-MiniLM-L12-v2'
sbert_model = AutoModel.from_pretrained(model)
sbert_tokenizer = AutoTokenizer.from_pretrained(model)


def _embed_text(text):
    """Return the L2-normalised sentence embedding of a single string.

    The token embeddings in ``last_hidden_state`` are averaged, weighted by
    the attention mask so that padding positions do not contribute.  This is
    the pooling the sentence-transformers checkpoint is meant to be used with
    when loaded through plain ``transformers``; ``pooler_output`` (the BERT
    pooler head) is not part of that pipeline.

    Args:
        text: The text to embed (a role name or a job title).

    Returns:
        A ``torch.Tensor`` with one row per input text (one row for a single
        string), normalised to unit L2 length along ``dim=1``.
    """
    # NOTE(review): truncation is not requested, so max_length only acts as
    # the padding target here; longer inputs appear to be passed through
    # whole -- confirm this is intended.
    encoding = sbert_tokenizer(text,
                               max_length=10,
                               padding="max_length",
                               return_tensors='pt')
    with torch.no_grad():
        token_embeds = sbert_model(**encoding).last_hidden_state

    # Mean pooling: zero out padded positions, then divide by the number of
    # real tokens (clamped to avoid a division by zero).
    mask = encoding["attention_mask"].unsqueeze(-1).to(token_embeds.dtype)
    summed = (token_embeds * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return f.normalize(summed / token_counts, p=2, dim=1)


# Pre-compute one embedding per known role so that a request only has to
# embed the incoming title.
for role in roles:
    embed_store[role] = _embed_text(role)
print("Model is ready for inference")


def get_role_from_sbert(title):
    """Rank every known role by cosine similarity to a job title.

    Args:
        title: The job title to classify.

    Returns:
        A string with one ``"<role>: <score>"`` line per entry of
        ``embed_store``, ordered from highest to lowest similarity (scores
        rounded to 3 decimals), followed by a final line reporting the
        execution time in seconds.
    """
    start_time = time.perf_counter()

    # Embed (and normalise) the query once, not once per role.
    title_embed = _embed_text(title)

    store_cos = {
        role: round(f.cosine_similarity(title_embed, role_embed).item(), 3)
        for role, role_embed in embed_store.items()
    }

    # All roles are reported, best match first.
    ranked_roles = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
    job_scores_str = '\n'.join(f"{job}: {score}" for job, score in ranked_roles)

    execution_time = time.perf_counter() - start_time
    return job_scores_str + f" \nExecution time: {str(execution_time)}"


# Web UI: a single text input (the job title) wired to get_role_from_sbert,
# whose returned string is shown in a single text output.
demo = gr.Interface(
    fn=get_role_from_sbert,
    inputs=gr.Textbox(label="Job Title"),
    outputs=gr.Textbox(label="Role"),
    title="HackerRank Role Classifier",
)


# Close any Gradio interfaces that are already running, then serve this one.
gr.close_all()
demo.launch()