# Spaces: Sleeping  (page-header residue from the hosting site, kept as a comment)
# imports | |
import json | |
import time | |
import gradio as gr | |
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification | |
import torch | |
import torch.nn.functional as f | |
from roles_list import roles | |
# Load the SBERT model and tokenizer | |
sbert_model_name = 'sentence-transformers/all-MiniLM-L12-v2' | |
sbert_model = AutoModel.from_pretrained(sbert_model_name) | |
sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name) | |
# Load the LLM model and tokenizer | |
llm_model_name = 'bert-base-uncased' # Using BERT for sequence classification | |
llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name) | |
llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name) | |
def _embed_text(text):
    """Return the L2-normalised SBERT sentence embedding of *text*.

    The text is tokenised with ``sbert_tokenizer`` (padded to a length of 10
    tokens via ``padding="max_length"``; no truncation is requested) and run
    through ``sbert_model`` without gradient tracking.

    The sentence vector is the attention-masked mean of the token embeddings,
    which is the pooling the model card for this checkpoint prescribes.
    ``pooler_output`` is deliberately not used: it comes from BERT's
    pre-training pooler head rather than from the pooling the sentence model
    was trained with.

    Args:
        text: The string to embed.

    Returns:
        A tensor with one row (the batch dimension is kept) whose L2 norm
        along ``dim=1`` is 1.
    """
    encoding = sbert_tokenizer(
        text,
        max_length=10,
        padding="max_length",
        return_tensors='pt',
    )
    with torch.no_grad():
        token_embeddings = sbert_model(**encoding).last_hidden_state

    # Zero out padding positions so they do not dilute the mean.
    mask = encoding["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero should a mask ever be all zeros.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return f.normalize(summed / token_counts, p=2, dim=1)


# Pre-compute one embedding per known role so each request only has to embed
# the incoming title.
embed_store = {role: _embed_text(role) for role in roles}
print("SBERT model is ready for inference")


def get_role_from_sbert(title):
    """Score every known role against *title* by embedding similarity.

    Args:
        title: Job title entered by the user.

    Returns:
        A ``(job_scores, execution_time)`` tuple. ``job_scores`` is a list of
        ``{"Role": ..., "SBERT Score": ...}`` dicts covering *all* roles in
        ``embed_store``, sorted by descending cosine similarity (rounded to
        three decimals). ``execution_time`` is the elapsed time in seconds.
    """
    # perf_counter is monotonic, so the interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Embed (and normalise) the title once instead of once per role.
    title_embed = _embed_text(title)

    store_cos = {
        role: round(f.cosine_similarity(title_embed, role_embed).item(), 3)
        for role, role_embed in embed_store.items()
    }

    # Every role is returned, best match first; the caller decides how many
    # rows to show.
    ranked = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
    job_scores = [{"Role": job, "SBERT Score": score} for job, score in ranked]

    execution_time = time.perf_counter() - start_time
    return job_scores, execution_time
def get_role_from_llm(title):
    """Score every role in ``roles`` against *title* with the pair classifier.

    Each ``(title, role)`` pair is encoded as a two-segment input and passed
    through ``llm_model``; the reported score is the softmax probability of
    class index 1.

    Args:
        title: Job title entered by the user.

    Returns:
        A ``(llm_scores, execution_time)`` tuple. ``llm_scores`` is a list of
        ``{"Role": ..., "LLM Score": ...}`` dicts in the order of ``roles``
        (scores rounded to three decimals). ``execution_time`` is the elapsed
        time in seconds.
    """
    # perf_counter is monotonic, so the interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    llm_scores = []
    with torch.no_grad():
        for role in roles:
            # Calling the tokenizer directly replaces the deprecated
            # encode_plus(); the arguments and the resulting encoding are
            # the same.
            inputs = llm_tokenizer(
                title,
                role,
                return_tensors='pt',
                max_length=512,
                truncation=True,
            )
            logits = llm_model(**inputs).logits
            score = torch.softmax(logits, dim=1)[0][1].item()
            llm_scores.append({"Role": role, "LLM Score": round(score, 3)})

    execution_time = time.perf_counter() - start_time
    return llm_scores, execution_time
def classify_role(title):
    """Run both scorers on *title* and combine their output for the UI.

    Args:
        title: Job title entered by the user.

    Returns:
        A ``(results, execution_time_info)`` tuple. ``results`` is a list of
        ``[role, sbert_score, llm_score]`` rows, with ``""`` standing in for
        a score that one of the scorers did not provide. Rows follow the
        order returned by ``get_role_from_sbert``, followed by any roles only
        the LLM scorer reported. ``execution_time_info`` is a one-line
        summary of both run times.
    """
    sbert_scores, sbert_execution_time = get_role_from_sbert(title)
    llm_scores, llm_execution_time = get_role_from_llm(title)

    # Index the SBERT rows by role, then fold the LLM scores into them.
    merged = {entry["Role"]: entry for entry in sbert_scores}
    for entry in llm_scores:
        name = entry["Role"]
        # Roles missing from the SBERT results get a row with a blank score.
        row = merged.setdefault(name, {"Role": name, "SBERT Score": ""})
        row["LLM Score"] = entry["LLM Score"]

    results = [
        [name, row.get("SBERT Score", ""), row.get("LLM Score", "")]
        for name, row in merged.items()
    ]

    execution_time_info = f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, LLM Execution Time: {llm_execution_time:.4f} seconds"
    return results, execution_time_info
# Gradio Blocks interface: one text input, a button, and two outputs (the
# merged score table and a timing summary).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped -- confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# HackerRank Role Classifier")

    with gr.Column():
        input_text = gr.Textbox(label="Job Title")
        classify_button = gr.Button("Classify")
        output_table = gr.Dataframe(
            headers=["Role", "SBERT Score", "LLM Score"],
            label="Role Scores",
        )
        execution_time_text = gr.Textbox(
            label="Execution Time",
            interactive=False,
        )

    # classify_role returns (rows, timing string), matching the two outputs.
    classify_button.click(
        fn=classify_role,
        inputs=input_text,
        outputs=[output_table, execution_time_text],
    )

# Shut down any Gradio servers left over from a previous run before starting.
gr.close_all()
demo.launch()