screen-HR / app.py
Rafik Matta
adding GPT2 as an option
4fa2adf
raw
history blame
4.23 kB
# imports
import json
import time
import gradio as gr
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
import torch
import torch.nn.functional as f
from roles_list import roles
# Sentence-embedding checkpoint: used to embed both the known roles and the
# job title typed by the user.
sbert_model_name = 'sentence-transformers/all-MiniLM-L12-v2'
sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name)
sbert_model = AutoModel.from_pretrained(sbert_model_name)

# Pairwise (title, role) classifier checkpoint.
# NOTE(review): 'bert-base-uncased' is a base pretrained checkpoint; its
# sequence-classification head is presumably freshly initialised on load, so
# the resulting "LLM Score" may not be meaningful -- confirm, or swap in a
# fine-tuned checkpoint.
llm_model_name = 'bert-base-uncased'  # Using BERT for sequence classification
llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name)
# Pre-compute one L2-normalised embedding per known role at import time, so a
# request only has to embed the incoming title.
embed_store = {}
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for role in roles:
        # NOTE(review): no truncation is requested, so max_length appears to
        # act only as a padding target here -- confirm that is intended.
        role_encoding = sbert_tokenizer(
            role,
            max_length=10,
            padding="max_length",
            return_tensors='pt',
        )
        role_embedding = sbert_model(**role_encoding).pooler_output
        embed_store[role] = f.normalize(role_embedding, p=2, dim=1)
print("SBERT model is ready for inference")
def get_role_from_sbert(title):
    """Rank every known role by SBERT cosine similarity to ``title``.

    The title is embedded with the SBERT model (``pooler_output``),
    L2-normalised, and compared against each pre-computed embedding in
    ``embed_store``.

    Args:
        title: Job title text to classify.

    Returns:
        tuple: ``(job_scores, execution_time)``. ``job_scores`` is a list of
        ``{"Role": ..., "SBERT Score": ...}`` dicts, one per role in
        ``embed_store``, sorted by score in descending order (scores are
        rounded to 3 decimals). ``execution_time`` is the elapsed wall-clock
        time in seconds.
    """
    start_time = time.time()

    # NOTE(review): no truncation is requested, so max_length appears to act
    # only as a padding target; longer titles are presumably passed through at
    # full length -- confirm whether truncation=True was intended.
    encoding = sbert_tokenizer(
        title,
        max_length=10,
        padding="max_length",
        return_tensors='pt',
    )

    # Run the model prediction on the input data.
    with torch.no_grad():
        pooled = sbert_model(**encoding).pooler_output

    # The query vector is identical for every role, so normalise it once
    # instead of once per loop iteration.
    query = f.normalize(pooled, p=2, dim=1)

    store_cos = {
        role: round(f.cosine_similarity(query, role_embedding).item(), 3)
        for role, role_embedding in embed_store.items()
    }

    # All roles are returned, best match first (not just a top-N slice).
    ranked_roles = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
    job_scores = [{"Role": job, "SBERT Score": score} for job, score in ranked_roles]

    execution_time = time.time() - start_time
    return job_scores, execution_time
def get_role_from_llm(title):
    """Score ``title`` against every role with the sequence classifier.

    Each ``(title, role)`` pair is encoded as a two-segment input (truncated
    to 512 tokens) and run through ``llm_model``; the score is the softmax
    probability at label index 1.

    Args:
        title: Job title text to classify.

    Returns:
        tuple: ``(llm_scores, execution_time)``. ``llm_scores`` is a list of
        ``{"Role": ..., "LLM Score": ...}`` dicts in the order of ``roles``
        (scores rounded to 3 decimals). ``execution_time`` is the elapsed
        wall-clock time in seconds.
    """
    started_at = time.time()
    llm_scores = []

    with torch.no_grad():  # inference only
        for role in roles:
            pair_inputs = llm_tokenizer.encode_plus(
                title,
                role,
                return_tensors='pt',
                max_length=512,
                truncation=True,
            )
            logits = llm_model(**pair_inputs).logits
            probability = torch.softmax(logits, dim=1)[0][1].item()
            llm_scores.append({"Role": role, "LLM Score": round(probability, 3)})

    execution_time = time.time() - started_at
    return llm_scores, execution_time
def classify_role(title):
    """Score ``title`` with both models and merge the results into one table.

    Args:
        title: Job title text to classify.

    Returns:
        tuple: ``(results, execution_time_info)``. ``results`` is a list of
        ``[role, sbert_score, llm_score]`` rows; rows follow the order of the
        SBERT results, followed by any roles that only the LLM scored (their
        SBERT score is ``""``). ``execution_time_info`` is a human-readable
        string with both execution times.
    """
    sbert_scores, sbert_execution_time = get_role_from_sbert(title)
    llm_scores, llm_execution_time = get_role_from_llm(title)

    # Index the SBERT rows by role name, then attach the LLM score to each.
    merged = {}
    for entry in sbert_scores:
        merged[entry["Role"]] = entry

    for entry in llm_scores:
        role_name = entry["Role"]
        row = merged.get(role_name)
        if row is None:
            # Role was scored by the LLM only: leave the SBERT cell blank.
            merged[role_name] = {
                "Role": role_name,
                "SBERT Score": "",
                "LLM Score": entry["LLM Score"],
            }
        else:
            row["LLM Score"] = entry["LLM Score"]

    results = [
        [role_name, row.get("SBERT Score", ""), row.get("LLM Score", "")]
        for role_name, row in merged.items()
    ]

    execution_time_info = f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, LLM Execution Time: {llm_execution_time:.4f} seconds"
    return results, execution_time_info
# Gradio Blocks interface: a job-title input, a button, a score table and a
# read-only timing box, wired to classify_role.
with gr.Blocks() as demo:
    gr.Markdown("# HackerRank Role Classifier")

    with gr.Column():
        input_text = gr.Textbox(label="Job Title")
        classify_button = gr.Button("Classify")
        output_table = gr.Dataframe(
            headers=["Role", "SBERT Score", "LLM Score"],
            label="Role Scores",
        )
        execution_time_text = gr.Textbox(
            label="Execution Time",
            interactive=False,
        )

    # classify_role returns (table rows, timing string), matching the two
    # output components in order.
    classify_button.click(
        fn=classify_role,
        inputs=input_text,
        outputs=[output_table, execution_time_text],
    )

# Close any Gradio interfaces that are already running before launching.
gr.close_all()
demo.launch()