"""Gradio demo that scores a job title against a fixed list of roles.

Two scorers are shown side by side in one table:

* SBERT - cosine similarity between the sentence embedding of the title and
  the embedding of each role (role embeddings are computed once at import).
* LLM   - a BERT sequence-classification model run on each (title, role) pair.
"""

# imports
import json
import time

import gradio as gr
import torch
import torch.nn.functional as f
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification

from roles_list import roles

# Load the SBERT model and tokenizer
sbert_model_name = 'sentence-transformers/all-MiniLM-L12-v2'
sbert_model = AutoModel.from_pretrained(sbert_model_name)
sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name)

# Load the LLM model and tokenizer
# NOTE(review): 'bert-base-uncased' is a base checkpoint, so the
# classification head is presumably freshly initialised here and the
# "LLM Score" column is not meaningful until a fine-tuned checkpoint is
# substituted - confirm before relying on these numbers.
llm_model_name = 'bert-base-uncased'  # Using BERT for sequence classification
llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name)
llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)


def _embed_text(text):
    """Return the L2-normalised SBERT sentence embedding of ``text``.

    Args:
        text: The string to embed (a job title or a role name).

    Returns:
        A tensor with one row (the sentence embedding), normalised to unit
        length so that cosine similarity reduces to a dot product.
    """
    # truncation=True caps the input at the tokenizer's maximum length so an
    # over-long title cannot exceed what the model accepts.
    encoding = sbert_tokenizer(text, truncation=True, return_tensors='pt')
    with torch.no_grad():
        output = sbert_model(**encoding)

    # Mean-pool the token embeddings, counting only real (non-padding)
    # tokens. This is the pooling documented for this checkpoint; the BERT
    # `pooler_output` is not what the sentence embeddings were trained with.
    token_embeddings = output.last_hidden_state
    mask = encoding['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
    return f.normalize(summed / counts, p=2, dim=1)


# Precompute one normalised embedding per role so that each query only has to
# embed the title.
embed_store = {role: _embed_text(role) for role in roles}

print("SBERT model is ready for inference")


def get_role_from_sbert(title):
    """Score every role against ``title`` using SBERT cosine similarity.

    Args:
        title: Job title entered by the user.

    Returns:
        A ``(job_scores, execution_time)`` tuple. ``job_scores`` is a list of
        ``{"Role": ..., "SBERT Score": ...}`` dicts covering every role,
        sorted by descending score (scores rounded to 3 decimals).
        ``execution_time`` is the elapsed time in seconds.
    """
    start_time = time.perf_counter()

    # Embed the title once; it is the same for every role comparison.
    title_embed = _embed_text(title)

    store_cos = {
        role: round(f.cosine_similarity(title_embed, role_embed).item(), 3)
        for role, role_embed in embed_store.items()
    }

    # Rank all roles from most to least similar (the full list is returned,
    # not just the top few).
    ranked = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
    job_scores = [{"Role": job, "SBERT Score": score} for job, score in ranked]

    execution_time = time.perf_counter() - start_time
    return job_scores, execution_time


def get_role_from_llm(title):
    """Score every role against ``title`` with the sequence classifier.

    Each (title, role) pair is encoded as a two-segment input and the softmax
    probability of class index 1 is reported as the score.

    Args:
        title: Job title entered by the user.

    Returns:
        A ``(llm_scores, execution_time)`` tuple. ``llm_scores`` is a list of
        ``{"Role": ..., "LLM Score": ...}`` dicts in the order of ``roles``
        (scores rounded to 3 decimals). ``execution_time`` is the elapsed
        time in seconds.
    """
    start_time = time.perf_counter()

    llm_scores = []
    for role in roles:
        # Calling the tokenizer directly replaces the deprecated encode_plus.
        inputs = llm_tokenizer(
            title,
            role,
            return_tensors='pt',
            max_length=512,
            truncation=True,
        )
        with torch.no_grad():
            outputs = llm_model(**inputs)
        score = torch.softmax(outputs.logits, dim=1)[0][1].item()
        llm_scores.append({"Role": role, "LLM Score": round(score, 3)})

    execution_time = time.perf_counter() - start_time
    return llm_scores, execution_time


def classify_role(title):
    """Run both scorers on ``title`` and merge their results into one table.

    Args:
        title: Job title entered by the user.

    Returns:
        A ``(results, execution_time_info)`` tuple. ``results`` is a list of
        ``[role, sbert_score, llm_score]`` rows, ordered by the SBERT ranking
        (roles scored only by the LLM follow, with an empty SBERT cell).
        ``execution_time_info`` is a human-readable timing summary.
    """
    sbert_scores, sbert_execution_time = get_role_from_sbert(title)
    llm_scores, llm_execution_time = get_role_from_llm(title)

    # Merge results into a single table, keyed by role. Fresh dicts are built
    # so the scorers' return values are not mutated.
    merged = {
        item["Role"]: {"SBERT Score": item["SBERT Score"]}
        for item in sbert_scores
    }
    for item in llm_scores:
        merged.setdefault(item["Role"], {})["LLM Score"] = item["LLM Score"]

    results = [
        [role, scores.get("SBERT Score", ""), scores.get("LLM Score", "")]
        for role, scores in merged.items()
    ]

    execution_time_info = f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, LLM Execution Time: {llm_execution_time:.4f} seconds"
    return results, execution_time_info


# Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("# HackerRank Role Classifier")
    with gr.Column():
        input_text = gr.Textbox(label="Job Title")
        classify_button = gr.Button("Classify")
        output_table = gr.Dataframe(headers=["Role", "SBERT Score", "LLM Score"], label="Role Scores")
        execution_time_text = gr.Textbox(label="Execution Time", interactive=False)

    classify_button.click(fn=classify_role, inputs=input_text, outputs=[output_table, execution_time_text])

# Close any Gradio servers left over from a previous run before launching.
gr.close_all()
demo.launch()