Spaces:

hackerrank
/

screen-HR

Sleeping

App Files Files Community

screen-HR / app.py

uhith

sbert based roles matching

adbf293 verified over 1 year ago

raw

history blame

3.17 kB

	# imports
	import json
	import os
	import time

	import gradio as gr
	import openai
	# pytorch library
	import torch
	import torch.nn.functional as f
	from fuzzywuzzy import process
	from openai import OpenAI
	from transformers import AutoTokenizer, AutoModel

	from roles_list import roles
	# Maps each role name to its embedding tensor; populated at start-up.
	embed_store = {}

	# Load the SBERT encoder and its matching tokenizer, both identified by `model`.
	model = 'sentence-transformers/all-MiniLM-L12-v2'
	sbert_model = AutoModel.from_pretrained(model)
	sbert_tokenizer = AutoTokenizer.from_pretrained(model)

	# SECURITY: never commit API keys to source control. The key is read from the
	# OPENAI_API_KEY environment variable instead; the key that used to be
	# hard-coded here has been published and must be revoked.
	# When no key is configured the client is left as None so the rest of the
	# app (which does not need OpenAI to rank roles) can still start.
	_openai_api_key = os.environ.get("OPENAI_API_KEY")
	client = OpenAI(api_key=_openai_api_key) if _openai_api_key else None


	# Pre-compute one L2-normalised embedding per known role so that requests
	# only have to embed the incoming title.
	# NOTE(review): this uses the model's `pooler_output`; confirm against the
	# model card whether mean pooling over token embeddings is intended instead.
	for role_name in roles:
	    role_tokens = sbert_tokenizer(
	        role_name,  # the text to be tokenized
	        max_length=10,
	        padding="max_length",
	        return_tensors='pt',  # return tensors (not lists)
	    )
	    # Inference only, so no gradients are tracked.
	    with torch.no_grad():
	        role_result = sbert_model(**role_tokens)
	    embed_store[role_name] = f.normalize(role_result.pooler_output, p=2, dim=1)
	print("Model is ready for inference")


	def get_role_from_sbert(title):
	    """Rank every known role by SBERT cosine similarity to a job title.

	    Parameters:
	    - title: The job title text to compare against the stored roles.

	    Returns:
	    A string with one "role: score" line for each role in ``embed_store``,
	    ordered from highest to lowest cosine similarity (scores rounded to
	    3 decimals), followed by an "Execution time" line giving the elapsed
	    seconds.
	    """
	    # perf_counter is monotonic, so the elapsed time cannot be skewed by
	    # wall-clock adjustments.
	    start_time = time.perf_counter()
	    encoding = sbert_tokenizer(
	        title,
	        max_length=10,
	        padding="max_length",
	        return_tensors='pt',
	    )
	    # Run the model prediction on the input data without tracking gradients.
	    with torch.no_grad():
	        output = sbert_model(**encoding)
	    # Normalise the query embedding once; it is identical for every role.
	    query_embed = f.normalize(output.pooler_output, p=2, dim=1)

	    store_cos = {
	        role: round(f.cosine_similarity(query_embed, role_embed).item(), 3)
	        for role, role_embed in embed_store.items()
	    }
	    # All roles are returned, best match first (no top-k cut-off is applied).
	    ranked_roles = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
	    job_scores_str = '\n'.join(f"{job}: {score}" for job, score in ranked_roles)

	    execution_time = time.perf_counter() - start_time
	    return job_scores_str + f" \nExecution time: {execution_time}"


	def fuzzy_match(title, choices=None, limit=3):
	    """
	    Find the best matches for a job title using fuzzy string matching.

	    Parameters:
	    - title: The search string.
	    - choices: A list of strings to search through. Defaults to the
	      module-level ``roles`` list when omitted.
	    - limit: The maximum number of matches to return (default 3).

	    Returns:
	    A list of tuples with the match and its score. Higher score means closer match.
	    """
	    if choices is None:
	        choices = roles
	    return process.extract(title, choices, limit=limit)


	def fuzzy_match_sbert(title):
	    """Run both matchers on a job title and return their results together.

	    Parameters:
	    - title: The job title text to match.

	    Returns:
	    A two-element list: the result of ``fuzzy_match(title)`` followed by
	    the result of ``get_role_from_sbert(title)``.
	    """
	    # The list literal evaluates left to right, so the fuzzy matcher runs first.
	    return [fuzzy_match(title), get_role_from_sbert(title)]


	# Build a single-textbox UI around the SBERT ranking function.
	title_box = gr.Textbox(label="Job Title")
	role_box = gr.Textbox(label="Role")
	demo = gr.Interface(
	    fn=get_role_from_sbert,
	    inputs=title_box,
	    outputs=role_box,
	    title="HackerRank Role Classifier",
	)


	# Close any Gradio interfaces that are already running, then serve this one
	# on all network interfaces at port 8081.
	# NOTE(review): share=True also requests a public share link; confirm that
	# is wanted in the deployment environment.
	gr.close_all()
	demo.launch(server_name='0.0.0.0', server_port=8081, share=True)