Spaces:
Sleeping
Sleeping
# imports | |
import json | |
import time | |
import gradio as gr | |
from transformers import AutoTokenizer, AutoModel | |
import openai | |
# pytorch library | |
import torch | |
import torch.nn.functional as f | |
from fuzzywuzzy import process | |
from roles_list import roles | |
from openai import OpenAI | |
# Cache of precomputed role embeddings: role -> L2-normalised embedding tensor.
embed_store = {}

# Hugging Face checkpoint used for both the encoder and its tokenizer.
model = 'sentence-transformers/all-MiniLM-L12-v2'
sbert_model = AutoModel.from_pretrained(model)
sbert_tokenizer = AutoTokenizer.from_pretrained(model)

# SECURITY: credentials must never be committed to source control. With no
# explicit `api_key`, the client falls back to the OPENAI_API_KEY environment
# variable, so the secret has to be supplied by the deployment environment.
# The key that used to be hard-coded here must be treated as leaked and revoked.
client = OpenAI()

# Embed every known role once at start-up so that a request only has to
# encode the incoming title.
# NOTE(review): the model card for this checkpoint documents mean pooling over
# the token embeddings; `pooler_output` is kept because get_role_from_sbert
# encodes titles the same way - confirm which pooling is intended.
for role in roles:
    encoding = sbert_tokenizer(
        role,  # the text to be tokenized
        max_length=10,
        padding="max_length",
        return_tensors='pt',  # return tensors (not lists)
    )
    with torch.no_grad():  # inference only, no gradients required
        # get the model embeddings
        embed = sbert_model(**encoding)
        embed = embed.pooler_output
    embed_store[role] = f.normalize(embed, p=2, dim=1)

print("Model is ready for inference")
def get_role_from_sbert(title):
    """
    Score a job title against every known role using SBERT embeddings.

    The title is encoded with the module-level tokenizer and model, and the
    resulting embedding is compared by cosine similarity with each
    precomputed role embedding in ``embed_store``.

    Parameters:
    - title: The job title text to classify.

    Returns:
    A string with one "role: score" line per role, ordered from the highest
    to the lowest similarity (scores rounded to 3 decimals), followed by an
    "Execution time: <seconds>" line.
    """
    # perf_counter is the clock intended for measuring elapsed intervals;
    # time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    encoding = sbert_tokenizer(
        title,
        max_length=10,
        padding="max_length",
        return_tensors='pt',
    )

    # Run the model prediction on the input data (inference only).
    with torch.no_grad():
        embed = sbert_model(**encoding).pooler_output

    # Normalise the query once instead of once per role: it does not change
    # between loop iterations.
    query_embed = f.normalize(embed, p=2, dim=1)
    store_cos = {
        role: round(f.cosine_similarity(query_embed, role_embed).item(), 3)
        for role, role_embed in embed_store.items()
    }

    # Rank ALL roles by similarity, best first (the full ranking is reported,
    # not just the top few).
    ranked_roles = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
    job_scores_str = '\n'.join(f"{job}: {score}" for job, score in ranked_roles)

    execution_time = time.perf_counter() - start_time
    return job_scores_str + f" \nExecution time: {execution_time}"
def fuzzy_match(title, limit=3):
    """
    Find the best matching roles for a job title using fuzzy string matching.

    Parameters:
    - title: The job title to search for.
    - limit: The maximum number of matches to return (defaults to 3).

    Returns:
    A list of tuples with the match and its score, as produced by
    ``process.extract`` over the module-level ``roles``. Higher score means
    closer match.
    """
    return process.extract(title, roles, limit=limit)
def fuzzy_match_sbert(title):
    """
    Run both matchers on a job title and return their results side by side.

    Parameters:
    - title: The job title to classify.

    Returns:
    A two-element list: the fuzzy-matching result first, then the SBERT
    similarity report string.
    """
    # The list literal evaluates left to right, so the fuzzy matcher still
    # runs before the SBERT matcher.
    return [fuzzy_match(title), get_role_from_sbert(title)]
# Web UI: one text box in (the job title), one text box out (the ranked
# role scores produced by get_role_from_sbert).
title_box = gr.Textbox(label="Job Title")
role_box = gr.Textbox(label="Role")
demo = gr.Interface(
    fn=get_role_from_sbert,
    inputs=title_box,
    outputs=role_box,
    title="HackerRank Role Classifier",
)

# Shut down any Gradio apps still running in this process before starting.
gr.close_all()

# Listen on all network interfaces on port 8081 and request a public share link.
demo.launch(server_name='0.0.0.0', server_port=8081, share=True)