Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
2ea5df0
1
Parent(s):
91ff4e5
Added company research and research summarization steps.
Browse files- test-job-app.py +76 -7
test-job-app.py
CHANGED
@@ -1,18 +1,28 @@
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
-
from smolagents import ToolCallingAgent
|
4 |
import torch
|
|
|
5 |
|
6 |
# Load the SmolLM model and tokenizer
|
7 |
-
# model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
|
8 |
model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
|
9 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
11 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
12 |
model.to(device)
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
prompt = f"""<|im_start|>system
|
17 |
{system_prompt}<|im_end|>
|
18 |
<|im_start|>user
|
@@ -20,15 +30,20 @@ def smol_lm_jd_process(job_description, system_prompt):
|
|
20 |
<|im_start|>assistant
|
21 |
"""
|
22 |
inputs = tokenizer(prompt, return_tensors="pt").to(device)
|
23 |
-
output = model.generate(**inputs, max_new_tokens=
|
24 |
response = tokenizer.decode(output[0], skip_special_tokens=False)
|
25 |
-
# Extract the assistant's response
|
26 |
start_idx = response.find("<|im_start|>assistant")
|
27 |
end_idx = response.find("<|im_end|>", start_idx)
|
28 |
response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
|
29 |
return response
|
30 |
|
|
|
|
|
|
|
|
|
|
|
31 |
def process_job_description(company_name, company_url, job_description):
|
|
|
32 |
# Step 2: Extract key qualifications, skills, and requirements
|
33 |
system_prompt_requirements = "Extract key qualifications, skills, and requirements from this job description. Output as bullet points. Remove benefits/salary, bragging about the company, and other fluff not relevant to the skills, qualifications, and job requirements. ONLY INCLUDE INFORMATION THAT TELLS THE USER WHAT SKILLS THE EMPLOYER SEEKS."
|
34 |
role_requirements = smol_lm_jd_process(job_description, system_prompt_requirements)
|
@@ -37,12 +52,65 @@ def process_job_description(company_name, company_url, job_description):
|
|
37 |
system_prompt_summary = "Create a concise 150-200 word summary of this job description. Remove company bragging bragging about the company, and other fluff not relevant to the position and what is desired from the candidate. FOCUS ON ASPECTS THAT POINT THE USER IN WHAT THE EMPLOYER WANTS FROM A CANDIDATE IN TERMS OF SKILLS, ACCOMPLISHMENTS, AND SUCH"
|
38 |
clean_job_description = smol_lm_jd_process(job_description, system_prompt_summary)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
return {
|
41 |
"Company Name": company_name,
|
42 |
"Company URL": company_url,
|
|
|
43 |
"Original Job Description": job_description,
|
44 |
"Role Requirements": role_requirements,
|
45 |
-
"Clean Job Description": clean_job_description
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
}
|
47 |
|
48 |
# Create the Gradio app
|
@@ -62,3 +130,4 @@ with demo:
|
|
62 |
|
63 |
if __name__ == "__main__":
|
64 |
demo.launch()
|
|
|
|
1 |
+
|
2 |
+
from time import sleep
|
3 |
+
|
4 |
import gradio as gr
|
5 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
6 |
+
# from smolagents import ToolCallingAgent
|
7 |
import torch
|
8 |
+
from duckduckgo_search import DDGS
|
9 |
|
10 |
# Load the SmolLM model and tokenizer
# NOTE(review): the 360M-Instruct variant is used (the 1.7B one was commented
# out in an earlier revision) — presumably for CPU-friendly inference; confirm.
model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Prefer GPU when available, otherwise fall back to CPU; the model is moved
# once here and the same `device` is reused when tokenizing prompts below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
|
16 |
|
17 |
+
class Role:
    """Container for a single job-application target.

    Bundles the company identity, the raw and summarized job description,
    and the extracted job title so downstream steps (search-query building,
    prompt construction) can pass one object around instead of five strings.

    Parameters mirror the attribute names; all values are stored as-is with
    no validation or normalization.
    """

    def __init__(self, company_name, company_url, job_description, clean_job_description, job_title):
        self.company_name = company_name
        self.company_url = company_url
        self.job_description = job_description
        self.clean_job_description = clean_job_description
        self.job_title = job_title

    def __repr__(self):
        # Debug-friendly representation; the (potentially very long) job
        # description fields are deliberately omitted.
        return (f"{type(self).__name__}(company_name={self.company_name!r}, "
                f"company_url={self.company_url!r}, job_title={self.job_title!r})")
|
24 |
+
|
25 |
+
def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
|
26 |
prompt = f"""<|im_start|>system
|
27 |
{system_prompt}<|im_end|>
|
28 |
<|im_start|>user
|
|
|
30 |
<|im_start|>assistant
|
31 |
"""
|
32 |
inputs = tokenizer(prompt, return_tensors="pt").to(device)
|
33 |
+
output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
|
34 |
response = tokenizer.decode(output[0], skip_special_tokens=False)
|
|
|
35 |
start_idx = response.find("<|im_start|>assistant")
|
36 |
end_idx = response.find("<|im_end|>", start_idx)
|
37 |
response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
|
38 |
return response
|
39 |
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
return smol_lm_jd_process(job_description, system_prompt_get_job_title)
|
44 |
+
|
45 |
def process_job_description(company_name, company_url, job_description):
|
46 |
+
|
47 |
# Step 2: Extract key qualifications, skills, and requirements
|
48 |
system_prompt_requirements = "Extract key qualifications, skills, and requirements from this job description. Output as bullet points. Remove benefits/salary, bragging about the company, and other fluff not relevant to the skills, qualifications, and job requirements. ONLY INCLUDE INFORMATION THAT TELLS THE USER WHAT SKILLS THE EMPLOYER SEEKS."
|
49 |
role_requirements = smol_lm_jd_process(job_description, system_prompt_requirements)
|
|
|
52 |
system_prompt_summary = "Create a concise 150-200 word summary of this job description. Remove company bragging bragging about the company, and other fluff not relevant to the position and what is desired from the candidate. FOCUS ON ASPECTS THAT POINT THE USER IN WHAT THE EMPLOYER WANTS FROM A CANDIDATE IN TERMS OF SKILLS, ACCOMPLISHMENTS, AND SUCH"
|
53 |
clean_job_description = smol_lm_jd_process(job_description, system_prompt_summary)
|
54 |
|
55 |
+
system_prompt_get_job_title = "Extract only the job title from the following job description. Respond with nothing but the job title—no labels, no comments, no summaries, no locations, or extra text. If the title is unusually long or nonstandard, replace it with the most common, concise, and widely recognized job title for the role. Your answer must be 7 words or fewer, with no punctuation, newlines, or additional information. Acceptable examples may look like: 'Systems Analyst', 'marketing director', 'patient advocate III', ..."
|
56 |
+
job_title = smol_lm_jd_process(job_description, system_prompt_get_job_title, max_new_tokens=150)[:50].lower().replace("job","").replace("title","").replace("\n","").replace(":","")
|
57 |
+
|
58 |
+
role = Role(company_name, company_url, job_description, clean_job_description, job_title)
|
59 |
+
|
60 |
+
# Step 4: Company Research
|
61 |
+
searches = {
|
62 |
+
"company_values": f"{role.company_name} company values",
|
63 |
+
"corporate_culture": f"{role.company_name} corporate culture",
|
64 |
+
"leadership_team": f"{role.company_name} leadership team members relevant to {role.job_title} role",
|
65 |
+
"recent_news": f"{role.company_name} recent news relevant to {role.job_title} role",
|
66 |
+
"competitive_advantages": f"{role.company_name} competitive advantages in {role.job_title} market"
|
67 |
+
}
|
68 |
+
search_client = DDGS()
|
69 |
+
search_results = {}
|
70 |
+
for key, query in searches.items():
|
71 |
+
print(f"searching {query}")
|
72 |
+
try:
|
73 |
+
results = search_client.text(query,
|
74 |
+
max_results=3)
|
75 |
+
print(f"searching {query} successful")
|
76 |
+
search_results[key] = results
|
77 |
+
except Exception as exc:
|
78 |
+
print(f"Rate limit error, will wait and retry searching {query}.")
|
79 |
+
sleep(5)
|
80 |
+
results = search_client.text(query,
|
81 |
+
max_results=3)
|
82 |
+
print(f"searching {query} successful")
|
83 |
+
search_results[key] = results
|
84 |
+
sleep(3)
|
85 |
+
|
86 |
+
|
87 |
+
# Summarize search results using SmolLM
|
88 |
+
summaries = {}
|
89 |
+
system_prompt_summary_search = "Summarize the following search results in 150 tokens or less."
|
90 |
+
for key, results in search_results.items():
|
91 |
+
search_result_text = "\n".join([result['body'] for result in results])
|
92 |
+
summary = smol_lm_jd_process(search_result_text, system_prompt_summary_search, max_new_tokens=150)
|
93 |
+
summaries[key] = summary
|
94 |
+
|
95 |
return {
|
96 |
"Company Name": company_name,
|
97 |
"Company URL": company_url,
|
98 |
+
"job_title": job_title,
|
99 |
"Original Job Description": job_description,
|
100 |
"Role Requirements": role_requirements,
|
101 |
+
"Clean Job Description": clean_job_description,
|
102 |
+
"Company Research": {
|
103 |
+
"Company Values Search Results": search_results['company_values'],
|
104 |
+
"Company Values Summary": summaries['company_values'],
|
105 |
+
"Corporate Culture Search Results": search_results['corporate_culture'],
|
106 |
+
"Corporate Culture Summary": summaries['corporate_culture'],
|
107 |
+
"Leadership Team Search Results": search_results['leadership_team'],
|
108 |
+
"Leadership Team Summary": summaries['leadership_team'],
|
109 |
+
"Recent News Search Results": search_results['recent_news'],
|
110 |
+
"Recent News Summary": summaries['recent_news'],
|
111 |
+
"Competitive Advantages Search Results": search_results['competitive_advantages'],
|
112 |
+
"Competitive Advantages Summary": summaries['competitive_advantages'],
|
113 |
+
}
|
114 |
}
|
115 |
|
116 |
# Create the Gradio app
|
|
|
130 |
|
131 |
# Launch the Gradio app only when this file is executed as a script
# (not when imported); `demo` is the Gradio Blocks app defined above.
if __name__ == "__main__":
    demo.launch()
|
133 |
+
|