Update app.py
Added inference endpoint.
app.py
CHANGED
@@ -1,8 +1,10 @@
 from time import sleep
+from os import getenv
 
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import spaces
+from openai import OpenAI
 import torch
 from duckduckgo_search import DDGS
 import re
@@ -15,6 +17,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
+
+
 class DDGSSearchClient:
     def __init__(self, max_retries=4, timeout=35, backoff_factor=1):
         """
@@ -323,10 +327,40 @@ class Applicant:
     def __init__(self, resume):
         self.resume = resume
 
-@spaces.GPU
-def write(inputs, max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1):
-    _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
-    return _output
+# @spaces.GPU
+# def write(inputs, max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1):
+#     _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
+#     return _output
+
+def writing_task(prompt: str) -> str:
+    api_key = getenv("HF_TOKEN")
+    if not api_key:
+        raise ValueError("Huggingface token missing. Need to set HF_TOKEN, refer to https://discuss.huggingface.co/t/how-to-manage-user-secrets-and-api-keys/67948")
+    client = OpenAI(
+        base_url="https://router.huggingface.co/v1",
+        api_key=api_key,
+    )
+
+    completion = client.chat.completions.create(
+        model="HuggingFaceTB/SmolLM3-3B:hf-inference",
+        messages=[
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+    )
+
+    raw_response_content = completion.choices[0].message.content
+    content_split = raw_response_content.split("</think>")
+    if len(content_split) > 1:
+        think = content_split[0]
+        content = "".join(content_split[1:])
+    else:
+        think = content_split[0]
+        content = "No data found."
+
+    return content
 
 def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
     prompt = f"""<|im_start|>system
@@ -335,12 +369,13 @@ def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
 {job_description}<|im_end|>
 <|im_start|>assistant
 """
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    output = write(inputs, max_new_tokens=max_new_tokens)
-    response = tokenizer.decode(output[0], skip_special_tokens=False)
-    start_idx = response.find("<|im_start|>assistant")
-    end_idx = response.find("<|im_end|>", start_idx)
-    response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
+    # inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    # output = write(inputs, max_new_tokens=max_new_tokens)
+    # response = tokenizer.decode(output[0], skip_special_tokens=False)
+    # start_idx = response.find("<|im_start|>assistant")
+    # end_idx = response.find("<|im_end|>", start_idx)
+    # response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
+    response = writing_task(prompt)
     return response
 
 def process_job_description(company_name, company_url, job_description, resume):
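
Taken together, the commit swaps the local @spaces.GPU generate() path for Hugging Face's OpenAI-compatible Inference Providers router. Stripped of the app plumbing, the new path amounts to the sketch below; the prompt string is a placeholder, while everything else (router URL, model id, HF_TOKEN variable) comes from the diff itself.

from os import getenv

from openai import OpenAI

# Remote inference via the HF router; HF_TOKEN must be set as a Space secret
# (see the discuss.huggingface.co link in the error message above).
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=getenv("HF_TOKEN"),
)

completion = client.chat.completions.create(
    # the ":hf-inference" suffix routes the request to HF's own inference provider
    model="HuggingFaceTB/SmolLM3-3B:hf-inference",
    messages=[{"role": "user", "content": "Draft a cover letter for this job: ..."}],
)
print(completion.choices[0].message.content)

One side effect worth noting: a chat completions endpoint applies the model's chat template server-side, so the hand-built <|im_start|> framing that smol_lm_jd_process still embeds in the prompt is now redundant; passing the system prompt and job description as separate system and user messages would be cleaner.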
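
The "</think>" split in writing_task handles SmolLM3's optional reasoning block, but the else branch returns "No data found." even when the model answers without emitting a reasoning block at all, in which case the raw reply already is the answer. A hedged alternative (split_reasoning is an illustrative name, not part of the commit):

def split_reasoning(raw: str) -> tuple[str, str]:
    # Split a SmolLM3-style reply into (reasoning trace, answer).
    think, sep, answer = raw.partition("</think>")
    if sep:  # reasoning block present: text before the tag is the trace
        return think.removeprefix("<think>").strip(), answer.strip()
    return "", raw.strip()  # no block: the whole reply is the answer

print(split_reasoning("<think>match skills to the JD</think>\nDear Hiring Manager, ..."))
print(split_reasoning("Dear Hiring Manager, ..."))  # writing_task would return "No data found."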