david-thrower committed
Commit 6dbc1cb · verified · Parent(s): 57fde96

Update app.py


Added an inference endpoint: text generation now goes through the Hugging Face Inference router (OpenAI-compatible API) instead of the local model.generate path, which is retained as commented-out code.
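For context, a minimal sketch of the call pattern the new writing_task helper adopts, using the base URL and model ID from the diff below. This is illustrative only; it assumes the openai package is installed and HF_TOKEN is set in the environment (e.g. as a Space secret), and the prompt string is made up:

from os import getenv

from openai import OpenAI

# Base URL and model ID match the diff below; HF_TOKEN must be set.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=getenv("HF_TOKEN"),
)

completion = client.chat.completions.create(
    model="HuggingFaceTB/SmolLM3-3B:hf-inference",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(completion.choices[0].message.content)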

Files changed (1):
  app.py (+45, -10)
app.py CHANGED

@@ -1,8 +1,10 @@
 from time import sleep
+from os import getenv
 
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import spaces
+from openai import OpenAI
 import torch
 from duckduckgo_search import DDGS
 import re
@@ -15,6 +17,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
+
+
 class DDGSSearchClient:
     def __init__(self, max_retries=4, timeout=35, backoff_factor=1):
         """
@@ -323,10 +327,40 @@ class Applicant:
     def __init__(self, resume):
         self.resume = resume
 
-@spaces.GPU
-def write(inputs, max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1):
-    _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
-    return _output
+# @spaces.GPU
+# def write(inputs, max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1):
+#     _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
+#     return _output
+
+def writing_task(prompt: str) -> str:
+    api_key = getenv("HF_TOKEN")
+    if not api_key:
+        raise ValueError("Hugging Face token missing. Set HF_TOKEN; see https://discuss.huggingface.co/t/how-to-manage-user-secrets-and-api-keys/67948")
+    client = OpenAI(
+        base_url="https://router.huggingface.co/v1",
+        api_key=api_key,  # reuse the token validated above rather than re-reading the environment
+    )
+
+    completion = client.chat.completions.create(
+        model="HuggingFaceTB/SmolLM3-3B:hf-inference",
+        messages=[
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+    )
+
+    raw_response_content = completion.choices[0].message.content
+    content_split = raw_response_content.split("</think>")  # SmolLM3 may prefix its reasoning as "<think>...</think>"
+    if len(content_split) > 1:
+        think = content_split[0]  # reasoning text, currently unused
+        content = "".join(content_split[1:])
+    else:
+        think = content_split[0]
+        content = "No data found."
+
+    return content
 
 def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
     prompt = f"""<|im_start|>system
@@ -335,12 +369,13 @@ def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
 {job_description}<|im_end|>
 <|im_start|>assistant
 """
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    output = write(inputs, max_new_tokens=max_new_tokens)
-    response = tokenizer.decode(output[0], skip_special_tokens=False)
-    start_idx = response.find("<|im_start|>assistant")
-    end_idx = response.find("<|im_end|>", start_idx)
-    response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
+    # inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    # output = write(inputs, max_new_tokens=max_new_tokens)
+    # response = tokenizer.decode(output[0], skip_special_tokens=False)
+    # start_idx = response.find("<|im_start|>assistant")
+    # end_idx = response.find("<|im_end|>", start_idx)
+    # response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
+    response = writing_task(prompt)
     return response
 
 def process_job_description(company_name, company_url, job_description, resume):
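
A note on the response parsing in writing_task: SmolLM3 can emit its chain of thought wrapped in "<think>...</think>" ahead of the answer, and the new code keeps only the text after the closing tag, falling back to "No data found." when no tag is present. A self-contained illustration of that split (the sample string is made up):

# Made-up sample mimicking SmolLM3's reasoning output format.
raw = "<think>Identify the key requirements first.</think>Senior ML engineer; PyTorch; remote."

parts = raw.split("</think>")
# A closing tag was present, so keep everything after it; otherwise
# writing_task falls back to the "No data found." placeholder.
answer = "".join(parts[1:]) if len(parts) > 1 else "No data found."
print(answer)  # -> Senior ML engineer; PyTorch; remote.

One consequence of this design, inherited from the commit as written: a reply that contains no reasoning tags at all is also replaced by "No data found.", even if the model produced a usable answer.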