syberWolf committed on
Commit
b47e2d8
·
1 Parent(s): 5d540d6
Files changed (2) hide show
  1. handler.py +5 -7
  2. requirements.txt +0 -4
handler.py CHANGED
@@ -1,4 +1,3 @@
1
- import torch
2
  from typing import Dict, List, Any
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
@@ -6,13 +5,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
6
  class EndpointHandler:
7
  def __init__(self, path=""):
8
  # load the model
9
- tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
10
  model = AutoModelForCausalLM.from_pretrained(
11
- "microsoft/Phi-3-mini-128k-instruct",
12
- device_map="cuda",
13
  torch_dtype="auto",
14
- trust_remote_code=True,
15
- )
16
  # create inference pipeline
17
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
18
 
@@ -26,4 +24,4 @@ class EndpointHandler:
26
  else:
27
  prediction = self.pipeline(inputs)
28
  # postprocess the prediction
29
- return prediction
 
 
1
  from typing import Dict, List, Any
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
 
 
5
  class EndpointHandler:
6
  def __init__(self, path=""):
7
  # load the model
8
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
9
  model = AutoModelForCausalLM.from_pretrained(
10
+ "Qwen/Qwen2-1.5B-Instruct",
 
11
  torch_dtype="auto",
12
+ device_map="auto"
13
+ )
14
  # create inference pipeline
15
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
16
 
 
24
  else:
25
  prediction = self.pipeline(inputs)
26
  # postprocess the prediction
27
+ return prediction
requirements.txt DELETED
@@ -1,4 +0,0 @@
1
- flash_attn==2.5.8
2
- torch==2.3.1
3
- accelerate==0.31.0
4
- transformers==4.41.2