syberWolf committed
Commit e126c73 · 1 Parent(s): 52afa01

update endpoint

Files changed (1)
  1. handler.py +4 -6
handler.py CHANGED
@@ -4,18 +4,16 @@ import torch
 
 class EndpointHandler:
     def __init__(self, path=""):
-        device = 0 if torch.cuda.is_available() else -1  # 0 for GPU, -1 for CPU
-
         # Load the model
         tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
         model = AutoModelForCausalLM.from_pretrained(
             "Qwen/Qwen2-1.5B-Instruct",
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-            device_map="cuda"  # for single instance one GPU
+            device_map="cuda" if torch.cuda.is_available() else "auto"  # Include device_map for correct device allocation
         )
-
-        # Create inference pipeline with the correct device
-        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
+
+        # Create inference pipeline without specifying the device
+        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
     def __call__(self, data: Any) -> List[List[Dict[str, Any]]]:
         inputs = data.pop("inputs", data)
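
For context, here is the updated handler assembled into a minimal runnable sketch. The imports, the body of __call__ past its first line, and the smoke test at the bottom are assumptions, since the diff truncates there; generation parameters are illustrative. The likely motivation for the change: transformers warns or errors when pipeline() receives an explicit device= for a model already placed via device_map, so the old device variable and argument were dropped.

    from typing import Any, Dict, List

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


    class EndpointHandler:
        def __init__(self, path=""):
            # Load the model (as in the commit)
            tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
            model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen2-1.5B-Instruct",
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="cuda" if torch.cuda.is_available() else "auto",
            )
            # Create inference pipeline without specifying the device:
            # the model is already placed by device_map, so passing
            # device= here would conflict with the accelerate dispatch.
            self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

        def __call__(self, data: Any) -> List[List[Dict[str, Any]]]:
            inputs = data.pop("inputs", data)
            # Assumed continuation: the diff cuts off here, and
            # max_new_tokens is a placeholder, not from the commit.
            return self.pipeline(inputs, max_new_tokens=128)


    if __name__ == "__main__":
        handler = EndpointHandler(path="")
        print(handler({"inputs": "Hello, who are you?"}))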