syberWolf committed
Commit 6d33a5e · 1 Parent(s): e126c73

changes for testing

Files changed (1)
  1. handler.py +13 -13
handler.py CHANGED
@@ -1,30 +1,30 @@
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import torch
+device = "cuda"
 
 class EndpointHandler:
     def __init__(self, path=""):
-        # Load the model
+        # load the model
         tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
         model = AutoModelForCausalLM.from_pretrained(
             "Qwen/Qwen2-1.5B-Instruct",
-            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-            device_map="cuda" if torch.cuda.is_available() else "auto"  # Include device_map for correct device allocation
+            torch_dtype="auto",
+            device_map="auto"
         )
-
-        # Create inference pipeline without specifying the device
+        # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-    def __call__(self, data: Any) -> List[List[Dict[str, Any]]]:
+    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
-        parameters = data.pop("parameters", {})
-
-        if isinstance(inputs, str):
-            inputs = [inputs]
+        parameters = data.pop("parameters", None)
 
-        # Get predictions from the pipeline
-        prediction = self.pipeline(inputs, **parameters)
+        # pass inputs with all kwargs in data
+        if parameters is not None:
+            prediction = self.pipeline(inputs, **parameters)
+        else:
+            prediction = self.pipeline(inputs)
 
+        # postprocess the prediction
         return prediction
 
 # Example usage
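
For reference, a minimal local smoke test of the updated handler might look like the following. This is a sketch, not part of the commit: it assumes the file above is saved as handler.py next to the test script, and the prompt text and the max_new_tokens value are illustrative. The payload shape follows the {"inputs": ..., "parameters": ...} convention that Inference Endpoints sends to custom handlers.

# smoke_test.py -- hypothetical local check for the handler above
from handler import EndpointHandler

# instantiating the handler downloads Qwen/Qwen2-1.5B-Instruct on first run
handler = EndpointHandler(path=".")

payload = {
    "inputs": "Summarize what a custom inference handler does.",
    "parameters": {"max_new_tokens": 64},  # illustrative generation kwarg
}

# __call__ pops "inputs" and "parameters" from the payload and forwards them
# to the text-generation pipeline; the result is a list of dicts containing
# a "generated_text" key
prediction = handler(payload)
print(prediction)

Since the model is loaded with device_map="auto", the pipeline is deliberately created without an explicit device argument and inherits the model's placement.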