File size: 982 Bytes

c1cb360
 
 
fe371ad
 
1aeb34c
c1cb360
b47e2d8
c1cb360
b47e2d8
7e91a22
b47e2d8
 
c1cb360
 
b3aebd1
c1cb360
1aeb34c
 
b3aebd1
1aeb34c
 
c1cb360
b3aebd1
c1cb360
1aeb34c
b47e2d8

from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


class EndpointHandler:
    """Callable wrapper around a ``text-generation`` pipeline.

    The constructor builds the pipeline once; each call forwards a request
    payload to it and returns the pipeline's result unchanged.
    """

    def __init__(self, path=""):
        """Load the tokenizer and model and build the inference pipeline.

        Args:
            path: Accepted for interface compatibility but not used here; the
                model is always loaded from the hard-coded
                ``"Qwen/Qwen2-1.5B-Instruct"`` identifier.
                NOTE(review): presumably the serving runtime passes the local
                repository directory here -- confirm before relying on it.
        """
        model_id = "Qwen/Qwen2-1.5B-Instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype="auto",
            device_map="auto",
        )
        # Build the pipeline once so every request reuses the loaded model.
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Any) -> Any:
        """Run the pipeline on a request payload.

        Args:
            data: Usually a dict with an ``"inputs"`` entry and an optional
                ``"parameters"`` dict of keyword arguments for the pipeline.
                When ``"inputs"`` is absent, the remaining payload (without
                ``"parameters"``) is used as the input. A non-dict value is
                passed to the pipeline as the input directly.

        Returns:
            Whatever the pipeline returns for the given input, unmodified.
        """
        if isinstance(data, dict):
            # Work on a shallow copy so the caller's request dict is not
            # stripped of its "inputs"/"parameters" keys as a side effect.
            payload = dict(data)
            inputs = payload.pop("inputs", payload)
            parameters = payload.pop("parameters", None)
        else:
            # A bare prompt (or any other non-dict input) has no parameters.
            inputs = data
            parameters = None

        if parameters is None:
            return self.pipeline(inputs)
        # Forward user-supplied generation options as keyword arguments.
        return self.pipeline(inputs, **parameters)