File size: 1,052 Bytes

1aeb34c
c1cb360
 
 
fe371ad
 
1aeb34c
c1cb360
5d540d6
c1cb360
 
7e91a22
 
 
 
c1cb360
 
b3aebd1
c1cb360
1aeb34c
 
b3aebd1
1aeb34c
 
c1cb360
b3aebd1
c1cb360
1aeb34c
c1cb360

import torch
from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


class EndpointHandler:
    """Callable wrapper around a ``text-generation`` pipeline.

    The model and tokenizer are loaded once at construction time; each call
    forwards a request payload to the pipeline and returns its output.
    """

    # Single source of truth for the checkpoint used by tokenizer and model.
    MODEL_ID = "microsoft/Phi-3-mini-128k-instruct"

    def __init__(self, path=""):
        """Load the tokenizer and model and build the inference pipeline.

        Args:
            path: Accepted for interface compatibility but not used; the
                checkpoint is always loaded by ``MODEL_ID``.
        """
        tokenizer = AutoTokenizer.from_pretrained(self.MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            self.MODEL_ID,
            # NOTE(review): hard-coded to CUDA, so construction presumably
            # fails on a host without a GPU -- confirm deployment target.
            device_map="cuda",
            torch_dtype="auto",
            trust_remote_code=True,
        )
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Any) -> Any:
        """Run the pipeline on a request payload.

        Args:
            data: Mapping that may contain ``"inputs"`` (passed to the
                pipeline as its first argument) and ``"parameters"`` (a dict
                of keyword arguments for the pipeline call). When
                ``"inputs"`` is absent, the payload itself, minus
                ``"parameters"``, is used as the input.

        Returns:
            Whatever the pipeline returns, unchanged.
        """
        # Work on a shallow copy so the caller's payload is never mutated.
        payload = dict(data)
        inputs = payload.pop("inputs", payload)
        parameters = payload.pop("parameters", None)
        if parameters is None:
            parameters = {}
        return self.pipeline(inputs, **parameters)