File size: 1,135 Bytes

1aeb34c
c1cb360
 
 
fe371ad
 
1aeb34c
c1cb360
 
 
 
 
 
 
 
b3aebd1
c1cb360
1aeb34c
f96aa72
 
 
 
 
1aeb34c
b3aebd1
1aeb34c
 
c1cb360
b3aebd1
c1cb360
1aeb34c
c1cb360

import torch
from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


class EndpointHandler:
    """Callable request handler wrapping a ``text-generation`` pipeline.

    The tokenizer and model are loaded once, at construction time, and each
    call forwards the request's ``inputs`` (plus optional ``parameters``) to
    the pipeline.
    """

    # Repository id the tokenizer and model are loaded from.
    MODEL_ID = "microsoft/Phi-3-mini-128k-instruct"

    # Request keys removed before the pipeline is invoked.
    # NOTE(review): presumably client-side options the transformers pipeline
    # does not accept -- confirm against the callers of this endpoint.
    _DROPPED_KEYS = ("stop_sequences", "watermark", "stop")

    def __init__(self, path=""):
        """Load the tokenizer and model and build the inference pipeline.

        Args:
            path: Accepted for interface compatibility but not used; the
                model is always loaded from ``MODEL_ID``.
        """
        tokenizer = AutoTokenizer.from_pretrained(
            self.MODEL_ID, trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            self.MODEL_ID,
            trust_remote_code=True,
        )
        # create inference pipeline
        self.pipeline = pipeline(
            "text-generation", model=model, tokenizer=tokenizer
        )

    def __call__(self, data: Any) -> List[Any]:
        """Run the pipeline on one request payload.

        Args:
            data: Request dict. ``data["inputs"]`` is what gets passed to the
                pipeline; when that key is absent the whole ``data`` dict is
                used instead. ``data["parameters"]``, if present, is a dict
                of keyword arguments for the pipeline call. ``data`` is
                modified in place (both keys are popped).

        Returns:
            The pipeline's output, unchanged.
        """
        inputs = data.pop("inputs", data)

        # Only a dict payload can carry these keys. On a string prompt
        # ``key in inputs`` is a substring test and ``del inputs[key]``
        # raises TypeError, so plain prompts containing e.g. "stop" used to
        # crash the handler.
        if isinstance(inputs, dict):
            for key in self._DROPPED_KEYS:
                inputs.pop(key, None)

        parameters = data.pop("parameters", None)
        if parameters is None:
            return self.pipeline(inputs)

        # Filter into a new dict so the caller's ``parameters`` object is
        # not mutated while the unwanted keys are kept out of the call.
        call_kwargs = {
            name: value
            for name, value in parameters.items()
            if name not in self._DROPPED_KEYS
        }
        return self.pipeline(inputs, **call_kwargs)