File size: 2,658 Bytes
0bf46d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
import cv2 as cv

class EfficientSAM:
    """OpenCV-DNN wrapper for the EfficientSAM promptable segmentation model.

    Given a BGR image and prompt point(s) with labels, `infer` returns a
    binary mask (uint8, values 0/255) at the original image resolution.
    """

    def __init__(self, modelPath, backendId=0, targetId=0):
        """
        Args:
            modelPath: path to the serialized model file readable by cv.dnn.readNet.
            backendId: cv.dnn preferable-backend id (default 0 = DNN_BACKEND_DEFAULT).
            targetId: cv.dnn preferable-target id (default 0 = DNN_TARGET_CPU).
        """
        self._modelPath = modelPath
        self._backendId = backendId
        self._targetId = targetId

        self._model = cv.dnn.readNet(self._modelPath)
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)
        # The network takes 3 inputs: image tensor + prompt coordinates + prompt labels.
        self._inputNames = ["batched_images", "batched_point_coords", "batched_point_labels"]
        self._outputNames = ['output_masks']  # actual output layer name
        # (width, height) of the most recent image passed to infer();
        # used by _postprocess to resize the mask back to the original size.
        self._currentInputSize = None
        self._inputSize = [640, 640]  # fixed (width, height) the network expects

    @property
    def name(self):
        """Model name, derived from the class name."""
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        """Switch the DNN compute backend/target after construction."""
        self._backendId = backendId
        self._targetId = targetId
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)

    def _preprocess(self, image, points, labels):
        """Build the three network input blobs.

        Args:
            image: BGR image (H, W, 3) as produced by cv.imread.
            points: sequence of [x, y] prompt points in original image coordinates.
            labels: sequence of point labels matching `points`.

        Returns:
            (image_blob, points_blob, labels_blob) ready for Net.setInput.
        """
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        # Record the original (width, height) so the mask can be mapped back.
        self._currentInputSize = (image.shape[1], image.shape[0])

        image = cv.resize(image, self._inputSize)
        image = image.astype(np.float32, copy=False) / 255.0

        # Scale prompt points into the 640x640 network space.
        # BUGFIX: build a new list instead of mutating the caller's `points`
        # in place (the original code overwrote p[0]/p[1] on the input).
        scaled_points = [
            [int(p[0] * self._inputSize[0] / self._currentInputSize[0]),
             int(p[1] * self._inputSize[1] / self._currentInputSize[1])]
            for p in points
        ]

        image_blob = cv.dnn.blobFromImage(image)
        # Explicit float32 for both prompt blobs — DNN inputs are float tensors
        # (the labels blob previously defaulted to int64/float64).
        points_blob = np.array([[scaled_points]], dtype=np.float32)
        labels_blob = np.array([[[labels]]], dtype=np.float32)

        return image_blob, points_blob, labels_blob

    def infer(self, image, points, labels):
        """Run the model on `image` with the given point prompts.

        Returns:
            uint8 mask (0/255) resized to the original image size.
        """
        # Preprocess
        imageBlob, pointsBlob, labelsBlob = self._preprocess(image, points, labels)
        # Forward
        self._model.setInput(imageBlob, self._inputNames[0])
        self._model.setInput(pointsBlob, self._inputNames[1])
        self._model.setInput(labelsBlob, self._inputNames[2])
        outputBlob = self._model.forward()
        # Postprocess
        results = self._postprocess(outputBlob)

        return results

    def _postprocess(self, outputBlob):
        """Threshold the first output mask and resize it to the original size.

        Args:
            outputBlob: raw network output; the mask logits are taken from
                [0, 0, 0, :, :] (first image, first prompt, first mask).

        Returns:
            uint8 mask (0/255) at the original image resolution.
        """
        # Logit >= 0 corresponds to probability >= 0.5.
        mask = outputBlob[0, 0, 0, :, :] >= 0

        mask_uint8 = (mask * 255).astype(np.uint8)
        # Resize back to the original image size; cv.INTER_CUBIC replaces the
        # magic number 2 used previously (same value, named constant).
        mask_uint8 = cv.resize(mask_uint8, dsize=self._currentInputSize,
                               interpolation=cv.INTER_CUBIC)

        return mask_uint8