|
import numpy as np |
|
import cv2 as cv |
|
|
|
class EfficientSAM: |
|
def __init__(self, modelPath, backendId=0, targetId=0): |
|
self._modelPath = modelPath |
|
self._backendId = backendId |
|
self._targetId = targetId |
|
|
|
self._model = cv.dnn.readNet(self._modelPath) |
|
self._model.setPreferableBackend(self._backendId) |
|
self._model.setPreferableTarget(self._targetId) |
|
|
|
self._inputNames = ["batched_images", "batched_point_coords", "batched_point_labels"] |
|
|
|
self._outputNames = ['output_masks'] |
|
self._currentInputSize = None |
|
self._inputSize = [640, 640] |
|
|
|
@property |
|
def name(self): |
|
return self.__class__.__name__ |
|
|
|
def setBackendAndTarget(self, backendId, targetId): |
|
self._backendId = backendId |
|
self._targetId = targetId |
|
self._model.setPreferableBackend(self._backendId) |
|
self._model.setPreferableTarget(self._targetId) |
|
|
|
def _preprocess(self, image, points, labels): |
|
|
|
image = cv.cvtColor(image, cv.COLOR_BGR2RGB) |
|
|
|
self._currentInputSize = (image.shape[1], image.shape[0]) |
|
|
|
image = cv.resize(image, self._inputSize) |
|
|
|
image = image.astype(np.float32, copy=False) / 255.0 |
|
|
|
|
|
for p in points: |
|
p[0] = int(p[0] * self._inputSize[0]/self._currentInputSize[0]) |
|
p[1] = int(p[1]* self._inputSize[1]/self._currentInputSize[1]) |
|
|
|
image_blob = cv.dnn.blobFromImage(image) |
|
|
|
points_blob = np.array([[points]], dtype=np.float32) |
|
|
|
labels_blob = np.array([[[labels]]]) |
|
|
|
return image_blob, points_blob, labels_blob |
|
|
|
def infer(self, image, points, labels): |
|
|
|
imageBlob, pointsBlob, labelsBlob = self._preprocess(image, points, labels) |
|
|
|
self._model.setInput(imageBlob, self._inputNames[0]) |
|
self._model.setInput(pointsBlob, self._inputNames[1]) |
|
self._model.setInput(labelsBlob, self._inputNames[2]) |
|
outputBlob = self._model.forward() |
|
|
|
results = self._postprocess(outputBlob) |
|
|
|
return results |
|
|
|
def _postprocess(self, outputBlob): |
|
mask = outputBlob[0, 0, 0, :, :] >= 0 |
|
|
|
mask_uint8 = (mask * 255).astype(np.uint8) |
|
|
|
mask_uint8 = cv.resize(mask_uint8, dsize=self._currentInputSize, interpolation=2) |
|
|
|
return mask_uint8 |
|
|