Zhang-Yang-Sustech

Migrating the EfficientSAM model to the OpenCV model zoo (#258)

0bf46d3 about 1 year ago

2.66 kB

	import numpy as np
	import cv2 as cv

	class EfficientSAM:
	def __init__(self, modelPath, backendId=0, targetId=0):
	self._modelPath = modelPath
	self._backendId = backendId
	self._targetId = targetId

	self._model = cv.dnn.readNet(self._modelPath)
	self._model.setPreferableBackend(self._backendId)
	self._model.setPreferableTarget(self._targetId)
	# 3 inputs
	self._inputNames = ["batched_images", "batched_point_coords", "batched_point_labels"]

	self._outputNames = ['output_masks'] # actual output layer name
	self._currentInputSize = None
	self._inputSize = [640, 640] # input size for the model

	@property
	def name(self):
	return self.__class__.__name__

	def setBackendAndTarget(self, backendId, targetId):
	self._backendId = backendId
	self._targetId = targetId
	self._model.setPreferableBackend(self._backendId)
	self._model.setPreferableTarget(self._targetId)

	def _preprocess(self, image, points, labels):

	image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
	# record the input image size, (width, height)
	self._currentInputSize = (image.shape[1], image.shape[0])

	image = cv.resize(image, self._inputSize)

	image = image.astype(np.float32, copy=False) / 255.0

	# convert points to (640*640) size space
	for p in points:
	p[0] = int(p[0] * self._inputSize[0]/self._currentInputSize[0])
	p[1] = int(p[1]* self._inputSize[1]/self._currentInputSize[1])

	image_blob = cv.dnn.blobFromImage(image)

	points_blob = np.array([[points]], dtype=np.float32)

	labels_blob = np.array([[[labels]]])

	return image_blob, points_blob, labels_blob

	def infer(self, image, points, labels):
	# Preprocess
	imageBlob, pointsBlob, labelsBlob = self._preprocess(image, points, labels)
	# Forward
	self._model.setInput(imageBlob, self._inputNames[0])
	self._model.setInput(pointsBlob, self._inputNames[1])
	self._model.setInput(labelsBlob, self._inputNames[2])
	outputBlob = self._model.forward()
	# Postprocess
	results = self._postprocess(outputBlob)

	return results

	def _postprocess(self, outputBlob):
	mask = outputBlob[0, 0, 0, :, :] >= 0

	mask_uint8 = (mask * 255).astype(np.uint8)
	# change to real image size
	mask_uint8 = cv.resize(mask_uint8, dsize=self._currentInputSize, interpolation=2)

	return mask_uint8