Add options for demo scripts to select backend & targets (#43)
* add options for selecting backend & targets
* add eol
- models/face_detection_yunet/demo.py +19 -2
- models/face_detection_yunet/yunet.py +2 -1
- models/face_recognition_sface/demo.py +20 -3
- models/human_segmentation_pphumanseg/demo.py +17 -2
- models/human_segmentation_pphumanseg/pphumanseg.py +12 -4
- models/image_classification_ppresnet/demo.py +16 -1
- models/image_classification_ppresnet/ppresnet.py +13 -5
- models/person_reid_youtureid/demo.py +17 -2
- models/person_reid_youtureid/youtureid.py +14 -4
- models/text_detection_db/db.py +12 -4
- models/text_detection_db/demo.py +18 -1
- models/text_recognition_crnn/crnn.py +15 -5
- models/text_recognition_crnn/demo.py +19 -2
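
Every script gains the same selection pattern; condensed below as a sketch for reference (the flag wiring mirrors the diffs that follow; 'model.onnx' is a placeholder, not a file from this repo):

import argparse
import cv2 as cv

# Backends and targets offered to the user; the CUDA entries assume a CUDA-enabled build.
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]

parser = argparse.ArgumentParser()
parser.add_argument('--backend', '-b', type=int, default=backends[0])
parser.add_argument('--target', '-t', type=int, default=targets[0])
args = parser.parse_args()

net = cv.dnn.readNet('model.onnx')      # placeholder model path
net.setPreferableBackend(args.backend)  # apply preferences before the first forward pass
net.setPreferableTarget(args.target)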
models/face_detection_yunet/demo.py
CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
@@ -61,7 +75,9 @@ if __name__ == '__main__':
                   inputSize=[320, 320],
                   confThreshold=args.conf_threshold,
                   nmsThreshold=args.nms_threshold,
-                  topK=args.top_k)
+                  topK=args.top_k,
+                  backendId=args.backend,
+                  targetId=args.target)
 
     # If input is an image
     if args.input is not None:
@@ -117,4 +133,5 @@ if __name__ == '__main__':
             # Visualize results in a new Window
             cv.imshow('YuNet Demo', frame)
 
-            tm.reset()
+            tm.reset()
+
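
The try/except above works because referencing a constant that a given build does not define raises AttributeError on the cv.dnn module; a minimal illustration, assuming a build compiled without TIM-VX:

import cv2 as cv

try:
    timvx = cv.dnn.DNN_BACKEND_TIMVX  # missing attribute on builds without TIM-VX
except AttributeError:
    timvx = None

print('TIM-VX backend available:', timvx is not None)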
models/face_detection_yunet/yunet.py
CHANGED
@@ -63,4 +63,5 @@ class YuNet:
     def infer(self, image):
         # Forward
         faces = self._model.detect(image)
-        return faces[1]
+        return faces[1]
+
models/face_recognition_sface/demo.py
CHANGED
@@ -23,11 +23,25 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(
     description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
 parser.add_argument('--input1', '-i1', type=str, help='Path to the input image 1.')
 parser.add_argument('--input2', '-i2', type=str, help='Path to the input image 2.')
 parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Distance type. \'0\': cosine, \'1\': norm_l1.')
 parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
@@ -35,13 +49,15 @@ args = parser.parse_args()
 
 if __name__ == '__main__':
     # Instantiate SFace for face recognition
-    recognizer = SFace(modelPath=args.model, disType=args.dis_type)
+    recognizer = SFace(modelPath=args.model, disType=args.dis_type, backendId=args.backend, targetId=args.target)
     # Instantiate YuNet for face detection
     detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2021dec.onnx',
                      inputSize=[320, 320],
                      confThreshold=0.9,
                      nmsThreshold=0.3,
-                     topK=5000)
+                     topK=5000,
+                     backendId=args.backend,
+                     targetId=args.target)
 
     img1 = cv.imread(args.input1)
     img2 = cv.imread(args.input2)
@@ -56,4 +72,5 @@ if __name__ == '__main__':
 
     # Match
     result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
-    print('Result: {}.'.format('same identity' if result else 'different identities'))
+    print('Result: {}.'.format('same identity' if result else 'different identities'))
+
models/human_segmentation_pphumanseg/demo.py
CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
@@ -84,7 +98,7 @@ def visualize(image, result, weight=0.6, fps=None):
 
 if __name__ == '__main__':
     # Instantiate PPHumanSeg
-    model = PPHumanSeg(modelPath=args.model)
+    model = PPHumanSeg(modelPath=args.model, backendId=args.backend, targetId=args.target)
 
     if args.input is not None:
         # Read image and resize to 192x192
@@ -138,4 +152,5 @@ if __name__ == '__main__':
             # Visualize results in a new window
             cv.imshow('PPHumanSeg Demo', frame)
 
-            tm.reset()
+            tm.reset()
+
models/human_segmentation_pphumanseg/pphumanseg.py
CHANGED
@@ -8,9 +8,14 @@ import numpy as np
 import cv2 as cv
 
 class PPHumanSeg:
-    def __init__(self, modelPath):
+    def __init__(self, modelPath, backendId=0, targetId=0):
         self._modelPath = modelPath
+        self._backendId = backendId
+        self._targetId = targetId
+
         self._model = cv.dnn.readNet(self._modelPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
 
         self._inputNames = ''
         self._outputNames = ['save_infer_model/scale_0.tmp_1']
@@ -23,10 +28,12 @@ class PPHumanSeg:
         return self.__class__.__name__
 
     def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
         image = image.astype(np.float32, copy=False) / 255.0
@@ -52,4 +59,5 @@ class PPHumanSeg:
 
     def _postprocess(self, outputBlob):
         result = np.argmax(outputBlob[0], axis=1).astype(np.uint8)
-        return result
+        return result
+
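
Because the setters now store the id and re-apply the preference to the loaded net, a caller can retarget an existing instance instead of rebuilding it; a small sketch using the class above (assumes a CUDA-enabled OpenCV build):

import cv2 as cv
from pphumanseg import PPHumanSeg

model = PPHumanSeg(modelPath='human_segmentation_pphumanseg_2021oct.onnx')
# Switch device later without reconstructing the model:
model.setBackend(cv.dnn.DNN_BACKEND_CUDA)
model.setTarget(cv.dnn.DNN_TARGET_CUDA)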
models/image_classification_ppresnet/demo.py
CHANGED
@@ -19,15 +19,29 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
 parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
 args = parser.parse_args()
 
 if __name__ == '__main__':
     # Instantiate ResNet
-    model = PPResNet(modelPath=args.model, labelPath=args.label)
+    model = PPResNet(modelPath=args.model, labelPath=args.label, backendId=args.backend, targetId=args.target)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
@@ -40,3 +54,4 @@ if __name__ == '__main__':
 
     # Print result
     print('label: {}'.format(result))
+
models/image_classification_ppresnet/ppresnet.py
CHANGED
@@ -9,10 +9,15 @@ import numpy as np
 import cv2 as cv
 
 class PPResNet:
-    def __init__(self, modelPath, labelPath):
+    def __init__(self, modelPath, labelPath, backendId=0, targetId=0):
         self._modelPath = modelPath
-        self._model = cv.dnn.readNet(self._modelPath)
         self._labelPath = labelPath
+        self._backendId = backendId
+        self._targetId = targetId
+
+        self._model = cv.dnn.readNet(self._modelPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
 
         self._inputNames = ''
         self._outputNames = ['save_infer_model/scale_0.tmp_0']
@@ -35,10 +40,12 @@ class PPResNet:
         return self.__class__.__name__
 
     def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
         image = image.astype(np.float32, copy=False) / 255.0
@@ -64,4 +71,5 @@ class PPResNet:
 
     def _postprocess(self, outputBlob):
         class_id = np.argmax(outputBlob[0])
-        return self._labels[class_id]
+        return self._labels[class_id]
+
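
The new backendId=0 and targetId=0 defaults map to OpenCV's DNN_BACKEND_DEFAULT and DNN_TARGET_CPU, so callers that pass nothing keep the previous CPU behavior:

import cv2 as cv

assert cv.dnn.DNN_BACKEND_DEFAULT == 0  # backendId=0 -> default backend
assert cv.dnn.DNN_TARGET_CPU == 0       # targetId=0 -> CPU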
models/person_reid_youtureid/demo.py
CHANGED
@@ -20,10 +20,24 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(
     description="ReID baseline models from Tencent Youtu Lab")
 parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
 parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
 parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
 parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
@@ -65,7 +79,7 @@ def visualize(results, query_dir, gallery_dir, output_size=(128, 384)):
 
 if __name__ == '__main__':
     # Instantiate YoutuReID for person ReID
-    net = YoutuReID(modelPath=args.model)
+    net = YoutuReID(modelPath=args.model, backendId=args.backend, targetId=args.target)
 
     # Read images from dir
     query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
@@ -97,4 +111,5 @@ if __name__ == '__main__':
         cv.namedWindow('result-{}'.format(f), cv.WINDOW_AUTOSIZE)
         cv.imshow('result-{}'.format(f), img)
         cv.waitKey(0)
-    cv.destroyAllWindows()
+    cv.destroyAllWindows()
+
models/person_reid_youtureid/youtureid.py
CHANGED
@@ -8,8 +8,15 @@ import numpy as np
 import cv2 as cv
 
 class YoutuReID:
-    def __init__(self, modelPath):
+    def __init__(self, modelPath, backendId=0, targetId=0):
+        self._modelPath = modelPath
+        self._backendId = backendId
+        self._targetId = targetId
+
         self._model = cv.dnn.readNet(modelPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
+
         self._input_size = (128, 256) # fixed
         self._output_dim = 768
         self._mean = (0.485, 0.456, 0.406)
@@ -20,10 +27,12 @@ class YoutuReID:
         return self.__class__.__name__
 
     def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
         image = image[:, :, ::-1]
@@ -57,4 +66,5 @@ class YoutuReID:
 
         dist = np.matmul(query_arr, gallery_arr.T)
         idx = np.argsort(-dist, axis=1)
-        return [i[0:topK] for i in idx]
+        return [i[0:topK] for i in idx]
+
models/text_detection_db/db.py
CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import cv2 as cv
 
 class DB:
-    def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0):
+    def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
         self._modelPath = modelPath
         self._model = cv.dnn_TextDetectionModel_DB(
             cv.dnn.readNet(self._modelPath)
@@ -21,6 +21,11 @@ class DB:
         self._polygonThreshold = polygonThreshold
         self._maxCandidates = maxCandidates
         self._unclipRatio = unclipRatio
+        self._backendId = backendId
+        self._targetId = targetId
+
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
 
         self._model.setBinaryThreshold(self._binaryThreshold)
         self._model.setPolygonThreshold(self._polygonThreshold)
@@ -34,10 +39,12 @@ class DB:
         return self.__class__.__name__
 
     def setBackend(self, backend):
-        self._model.setPreferableBackend(backend)
+        self._backendId = backend
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target):
-        self._model.setPreferableTarget(target)
+        self._targetId = target
+        self._model.setPreferableTarget(self._targetId)
 
     def setInputSize(self, input_size):
         self._inputSize = tuple(input_size)
@@ -47,4 +54,5 @@ class DB:
         assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
         assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
 
-        return self._model.detect(image)
+        return self._model.detect(image)
+
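
With the added keyword arguments, the detector can be pinned to a device at construction time; a hypothetical example (CUDA-enabled build assumed, other parameters left at their defaults):

import cv2 as cv
from db import DB

detector = DB(modelPath='text_detection_DB_TD500_resnet18_2021sep.onnx',
              backendId=cv.dnn.DNN_BACKEND_CUDA,
              targetId=cv.dnn.DNN_TARGET_CUDA)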
models/text_detection_db/demo.py
CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--width', type=int, default=736,
                     help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736,
@@ -53,6 +67,8 @@ if __name__ == '__main__':
                polygonThreshold=args.polygon_threshold,
                maxCandidates=args.max_candidates,
                unclipRatio=args.unclip_ratio
+               backendId=args.backend,
+               targetId=args.target
     )
 
     # If input is an image
@@ -104,4 +120,5 @@ if __name__ == '__main__':
             # Visualize results in a new Window
             cv.imshow('{} Demo'.format(model.name), frame)
 
-            tm.reset()
+            tm.reset()
+
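
Note that this hunk appends the new arguments after unclipRatio=args.unclip_ratio, which carries no trailing comma, so the merged call is a syntax error; presumably the intended tail of the call reads:

               unclipRatio=args.unclip_ratio,  # trailing comma needed before the new arguments
               backendId=args.backend,
               targetId=args.target
    )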
models/text_recognition_crnn/crnn.py
CHANGED
@@ -8,10 +8,17 @@ import numpy as np
 import cv2 as cv
 
 class CRNN:
-    def __init__(self, modelPath, charsetPath):
+    def __init__(self, modelPath, charsetPath, backendId=0, targetId=0):
         self._model_path = modelPath
+        self._charsetPath = charsetPath
+        self._backendId = backendId
+        self._targetId = targetId
+
         self._model = cv.dnn.readNet(self._model_path)
-        self._charset = self._load_charset(charsetPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
+
+        self._charset = self._load_charset(self._charsetPath)
         self._inputSize = [100, 32] # Fixed
         self._targetVertices = np.array([
             [0, self._inputSize[1] - 1],
@@ -33,10 +40,12 @@ class CRNN:
         return charset
 
    def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
    def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
    def _preprocess(self, image, rbbox):
        # Remove conf, reshape and ensure all is np.float32
@@ -81,4 +90,5 @@ class CRNN:
        for i in range(len(text)):
            if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
                char_list.append(text[i])
-        return ''.join(char_list)
+        return ''.join(char_list)
+
models/text_recognition_crnn/demo.py
CHANGED
@@ -23,10 +23,24 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(
     description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
 parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
@@ -50,7 +64,9 @@ if __name__ == '__main__':
                binaryThreshold=0.3,
                polygonThreshold=0.5,
                maxCandidates=200,
-               unclipRatio=2.0
+               unclipRatio=2.0,
+               backendId=args.backend,
+               targetId=args.target
     )
 
     # If input is an image
@@ -118,4 +134,5 @@ if __name__ == '__main__':
         print(texts)
 
         # Visualize results in a new Window
-        cv.imshow('{} Demo'.format(recognizer.name), frame)
+        cv.imshow('{} Demo'.format(recognizer.name), frame)
+