ytfeng committed
Commit a07f7bd · 1 parent: 584bcfa

Limit combinations of backends and targets in demos and benchmark (#145)

* limit backend and target combination in demos and benchmark

* simpler version checking

Files changed (33)
  1. benchmark/benchmark.py +35 -14
  2. models/face_detection_yunet/demo.py +39 -31
  3. models/face_detection_yunet/yunet.py +1 -13
  4. models/face_recognition_sface/demo.py +35 -30
  5. models/face_recognition_sface/sface.py +1 -11
  6. models/facial_expression_recognition/demo.py +33 -30
  7. models/facial_expression_recognition/facial_fer_model.py +3 -5
  8. models/handpose_estimation_mediapipe/demo.py +36 -30
  9. models/handpose_estimation_mediapipe/mp_handpose.py +3 -6
  10. models/human_segmentation_pphumanseg/demo.py +31 -26
  11. models/human_segmentation_pphumanseg/pphumanseg.py +3 -6
  12. models/image_classification_mobilenet/demo.py +26 -30
  13. models/image_classification_mobilenet/mobilenet.py +3 -5
  14. models/image_classification_ppresnet/demo.py +27 -24
  15. models/image_classification_ppresnet/ppresnet.py +3 -5
  16. models/license_plate_detection_yunet/demo.py +41 -32
  17. models/license_plate_detection_yunet/lpd_yunet.py +2 -4
  18. models/object_detection_nanodet/demo.py +56 -49
  19. models/object_detection_nanodet/nanodet.py +3 -5
  20. models/object_detection_yolox/demo.py +58 -50
  21. models/object_detection_yolox/yolox.py +3 -5
  22. models/object_tracking_dasiamrpn/dasiamrpn.py +4 -12
  23. models/object_tracking_dasiamrpn/demo.py +37 -14
  24. models/palm_detection_mediapipe/demo.py +37 -30
  25. models/palm_detection_mediapipe/mp_palmdet.py +4 -6
  26. models/person_reid_youtureid/demo.py +36 -28
  27. models/person_reid_youtureid/youtureid.py +3 -6
  28. models/qrcode_wechatqrcode/demo.py +41 -16
  29. models/qrcode_wechatqrcode/wechatqrcode.py +10 -10
  30. models/text_detection_db/db.py +3 -6
  31. models/text_detection_db/demo.py +41 -33
  32. models/text_recognition_crnn/crnn.py +3 -5
  33. models/text_recognition_crnn/demo.py +35 -31
benchmark/benchmark.py CHANGED
@@ -8,9 +8,31 @@ import cv2 as cv
 from models import MODELS
 from utils import METRICS, DATALOADERS
 
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
+
 parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
 parser.add_argument('--cfg', '-c', type=str,
                     help='Benchmarking on the given config.')
+parser.add_argument('--cfg_overwrite_backend_target', type=int, default=-1,
+                    help='''Choose one of the backend-target pairs to run this benchmark:
+                        others: (default) use the one from config,
+                        {:d}: OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument("--fp32", action="store_true", help="Runs models of float32 precision only.")
 parser.add_argument("--fp16", action="store_true", help="Runs models of float16 precision only.")
 parser.add_argument("--int8", action="store_true", help="Runs models of int8 precision only.")
@@ -56,6 +78,8 @@ class Benchmark:
             opencv=cv.dnn.DNN_BACKEND_OPENCV,
             # vkcom=cv.dnn.DNN_BACKEND_VKCOM,
             cuda=cv.dnn.DNN_BACKEND_CUDA,
+            timvx=cv.dnn.DNN_BACKEND_TIMVX,
+            cann=cv.dnn.DNN_BACKEND_CANN,
         )
 
         target_id = kwargs.pop('target', 'cpu')
@@ -69,28 +93,20 @@ class Benchmark:
             cuda=cv.dnn.DNN_TARGET_CUDA,
             cuda_fp16=cv.dnn.DNN_TARGET_CUDA_FP16,
             # hddl=cv.dnn.DNN_TARGET_HDDL,
+            npu=cv.dnn.DNN_TARGET_NPU,
         )
 
-        # add extra backends & targets
-        try:
-            available_backends['timvx'] = cv.dnn.DNN_BACKEND_TIMVX
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with TIM-VX backend enbaled. See https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more details on how to enable TIM-VX backend.')
-        try:
-            available_backends['cann'] = cv.dnn.DNN_BACKEND_CANN
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with CANN backend enabled. See https://github.com/opencv/opencv/wiki/Huawei-CANN-Backend for more details on how to enable CANN backend.')
-
         self._backend = available_backends[backend_id]
         self._target = available_targets[target_id]
 
         self._benchmark_results = dict()
 
+    def setBackendAndTarget(self, backend_id, target_id):
+        self._backend = backend_id
+        self._target = target_id
+
     def run(self, model):
-        model.setBackend(self._backend)
-        model.setTarget(self._target)
+        model.setBackendAndTarget(self._backend, self._target)
 
         for idx, data in enumerate(self._dataloader):
             filename, input_data = data[:2]
@@ -118,6 +134,11 @@ if __name__ == '__main__':
     # Instantiate benchmark
     benchmark = Benchmark(**cfg['Benchmark'])
 
+    if args.cfg_overwrite_backend_target >= 0:
+        backend_id = backend_target_pairs[args.cfg_overwrite_backend_target][0]
+        target_id = backend_target_pairs[args.cfg_overwrite_backend_target][1]
+        benchmark.setBackendAndTarget(backend_id, target_id)
+
     # Instantiate model
     model_config = cfg['Model']
     model_handler, model_paths = MODELS.get(model_config.pop('name'))
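
Note on the simplified version check: the assert compares cv.__version__ as a string, which is lexicographic, so "4.7.0" correctly beats "4.6.x" but a hypothetical "4.10.0" would sort before "4.7.0". A numeric tuple comparison avoids that edge case; a minimal sketch, not part of this commit:

# Sketch only (not from the commit): numeric version comparison.
import cv2 as cv

def version_tuple(v):
    # "4.7.0-dev" -> (4, 7, 0); keep only the leading numeric fields
    return tuple(int(x) for x in v.split('-')[0].split('.')[:3])

assert version_tuple(cv.__version__) >= (4, 7, 0), \
       "Please install latest opencv-python: python3 -m pip install --upgrade opencv-python"
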
models/face_detection_yunet/demo.py CHANGED
@@ -11,36 +11,42 @@ import cv2 as cv
 
 from yunet import YuNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx', help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a face, defauts to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
-parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
-parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx',
+                    help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Usage: Set the minimum needed confidence for the model to identify a face, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--top_k', type=int, default=5000,
+                    help='Usage: Keep top_k bounding boxes before NMS.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
@@ -70,14 +76,17 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate YuNet
     model = YuNet(modelPath=args.model,
                   inputSize=[320, 320],
                   confThreshold=args.conf_threshold,
                   nmsThreshold=args.nms_threshold,
                   topK=args.top_k,
-                  backendId=args.backend,
-                  targetId=args.target)
+                  backendId=backend_id,
+                  targetId=target_id)
 
     # If input is an image
     if args.input is not None:
@@ -134,4 +143,3 @@ if __name__ == '__main__':
         cv.imshow('YuNet Demo', frame)
 
         tm.reset()
-
 
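Every demo in this commit resolves both IDs from one index into backend_target_pairs. A slightly more defensive lookup, a sketch rather than part of the commit, validates the index and unpacks the row in one step:

# Sketch, assuming backend_target_pairs and args as defined in the demo above.
def resolve_backend_target(idx, pairs):
    if not 0 <= idx < len(pairs):
        raise SystemExit('--backend_target must be in [0, {}], got {}'.format(len(pairs) - 1, idx))
    backend_id, target_id = pairs[idx]  # unpack the [backend, target] row
    return backend_id, target_id

# backend_id, target_id = resolve_backend_target(args.backend_target, backend_target_pairs)
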
models/face_detection_yunet/yunet.py CHANGED
@@ -33,19 +33,8 @@ class YuNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceDetectorYN.create(
-            model=self._modelPath,
-            config="",
-            input_size=self._inputSize,
-            score_threshold=self._confThreshold,
-            nms_threshold=self._nmsThreshold,
-            top_k=self._topK,
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceDetectorYN.create(
             model=self._modelPath,
@@ -64,4 +53,3 @@ class YuNet:
         # Forward
         faces = self._model.detect(image)
         return faces[1]
-
 
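Unlike the dnn.Net-based wrappers below, YuNet wraps cv.FaceDetectorYN, which takes backend_id and target_id at creation time and exposes no preference setters; the merged setter therefore has to recreate the handle, and merging means it is recreated once instead of twice. A usage sketch with assumed values:

# Hypothetical usage: reconfigure an existing YuNet for CUDA in one call.
import cv2 as cv
from yunet import YuNet  # the wrapper defined above

model = YuNet(modelPath='face_detection_yunet_2022mar.onnx')
model.setBackendAndTarget(cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA)
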
models/face_recognition_sface/demo.py CHANGED
@@ -15,49 +15,55 @@ from sface import SFace
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA \n Usage: Set backend DNN model, defaults to cv.dnn.DNN_BACKEND_OPENCV (int = 0). Based on your OpenCV version, it may or may not support cv.dnn.DNN_BACKEND_TIMVX. More details: [https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f]"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(
     description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
-parser.add_argument('--input1', '-i1', type=str, help='Usage: Set path to the input image 1 (original face).')
-parser.add_argument('--input2', '-i2', type=str, help='Usage: Set path to the input image 2 (comparison face).')
-parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input1', '-i1', type=str,
+                    help='Usage: Set path to the input image 1 (original face).')
+parser.add_argument('--input2', '-i2', type=str,
+                    help='Usage: Set path to the input image 2 (comparison face).')
+parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx',
+                    help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0,
+                    help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate SFace for face recognition
-    recognizer = SFace(modelPath=args.model, disType=args.dis_type, backendId=args.backend, targetId=args.target)
+    recognizer = SFace(modelPath=args.model,
+                       disType=args.dis_type,
+                       backendId=backend_id,
+                       targetId=target_id)
     # Instantiate YuNet for face detection
     detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx',
                      inputSize=[320, 320],
                      confThreshold=0.9,
                      nmsThreshold=0.3,
                      topK=5000,
-                     backendId=args.backend,
-                     targetId=args.target)
+                     backendId=backend_id,
+                     targetId=target_id)
 
     img1 = cv.imread(args.input1)
     img2 = cv.imread(args.input2)
@@ -73,4 +79,3 @@ if __name__ == '__main__':
     # Match
     result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
     print('Result: {}.'.format('same identity' if result else 'different identities'))
-
 
models/face_recognition_sface/sface.py CHANGED
@@ -7,8 +7,6 @@
 import numpy as np
 import cv2 as cv
 
-from _testcapi import FLT_MIN
-
 class SFace:
     def __init__(self, modelPath, disType=0, backendId=0, targetId=0):
         self._modelPath = modelPath
@@ -30,15 +28,8 @@ class SFace:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceRecognizerSF.create(
-            model=self._modelPath,
-            config="",
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceRecognizerSF.create(
             model=self._modelPath,
@@ -70,4 +61,3 @@ class SFace:
         else: # NORM_L2
             norml2_distance = self._model.match(feature1, feature2, self._disType)
             return 1 if norml2_distance <= self._threshold_norml2 else 0
-
 
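The dropped import pulled FLT_MIN from _testcapi, a CPython-internal module built only for interpreter tests; it was unused here. If a smallest-positive-float constant were ever needed, the portable source is sys.float_info — a sketch, not part of the commit:

# Sketch only: portable replacement for the removed _testcapi import.
import sys

FLT_MIN = sys.float_info.min  # smallest positive normalized float
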
models/facial_expression_recognition/demo.py CHANGED
@@ -11,38 +11,38 @@ from facial_fer_model import FacialExpressionRecog
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
-
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Facial Expression Recognition')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx', help='Path to the facial expression recognition model.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx',
+                    help='Path to the facial expression recognition model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
-
 def visualize(image, det_res, fer_res, box_color=(0, 255, 0), text_color=(0, 0, 255)):
 
     print('%s %3d faces detected.' % (datetime.datetime.now(), len(det_res)))
@@ -83,11 +83,14 @@ def process(detect_model, fer_model, frame):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     detect_model = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')
 
     fer_model = FacialExpressionRecog(modelPath=args.model,
-                                      backendId=args.backend,
-                                      targetId=args.target)
+                                      backendId=backend_id,
+                                      targetId=target_id)
 
     # If input is an image
     if args.input is not None:
models/facial_expression_recognition/facial_fer_model.py CHANGED
@@ -29,12 +29,10 @@ class FacialExpressionRecog:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image, bbox):
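
This two-line setter is now duplicated across the dnn.Net-based wrappers (FacialExpressionRecog, MPHandPose, PPHumanSeg, MobileNet, PPResNet, and others below). A shared mixin would be one way to keep a single copy; a sketch under the assumption that each wrapper stores its net in self._model — the zoo keeps the classes standalone instead:

# Hypothetical mixin, not how the repo is actually organized.
class BackendTargetMixin:
    def setBackendAndTarget(self, backendId, targetId):
        self._backendId = backendId
        self._targetId = targetId
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)
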
models/handpose_estimation_mediapipe/demo.py CHANGED
@@ -9,34 +9,38 @@ from mp_handpose import MPHandPose
 sys.path.append('../palm_detection_mediapipe')
 from mp_palmdet import MPPalmDet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Hand Pose Estimation from MediaPipe')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./handpose_estimation_mediapipe_2023feb.onnx', help='Path to the model.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out hands of confidence < conf_threshold.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./handpose_estimation_mediapipe_2023feb.onnx',
+                    help='Path to the model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Filter out hands of confidence < conf_threshold.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
 
@@ -147,17 +151,19 @@ def visualize(image, hands, print_result=False):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # palm detector
     palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
                               nmsThreshold=0.3,
                               scoreThreshold=0.6,
-                              backendId=args.backend,
-                              targetId=args.target)
+                              backendId=backend_id,
+                              targetId=target_id)
     # handpose detector
     handpose_detector = MPHandPose(modelPath=args.model,
                                    confThreshold=args.conf_threshold,
-                                   backendId=args.backend,
-                                   targetId=args.target)
+                                   backendId=backend_id,
+                                   targetId=target_id)
 
     # If input is an image
     if args.input is not None:
models/handpose_estimation_mediapipe/mp_handpose.py CHANGED
@@ -1,7 +1,6 @@
 import numpy as np
 import cv2 as cv
 
-
 class MPHandPose:
     def __init__(self, modelPath, confThreshold=0.8, backendId=0, targetId=0):
         self.model_path = modelPath
@@ -28,12 +27,10 @@ class MPHandPose:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _cropAndPadFromPalm(self, image, palm_bbox, for_rotation = False):
models/human_segmentation_pphumanseg/demo.py CHANGED
@@ -11,33 +11,36 @@ import cv2 as cv
 
 from pphumanseg import PPHumanSeg
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2023mar.onnx', help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2023mar.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2023mar.onnx',
+                    help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2023mar.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def get_color_map_list(num_classes):
@@ -97,8 +100,10 @@ def visualize(image, result, weight=0.6, fps=None):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate PPHumanSeg
-    model = PPHumanSeg(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = PPHumanSeg(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     if args.input is not None:
         # Read image and resize to 192x192
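
Replacing type=str2bool with action='store_true' also changes the CLI contract across these demos: --save no longer takes a value ('--save True'); the flag's presence alone enables it. A minimal illustration, for exposition only and not from the commit:

# Minimal argparse illustration of store_true semantics.
import argparse

p = argparse.ArgumentParser()
p.add_argument('--save', '-s', action='store_true')

print(p.parse_args([]).save)          # False: flag omitted
print(p.parse_args(['--save']).save)  # True: bare flag, no value needed
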
models/human_segmentation_pphumanseg/pphumanseg.py CHANGED
@@ -28,12 +28,10 @@ class PPHumanSeg:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
@@ -69,4 +67,3 @@ class PPHumanSeg:
 
         result = np.argmax(outputBlob, axis=1).astype(np.uint8)
         return result
-
 
models/image_classification_mobilenet/demo.py CHANGED
@@ -5,43 +5,39 @@ import cv2 as cv
 
 from mobilenet import MobileNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
-
-all_mobilenets = [
-    'image_classification_mobilenetv1_2022apr.onnx',
-    'image_classification_mobilenetv2_2022apr.onnx',
-    'image_classification_mobilenetv1_2022apr-int8-quantized.onnx',
-    'image_classification_mobilenetv2_2022apr-int8-quantized.onnx'
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
 ]
 
 parser = argparse.ArgumentParser(description='Demo for MobileNet V1 & V2.')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, choices=all_mobilenets, default=all_mobilenets[0], help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_mobilenetv1_2022apr.onnx',
+                    help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate MobileNet
-    model = MobileNet(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = MobileNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
models/image_classification_mobilenet/mobilenet.py CHANGED
@@ -33,12 +33,10 @@ class MobileNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _preprocess(self, image):
models/image_classification_ppresnet/demo.py CHANGED
@@ -11,36 +11,39 @@ import cv2 as cv
 
 from ppresnet import PPResNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx',
+                    help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate ResNet
-    model = PPResNet(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = PPResNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
models/image_classification_ppresnet/ppresnet.py CHANGED
@@ -36,12 +36,10 @@ class PPResNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
models/license_plate_detection_yunet/demo.py CHANGED
@@ -5,37 +5,44 @@ import cv2 as cv
5
 
6
  from lpd_yunet import LPD_YuNet
7
 
8
- def str2bool(v):
9
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
10
- return True
11
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
12
- return False
13
- else:
14
- raise NotImplementedError
15
-
16
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
17
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
18
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
19
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
20
- try:
21
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
27
 
28
  parser = argparse.ArgumentParser(description='LPD-YuNet for License Plate Detection')
29
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
30
- parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2022may.onnx', help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2022may.onnx.')
31
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
32
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
33
- parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
34
- parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3. Suppress bounding boxes of iou >= nms_threshold.')
35
- parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.')
36
- parser.add_argument('--keep_top_k', type=int, default=750, help='Usage: Keep keep_top_k bounding boxes after NMS.')
37
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
38
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  args = parser.parse_args()
40
 
41
  def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
@@ -57,14 +64,17 @@ def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=N
57
  return output
58
 
59
  if __name__ == '__main__':
 
 
 
60
  # Instantiate LPD-YuNet
61
  model = LPD_YuNet(modelPath=args.model,
62
  confThreshold=args.conf_threshold,
63
  nmsThreshold=args.nms_threshold,
64
  topK=args.top_k,
65
  keepTopK=args.keep_top_k,
66
- backendId=args.backend,
67
- targetId=args.target)
68
 
69
  # If input is an image
70
  if args.input is not None:
@@ -117,4 +127,3 @@ if __name__ == '__main__':
117
  cv.imshow('LPD-YuNet Demo', frame)
118
 
119
  tm.reset()
120
-
 
5
 
6
  from lpd_yunet import LPD_YuNet
7
 
8
+ # Check OpenCV version
9
+ assert cv.__version__ >= "4.7.0", \
10
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
11
+
12
+ # Valid combinations of backends and targets
13
+ backend_target_pairs = [
14
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
16
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
17
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
18
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
19
+ ]
 
 
 
 
 
 
 
20
 
21
  parser = argparse.ArgumentParser(description='LPD-YuNet for License Plate Detection')
22
+ parser.add_argument('--input', '-i', type=str,
23
+ help='Usage: Set path to the input image. Omit for using default camera.')
24
+ parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2023mar.onnx',
25
+ help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2023mar.onnx.')
26
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
27
+ help='''Choose one of the backend-target pair to run this demo:
28
+ {:d}: (default) OpenCV implementation + CPU,
29
+ {:d}: CUDA + GPU (CUDA),
30
+ {:d}: CUDA + GPU (CUDA FP16),
31
+ {:d}: TIM-VX + NPU,
32
+ {:d}: CANN + NPU
33
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
34
+ parser.add_argument('--conf_threshold', type=float, default=0.9,
35
+ help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
36
+ parser.add_argument('--nms_threshold', type=float, default=0.3,
37
+ help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3. Suppress bounding boxes of iou >= nms_threshold.')
38
+ parser.add_argument('--top_k', type=int, default=5000,
39
+ help='Usage: Keep top_k bounding boxes before NMS.')
40
+ parser.add_argument('--keep_top_k', type=int, default=750,
41
+ help='Usage: Keep keep_top_k bounding boxes after NMS.')
42
+ parser.add_argument('--save', '-s', action='store_true',
43
+ help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
44
+ parser.add_argument('--vis', '-v', action='store_true',
45
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
46
  args = parser.parse_args()
47
 
48
  def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
 
64
  return output
65
 
66
  if __name__ == '__main__':
67
+ backend_id = backend_target_pairs[args.backend_target][0]
68
+ target_id = backend_target_pairs[args.backend_target][1]
69
+
70
  # Instantiate LPD-YuNet
71
  model = LPD_YuNet(modelPath=args.model,
72
  confThreshold=args.conf_threshold,
73
  nmsThreshold=args.nms_threshold,
74
  topK=args.top_k,
75
  keepTopK=args.keep_top_k,
76
+ backendId=backend_id,
77
+ targetId=target_id)
78
 
79
  # If input is an image
80
  if args.input is not None:
 
127
  cv.imshow('LPD-YuNet Demo', frame)
128
 
129
  tm.reset()
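
A note on the version gate added above: cv.__version__ >= "4.7.0" is a plain string comparison, which is lexicographic, so a hypothetical "4.10.0" would compare as older than "4.7.0". A sketch of a numeric check, assuming the usual major.minor.revision form of cv.__version__ (the helper name is illustrative, not part of this commit):

import cv2 as cv

def version_tuple(v):
    # Keep only the leading numeric fields, e.g. "4.7.0-dev" -> (4, 7, 0).
    return tuple(int(part) for part in v.split('-')[0].split('.'))

assert version_tuple(cv.__version__) >= (4, 7, 0), \
    "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"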
 
models/license_plate_detection_yunet/lpd_yunet.py CHANGED
@@ -28,12 +28,10 @@ class LPD_YuNet:
28
  def name(self):
29
  return self.__class__.__name__
30
 
31
- def setBackend(self, backendId):
32
  self.backend_id = backendId
33
- self.model.setPreferableBackend(self.backend_id)
34
-
35
- def setTarget(self, targetId):
36
  self.target_id = targetId
 
37
  self.model.setPreferableTarget(self.target_id)
38
 
39
  def setInputSize(self, inputSize):
 
28
  def name(self):
29
  return self.__class__.__name__
30
 
31
+ def setBackendAndTarget(self, backendId, targetId):
32
  self.backend_id = backendId
 
 
 
33
  self.target_id = targetId
34
+ self.model.setPreferableBackend(self.backend_id)
35
  self.model.setPreferableTarget(self.target_id)
36
 
37
  def setInputSize(self, inputSize):
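
The two setters are merged because a backend is only valid together with a matching target, which the old independent setBackend/setTarget calls could not guarantee. A minimal usage sketch, mirroring the constructor arguments from the demo above (the CUDA pair is only an example and requires a CUDA-enabled OpenCV build):

import cv2 as cv
from lpd_yunet import LPD_YuNet

model = LPD_YuNet(modelPath='license_plate_detection_lpd_yunet_2023mar.onnx',
                  confThreshold=0.9, nmsThreshold=0.3, topK=5000, keepTopK=750,
                  backendId=cv.dnn.DNN_BACKEND_OPENCV, targetId=cv.dnn.DNN_TARGET_CPU)
# Backend and target now switch together in a single call.
model.setBackendAndTarget(cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA)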
models/object_detection_nanodet/demo.py CHANGED
@@ -1,29 +1,21 @@
1
  import numpy as np
2
- import cv2
3
  import argparse
4
 
5
  from nanodet import NanoDet
6
 
7
- def str2bool(v):
8
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
9
- return True
10
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
11
- return False
12
- else:
13
- raise NotImplementedError
14
-
15
- backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
16
- targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
17
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
18
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
19
 
20
- try:
21
- backends += [cv2.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv2.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
27
 
28
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
29
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
@@ -48,16 +40,16 @@ def letterbox(srcimg, target_size=(416, 416)):
48
  hw_scale = img.shape[0] / img.shape[1]
49
  if hw_scale > 1:
50
  newh, neww = target_size[0], int(target_size[1] / hw_scale)
51
- img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
52
  left = int((target_size[1] - neww) * 0.5)
53
- img = cv2.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv2.BORDER_CONSTANT, value=0) # add border
54
  else:
55
  newh, neww = int(target_size[0] * hw_scale), target_size[1]
56
- img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
57
  top = int((target_size[0] - newh) * 0.5)
58
- img = cv2.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=0)
59
  else:
60
- img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
61
 
62
  letterbox_scale = [top, left, newh, neww]
63
  return img, letterbox_scale
@@ -87,7 +79,7 @@ def vis(preds, res_img, letterbox_scale, fps=None):
87
  # draw FPS
88
  if fps is not None:
89
  fps_label = "FPS: %.2f" % fps
90
- cv2.putText(ret, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
91
 
92
  # draw bboxes and labels
93
  for pred in preds:
@@ -97,37 +89,52 @@ def vis(preds, res_img, letterbox_scale, fps=None):
97
 
98
  # bbox
99
  xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
100
- cv2.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
101
 
102
  # label
103
  label = "{:s}: {:.2f}".format(classes[classid], conf)
104
- cv2.putText(ret, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
105
 
106
  return ret
107
 
108
  if __name__=='__main__':
109
  parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
110
- parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
111
- parser.add_argument('--model', '-m', type=str, default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
112
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
113
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
114
- parser.add_argument('--confidence', default=0.35, type=float, help='Class confidence')
115
- parser.add_argument('--nms', default=0.6, type=float, help='Enter nms IOU threshold')
116
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
117
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
 
 
 
 
 
 
 
 
 
 
 
118
  args = parser.parse_args()
119
 
 
 
 
120
  model = NanoDet(modelPath=args.model,
121
  prob_threshold=args.confidence,
122
  iou_threshold=args.nms,
123
- backend_id=args.backend,
124
- target_id=args.target)
125
 
126
- tm = cv2.TickMeter()
127
  tm.reset()
128
  if args.input is not None:
129
- image = cv2.imread(args.input)
130
- input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
131
 
132
  # Letterbox transformation
133
  input_blob, letterbox_scale = letterbox(input_blob)
@@ -142,25 +149,25 @@ if __name__=='__main__':
142
 
143
  if args.save:
144
  print('Results saved to result.jpg\n')
145
- cv2.imwrite('result.jpg', img)
146
 
147
  if args.vis:
148
- cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
149
- cv2.imshow(args.input, img)
150
- cv2.waitKey(0)
151
 
152
  else:
153
  print("Press any key to stop video capture")
154
  deviceId = 0
155
- cap = cv2.VideoCapture(deviceId)
156
 
157
- while cv2.waitKey(1) < 0:
158
  hasFrame, frame = cap.read()
159
  if not hasFrame:
160
  print('No frames grabbed!')
161
  break
162
 
163
- input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
164
  input_blob, letterbox_scale = letterbox(input_blob)
165
  # Inference
166
  tm.start()
@@ -169,6 +176,6 @@ if __name__=='__main__':
169
 
170
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
171
 
172
- cv2.imshow("NanoDet Demo", img)
173
 
174
  tm.reset()
 
1
  import numpy as np
2
+ import cv2 as cv
3
  import argparse
4
 
5
  from nanodet import NanoDet
6
 
7
+ # Check OpenCV version
8
+ assert cv.__version__ >= "4.7.0", \
9
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
 
 
 
 
 
 
 
 
10
 
11
+ # Valid combinations of backends and targets
12
+ backend_target_pairs = [
13
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
14
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
16
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
17
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
18
+ ]
19
 
20
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
21
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
 
40
  hw_scale = img.shape[0] / img.shape[1]
41
  if hw_scale > 1:
42
  newh, neww = target_size[0], int(target_size[1] / hw_scale)
43
+ img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
44
  left = int((target_size[1] - neww) * 0.5)
45
+ img = cv.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv.BORDER_CONSTANT, value=0) # add border
46
  else:
47
  newh, neww = int(target_size[0] * hw_scale), target_size[1]
48
+ img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
49
  top = int((target_size[0] - newh) * 0.5)
50
+ img = cv.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv.BORDER_CONSTANT, value=0)
51
  else:
52
+ img = cv.resize(img, target_size, interpolation=cv.INTER_AREA)
53
 
54
  letterbox_scale = [top, left, newh, neww]
55
  return img, letterbox_scale
 
79
  # draw FPS
80
  if fps is not None:
81
  fps_label = "FPS: %.2f" % fps
82
+ cv.putText(ret, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
83
 
84
  # draw bboxes and labels
85
  for pred in preds:
 
89
 
90
  # bbox
91
  xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
92
+ cv.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
93
 
94
  # label
95
  label = "{:s}: {:.2f}".format(classes[classid], conf)
96
+ cv.putText(ret, label, (xmin, ymin - 10), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
97
 
98
  return ret
99
 
100
  if __name__=='__main__':
101
  parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
102
+ parser.add_argument('--input', '-i', type=str,
103
+ help='Path to the input image. Omit for using default camera.')
104
+ parser.add_argument('--model', '-m', type=str,
105
+ default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
106
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
107
+ help='''Choose one of the backend-target pairs to run this demo:
108
+ {:d}: (default) OpenCV implementation + CPU,
109
+ {:d}: CUDA + GPU (CUDA),
110
+ {:d}: CUDA + GPU (CUDA FP16),
111
+ {:d}: TIM-VX + NPU,
112
+ {:d}: CANN + NPU
113
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
114
+ parser.add_argument('--confidence', default=0.35, type=float,
115
+ help='Class confidence')
116
+ parser.add_argument('--nms', default=0.6, type=float,
117
+ help='NMS IoU threshold')
118
+ parser.add_argument('--save', '-s', action='store_true',
119
+ help='Specify to save results. This flag is invalid when using camera.')
120
+ parser.add_argument('--vis', '-v', action='store_true',
121
+ help='Specify to open a window for result visualization. This flag is invalid when using camera.')
122
  args = parser.parse_args()
123
 
124
+ backend_id = backend_target_pairs[args.backend_target][0]
125
+ target_id = backend_target_pairs[args.backend_target][1]
126
+
127
  model = NanoDet(modelPath=args.model,
128
  prob_threshold=args.confidence,
129
  iou_threshold=args.nms,
130
+ backend_id=backend_id,
131
+ target_id=target_id)
132
 
133
+ tm = cv.TickMeter()
134
  tm.reset()
135
  if args.input is not None:
136
+ image = cv.imread(args.input)
137
+ input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
138
 
139
  # Letterbox transformation
140
  input_blob, letterbox_scale = letterbox(input_blob)
 
149
 
150
  if args.save:
151
  print('Results saved to result.jpg\n')
152
+ cv.imwrite('result.jpg', img)
153
 
154
  if args.vis:
155
+ cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
156
+ cv.imshow(args.input, img)
157
+ cv.waitKey(0)
158
 
159
  else:
160
  print("Press any key to stop video capture")
161
  deviceId = 0
162
+ cap = cv.VideoCapture(deviceId)
163
 
164
+ while cv.waitKey(1) < 0:
165
  hasFrame, frame = cap.read()
166
  if not hasFrame:
167
  print('No frames grabbed!')
168
  break
169
 
170
+ input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
171
  input_blob, letterbox_scale = letterbox(input_blob)
172
  # Inference
173
  tm.start()
 
176
 
177
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
178
 
179
+ cv.imshow("NanoDet Demo", img)
180
 
181
  tm.reset()
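
The letterbox transform above records letterbox_scale = [top, left, newh, neww]; unletterbox (not shown in this hunk) has to invert it. A sketch of a plausible inverse under that convention, with an illustrative name (the demo's actual implementation may differ):

import numpy as np

def unletterbox_sketch(bbox, original_hw, letterbox_scale):
    top, left, newh, neww = letterbox_scale
    h, w = original_hw
    xmin, ymin, xmax, ymax = bbox
    # Remove the padding offsets, then undo the aspect-preserving resize.
    xmin = int(np.clip(xmin - left, 0, neww) / neww * w)
    xmax = int(np.clip(xmax - left, 0, neww) / neww * w)
    ymin = int(np.clip(ymin - top, 0, newh) / newh * h)
    ymax = int(np.clip(ymax - top, 0, newh) / newh * h)
    return xmin, ymin, xmax, ymax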
models/object_detection_nanodet/nanodet.py CHANGED
@@ -37,12 +37,10 @@ class NanoDet:
37
  def name(self):
38
  return self.__class__.__name__
39
 
40
- def setBackend(self, backendId):
41
- self.backend_id = backendId
 
42
  self.net.setPreferableBackend(self.backend_id)
43
-
44
- def setTarget(self, targetId):
45
- self.target_id = targetId
46
  self.net.setPreferableTarget(self.target_id)
47
 
48
  def pre_process(self, img):
 
37
  def name(self):
38
  return self.__class__.__name__
39
 
40
+ def setBackendAndTarget(self, backendId, targetId):
41
+ self.backend_id = backendId
42
+ self.target_id = targetId
43
  self.net.setPreferableBackend(self.backend_id)
 
 
 
44
  self.net.setPreferableTarget(self.target_id)
45
 
46
  def pre_process(self, img):
models/object_detection_yolox/demo.py CHANGED
@@ -1,29 +1,21 @@
1
  import numpy as np
2
- import cv2
3
  import argparse
4
 
5
  from yolox import YoloX
6
 
7
- def str2bool(v):
8
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
9
- return True
10
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
11
- return False
12
- else:
13
- raise NotImplementedError
14
-
15
- backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
16
- targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
17
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
18
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
19
 
20
- try:
21
- backends += [cv2.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv2.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
27
 
28
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
29
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
@@ -43,8 +35,8 @@ classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
43
  def letterbox(srcimg, target_size=(640, 640)):
44
  padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
45
  ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
46
- resized_img = cv2.resize(
47
- srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv2.INTER_LINEAR
48
  ).astype(np.float32)
49
  padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img
50
 
@@ -58,7 +50,7 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
58
 
59
  if fps is not None:
60
  fps_label = "FPS: %.2f" % fps
61
- cv2.putText(res_img, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
62
 
63
  for det in dets:
64
  box = unletterbox(det[:4], letterbox_scale).astype(np.int32)
@@ -68,39 +60,55 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
68
  x0, y0, x1, y1 = box
69
 
70
  text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
71
- font = cv2.FONT_HERSHEY_SIMPLEX
72
- txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
73
- cv2.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
74
- cv2.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
75
- cv2.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)
76
 
77
  return res_img
78
 
79
  if __name__=='__main__':
80
  parser = argparse.ArgumentParser(description='YoloX inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
81
- parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
82
- parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx', help="Path to the model")
83
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
84
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
85
- parser.add_argument('--confidence', default=0.5, type=float, help='Class confidence')
86
- parser.add_argument('--nms', default=0.5, type=float, help='Enter nms IOU threshold')
87
- parser.add_argument('--obj', default=0.5, type=float, help='Enter object threshold')
88
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
89
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  args = parser.parse_args()
91
 
 
 
 
92
  model_net = YoloX(modelPath=args.model,
93
  confThreshold=args.confidence,
94
  nmsThreshold=args.nms,
95
  objThreshold=args.obj,
96
- backendId=args.backend,
97
- targetId=args.target)
98
 
99
- tm = cv2.TickMeter()
100
  tm.reset()
101
  if args.input is not None:
102
- image = cv2.imread(args.input)
103
- input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
104
  input_blob, letterbox_scale = letterbox(input_blob)
105
 
106
  # Inference
@@ -113,25 +121,25 @@ if __name__=='__main__':
113
 
114
  if args.save:
115
  print('Results saved to result.jpg\n')
116
- cv2.imwrite('result.jpg', img)
117
 
118
  if args.vis:
119
- cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
120
- cv2.imshow(args.input, img)
121
- cv2.waitKey(0)
122
 
123
  else:
124
  print("Press any key to stop video capture")
125
  deviceId = 0
126
- cap = cv2.VideoCapture(deviceId)
127
 
128
- while cv2.waitKey(1) < 0:
129
  hasFrame, frame = cap.read()
130
  if not hasFrame:
131
  print('No frames grabbed!')
132
  break
133
 
134
- input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
135
  input_blob, letterbox_scale = letterbox(input_blob)
136
 
137
  # Inference
@@ -141,6 +149,6 @@ if __name__=='__main__':
141
 
142
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
143
 
144
- cv2.imshow("YoloX Demo", img)
145
 
146
  tm.reset()
 
1
  import numpy as np
2
+ import cv2 as cv
3
  import argparse
4
 
5
  from yolox import YoloX
6
 
7
+ # Check OpenCV version
8
+ assert cv.__version__ >= "4.7.0", \
9
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
 
 
 
 
 
 
 
 
10
 
11
+ # Valid combinations of backends and targets
12
+ backend_target_pairs = [
13
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
14
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
16
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
17
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
18
+ ]
19
 
20
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
21
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
 
35
  def letterbox(srcimg, target_size=(640, 640)):
36
  padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
37
  ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
38
+ resized_img = cv.resize(
39
+ srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR
40
  ).astype(np.float32)
41
  padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img
42
 
 
50
 
51
  if fps is not None:
52
  fps_label = "FPS: %.2f" % fps
53
+ cv.putText(res_img, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
54
 
55
  for det in dets:
56
  box = unletterbox(det[:4], letterbox_scale).astype(np.int32)
 
60
  x0, y0, x1, y1 = box
61
 
62
  text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
63
+ font = cv.FONT_HERSHEY_SIMPLEX
64
+ txt_size = cv.getTextSize(text, font, 0.4, 1)[0]
65
+ cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
66
+ cv.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
67
+ cv.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)
68
 
69
  return res_img
70
 
71
  if __name__=='__main__':
72
  parser = argparse.ArgumentParser(description='YoloX inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
73
+ parser.add_argument('--input', '-i', type=str,
74
+ help='Path to the input image. Omit for using default camera.')
75
+ parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx',
76
+ help="Path to the model")
77
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
78
+ help='''Choose one of the backend-target pairs to run this demo:
79
+ {:d}: (default) OpenCV implementation + CPU,
80
+ {:d}: CUDA + GPU (CUDA),
81
+ {:d}: CUDA + GPU (CUDA FP16),
82
+ {:d}: TIM-VX + NPU,
83
+ {:d}: CANN + NPU
84
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
85
+ parser.add_argument('--confidence', default=0.5, type=float,
86
+ help='Class confidence')
87
+ parser.add_argument('--nms', default=0.5, type=float,
88
+ help='NMS IoU threshold')
89
+ parser.add_argument('--obj', default=0.5, type=float,
90
+ help='Objectness score threshold')
91
+ parser.add_argument('--save', '-s', action='store_true',
92
+ help='Specify to save results. This flag is invalid when using camera.')
93
+ parser.add_argument('--vis', '-v', action='store_true',
94
+ help='Specify to open a window for result visualization. This flag is invalid when using camera.')
95
  args = parser.parse_args()
96
 
97
+ backend_id = backend_target_pairs[args.backend_target][0]
98
+ target_id = backend_target_pairs[args.backend_target][1]
99
+
100
  model_net = YoloX(modelPath=args.model,
101
  confThreshold=args.confidence,
102
  nmsThreshold=args.nms,
103
  objThreshold=args.obj,
104
+ backendId=backend_id,
105
+ targetId=target_id)
106
 
107
+ tm = cv.TickMeter()
108
  tm.reset()
109
  if args.input is not None:
110
+ image = cv.imread(args.input)
111
+ input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
112
  input_blob, letterbox_scale = letterbox(input_blob)
113
 
114
  # Inference
 
121
 
122
  if args.save:
123
  print('Resutls saved to result.jpg\n')
124
+ cv.imwrite('result.jpg', img)
125
 
126
  if args.vis:
127
+ cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
128
+ cv.imshow(args.input, img)
129
+ cv.waitKey(0)
130
 
131
  else:
132
  print("Press any key to stop video capture")
133
  deviceId = 0
134
+ cap = cv.VideoCapture(deviceId)
135
 
136
+ while cv.waitKey(1) < 0:
137
  hasFrame, frame = cap.read()
138
  if not hasFrame:
139
  print('No frames grabbed!')
140
  break
141
 
142
+ input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
143
  input_blob, letterbox_scale = letterbox(input_blob)
144
 
145
  # Inference
 
149
 
150
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
151
 
152
+ cv.imshow("YoloX Demo", img)
153
 
154
  tm.reset()
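
The indexing in vis() above (det[:4] for the box, classes[cls_id] and score for the label) suggests each row of the prediction array is laid out as [x0, y0, x1, y1, score, class_id]. A self-contained sketch of consuming it under that assumption, with fabricated data:

import numpy as np

dets = np.array([[10.0, 20.0, 110.0, 220.0, 0.87, 0.0]])  # one fabricated detection
for det in dets:
    box = det[:4].astype(np.int32)
    score, cls_id = det[-2], int(det[-1])
    print(box, score, cls_id)  # -> [ 10  20 110 220] 0.87 0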
models/object_detection_yolox/yolox.py CHANGED
@@ -23,12 +23,10 @@ class YoloX:
23
  def name(self):
24
  return self.__class__.__name__
25
 
26
- def setBackend(self, backendId):
27
- self.backendId = backendId
 
28
  self.net.setPreferableBackend(self.backendId)
29
-
30
- def setTarget(self, targetId):
31
- self.targetId = targetId
32
  self.net.setPreferableTarget(self.targetId)
33
 
34
  def preprocess(self, img):
 
23
  def name(self):
24
  return self.__class__.__name__
25
 
26
+ def setBackendAndTarget(self, backendId, targetId):
27
+ self.backendId = backendId
28
+ self.targetId = targetId
29
  self.net.setPreferableBackend(self.backendId)
 
 
 
30
  self.net.setPreferableTarget(self.targetId)
31
 
32
  def preprocess(self, img):
models/object_tracking_dasiamrpn/dasiamrpn.py CHANGED
@@ -27,18 +27,10 @@ class DaSiamRPN:
27
  def name(self):
28
  return self.__class__.__name__
29
 
30
- def setBackend(self, backend_id):
31
- self._backend_id = backend_id
32
- self._param = cv.TrackerDaSiamRPN_Params()
33
- self._param.model = self._model_path
34
- self._param.kernel_cls1 = self._kernel_cls1_path
35
- self._param.kernel_r1 = self._kernel_r1_path
36
- self._param.backend = self._backend_id
37
- self._param.target = self._target_id
38
- self._model = cv.TrackerDaSiamRPN.create(self._param)
39
 
40
- def setTarget(self, target_id):
41
- self._target_id = target_id
42
  self._param = cv.TrackerDaSiamRPN_Params()
43
  self._param.model = self._model_path
44
  self._param.kernel_cls1 = self._kernel_cls1_path
@@ -53,4 +45,4 @@ class DaSiamRPN:
53
  def infer(self, image):
54
  isLocated, bbox = self._model.update(image)
55
  score = self._model.getTrackingScore()
56
- return isLocated, bbox, score
 
27
  def name(self):
28
  return self.__class__.__name__
29
 
30
+ def setBackendAndTarget(self, backendId, targetId):
31
+ self._backend_id = backendId
32
+ self._target_id = targetId
 
 
 
 
 
 
33
 
 
 
34
  self._param = cv.TrackerDaSiamRPN_Params()
35
  self._param.model = self._model_path
36
  self._param.kernel_cls1 = self._kernel_cls1_path
 
45
  def infer(self, image):
46
  isLocated, bbox = self._model.update(image)
47
  score = self._model.getTrackingScore()
48
+ return isLocated, bbox, score
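
Unlike the dnn-based models, DaSiamRPN wraps cv.TrackerDaSiamRPN, whose backend and target are constructor parameters, so setBackendAndTarget has to rebuild the tracker rather than flip two flags. A condensed sketch of that rebuild, using the default model paths from the demo (the files are assumed to be present locally):

import cv2 as cv

param = cv.TrackerDaSiamRPN_Params()
param.model = 'object_tracking_dasiamrpn_model_2021nov.onnx'
param.kernel_cls1 = 'object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx'
param.kernel_r1 = 'object_tracking_dasiamrpn_kernel_r1_2021nov.onnx'
param.backend = cv.dnn.DNN_BACKEND_OPENCV
param.target = cv.dnn.DNN_TARGET_CPU
tracker = cv.TrackerDaSiamRPN.create(param)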
models/object_tracking_dasiamrpn/demo.py CHANGED
@@ -11,22 +11,41 @@ import cv2 as cv
11
 
12
  from dasiamrpn import DaSiamRPN
13
 
14
- def str2bool(v):
15
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
16
- return True
17
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
18
- return False
19
- else:
20
- raise NotImplementedError
 
 
 
 
 
21
 
22
  parser = argparse.ArgumentParser(
23
  description="Distractor-aware Siamese Networks for Visual Object Tracking (https://arxiv.org/abs/1808.06048)")
24
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input video. Omit for using default camera.')
25
- parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx', help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.')
26
- parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.')
27
- parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_r1.onnx.')
28
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
29
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  args = parser.parse_args()
31
 
32
  def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0), text_color=(0, 255, 0), fontScale=1, fontSize=1):
@@ -50,12 +69,16 @@ def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),tex
50
  return output
51
 
52
  if __name__ == '__main__':
 
 
 
53
  # Instantiate DaSiamRPN
54
  model = DaSiamRPN(
55
  kernel_cls1_path=args.kernel_cls1_path,
56
  kernel_r1_path=args.kernel_r1_path,
57
  model_path=args.model_path,
58
- )
 
59
 
60
  # Read from args.input
61
  _input = args.input
 
11
 
12
  from dasiamrpn import DaSiamRPN
13
 
14
+ # Check OpenCV version
15
+ assert cv.__version__ >= "4.7.0", \
16
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
17
+
18
+ # Valid combinations of backends and targets
19
+ backend_target_pairs = [
20
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
21
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
23
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
24
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
25
+ ]
26
 
27
  parser = argparse.ArgumentParser(
28
  description="Distractor-aware Siamese Networks for Visual Object Tracking (https://arxiv.org/abs/1808.06048)")
29
+ parser.add_argument('--input', '-i', type=str,
30
+ help='Usage: Set path to the input video. Omit for using default camera.')
31
+ parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx',
32
+ help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.')
33
+ parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx',
34
+ help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.')
35
+ parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx',
36
+ help='Usage: Set path to dasiamrpn_kernel_r1.onnx.')
37
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
38
+ help='''Choose one of the backend-target pairs to run this demo:
39
+ {:d}: (default) OpenCV implementation + CPU,
40
+ {:d}: CUDA + GPU (CUDA),
41
+ {:d}: CUDA + GPU (CUDA FP16),
42
+ {:d}: TIM-VX + NPU,
43
+ {:d}: CANN + NPU
44
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
45
+ parser.add_argument('--save', '-s', action='store_true',
46
+ help='Usage: Specify to save a file with results. Invalid in case of camera input.')
47
+ parser.add_argument('--vis', '-v', action='store_true',
48
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
49
  args = parser.parse_args()
50
 
51
  def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0), text_color=(0, 255, 0), fontScale=1, fontSize=1):
 
69
  return output
70
 
71
  if __name__ == '__main__':
72
+ backend_id = backend_target_pairs[args.backend_target][0]
73
+ target_id = backend_target_pairs[args.backend_target][1]
74
+
75
  # Instantiate DaSiamRPN
76
  model = DaSiamRPN(
77
  kernel_cls1_path=args.kernel_cls1_path,
78
  kernel_r1_path=args.kernel_r1_path,
79
  model_path=args.model_path,
80
+ backend_id=backend_id,
81
+ target_id=target_id)
82
 
83
  # Read from args.input
84
  _input = args.input
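
The same selection pattern recurs in every demo touched by this commit: one --backend_target index picks a pre-validated pair instead of two independently chosen flags. A minimal standalone sketch (the two-entry list is trimmed for brevity):

import cv2 as cv

backend_target_pairs = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
]
choice = 0  # would come from args.backend_target in the demos
backend_id, target_id = backend_target_pairs[choice]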
models/palm_detection_mediapipe/demo.py CHANGED
@@ -5,35 +5,40 @@ import cv2 as cv
5
 
6
  from mp_palmdet import MPPalmDet
7
 
8
- def str2bool(v):
9
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
10
- return True
11
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
12
- return False
13
- else:
14
- raise NotImplementedError
15
-
16
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
17
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
18
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
19
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
20
- try:
21
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
27
 
28
  parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
29
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
30
- parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx', help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
31
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
32
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
33
- parser.add_argument('--score_threshold', type=float, default=0.8, help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.')
34
- parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
35
- parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
36
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
37
  args = parser.parse_args()
38
 
39
  def visualize(image, results, print_results=False, fps=None):
@@ -71,12 +76,15 @@ def visualize(image, results, print_results=False, fps=None):
71
  return output
72
 
73
  if __name__ == '__main__':
 
 
 
74
  # Instantiate MPPalmDet
75
  model = MPPalmDet(modelPath=args.model,
76
  nmsThreshold=args.nms_threshold,
77
  scoreThreshold=args.score_threshold,
78
- backendId=args.backend,
79
- targetId=args.target)
80
 
81
  # If input is an image
82
  if args.input is not None:
@@ -123,4 +131,3 @@ if __name__ == '__main__':
123
  cv.imshow('MPPalmDet Demo', frame)
124
 
125
  tm.reset()
126
-
 
5
 
6
  from mp_palmdet import MPPalmDet
7
 
8
+ # Check OpenCV version
9
+ assert cv.__version__ >= "4.7.0", \
10
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
11
+
12
+ # Valid combinations of backends and targets
13
+ backend_target_pairs = [
14
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
16
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
17
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
18
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
19
+ ]
 
 
 
 
 
 
 
20
 
21
  parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
22
+ parser.add_argument('--input', '-i', type=str,
23
+ help='Usage: Set path to the input image. Omit for using default camera.')
24
+ parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx',
25
+ help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
26
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
27
+ help='''Choose one of the backend-target pairs to run this demo:
28
+ {:d}: (default) OpenCV implementation + CPU,
29
+ {:d}: CUDA + GPU (CUDA),
30
+ {:d}: CUDA + GPU (CUDA FP16),
31
+ {:d}: TIM-VX + NPU,
32
+ {:d}: CANN + NPU
33
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
34
+ parser.add_argument('--score_threshold', type=float, default=0.8,
35
+ help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out palms of confidence < score_threshold. An empirical score threshold for the quantized model is 0.49.')
36
+ parser.add_argument('--nms_threshold', type=float, default=0.3,
37
+ help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
38
+ parser.add_argument('--save', '-s', action='store_true',
39
+ help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
40
+ parser.add_argument('--vis', '-v', action='store_true',
41
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
42
  args = parser.parse_args()
43
 
44
  def visualize(image, results, print_results=False, fps=None):
 
76
  return output
77
 
78
  if __name__ == '__main__':
79
+ backend_id = backend_target_pairs[args.backend_target][0]
80
+ target_id = backend_target_pairs[args.backend_target][1]
81
+
82
  # Instantiate MPPalmDet
83
  model = MPPalmDet(modelPath=args.model,
84
  nmsThreshold=args.nms_threshold,
85
  scoreThreshold=args.score_threshold,
86
+ backendId=backend_id,
87
+ targetId=target_id)
88
 
89
  # If input is an image
90
  if args.input is not None:
 
131
  cv.imshow('MPPalmDet Demo', frame)
132
 
133
  tm.reset()
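
Switching --save and --vis from type=str2bool to action='store_true' also changes behavior, not just style: the flags now default to False, so showing results becomes opt-in via -v, where the old --vis defaulted to True. A minimal illustration:

import argparse

p = argparse.ArgumentParser()
p.add_argument('--vis', '-v', action='store_true')
print(p.parse_args([]).vis)      # False: visualization is now opt-in
print(p.parse_args(['-v']).vis)  # True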
 
models/palm_detection_mediapipe/mp_palmdet.py CHANGED
@@ -22,12 +22,10 @@ class MPPalmDet:
22
  def name(self):
23
  return self.__class__.__name__
24
 
25
- def setBackend(self, backendId):
26
  self.backend_id = backendId
27
- self.model.setPreferableBackend(self.backend_id)
28
-
29
- def setTarget(self, targetId):
30
  self.target_id = targetId
 
31
  self.model.setPreferableTarget(self.target_id)
32
 
33
  def _preprocess(self, image):
@@ -35,7 +33,7 @@ class MPPalmDet:
35
  ratio = min(self.input_size / image.shape[:2])
36
  if image.shape[0] != self.input_size[0] or image.shape[1] != self.input_size[1]:
37
  # keep aspect ratio when resize
38
- ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.int)
39
  image = cv.resize(image, (ratio_size[1], ratio_size[0]))
40
  pad_h = self.input_size[0] - ratio_size[0]
41
  pad_w = self.input_size[1] - ratio_size[1]
@@ -46,7 +44,7 @@ class MPPalmDet:
46
  image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
47
  image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
48
  image = image.astype(np.float32) / 255.0 # norm
49
- pad_bias = (pad_bias / ratio).astype(np.int)
50
  return image[np.newaxis, :, :, :], pad_bias # hwc -> nhwc
51
 
52
  def infer(self, image):
 
22
  def name(self):
23
  return self.__class__.__name__
24
 
25
+ def setBackendAndTarget(self, backendId, targetId):
26
  self.backend_id = backendId
 
 
 
27
  self.target_id = targetId
28
+ self.model.setPreferableBackend(self.backend_id)
29
  self.model.setPreferableTarget(self.target_id)
30
 
31
  def _preprocess(self, image):
 
33
  ratio = min(self.input_size / image.shape[:2])
34
  if image.shape[0] != self.input_size[0] or image.shape[1] != self.input_size[1]:
35
  # keep aspect ratio when resize
36
+ ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.int32)
37
  image = cv.resize(image, (ratio_size[1], ratio_size[0]))
38
  pad_h = self.input_size[0] - ratio_size[0]
39
  pad_w = self.input_size[1] - ratio_size[1]
 
44
  image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
45
  image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
46
  image = image.astype(np.float32) / 255.0 # norm
47
+ pad_bias = (pad_bias / ratio).astype(np.int32)
48
  return image[np.newaxis, :, :, :], pad_bias # hwc -> nhwc
49
 
50
  def infer(self, image):
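
The astype(np.int) to astype(np.int32) changes above are not cosmetic: np.int was a deprecated alias for the built-in int and was removed in NumPy 1.24, so the old spelling raises AttributeError on current NumPy. For example:

import numpy as np

ratio_size = (np.array([720, 1280]) * 0.325).astype(np.int32)
print(ratio_size)  # [234 416]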
models/person_reid_youtureid/demo.py CHANGED
@@ -12,36 +12,41 @@ import cv2 as cv
12
 
13
  from youtureid import YoutuReID
14
 
15
- def str2bool(v):
16
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
17
- return True
18
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
19
- return False
20
- else:
21
- raise NotImplementedError
22
-
23
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
24
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
25
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
26
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
27
- try:
28
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
29
- targets += [cv.dnn.DNN_TARGET_NPU]
30
- help_msg_backends += "; {:d}: TIMVX"
31
- help_msg_targets += "; {:d}: NPU"
32
- except:
33
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
34
 
35
  parser = argparse.ArgumentParser(
36
  description="ReID baseline models from Tencent Youtu Lab")
37
- parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
38
- parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
39
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
40
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
41
- parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
42
- parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
43
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
44
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
 
 
 
 
 
 
 
 
 
 
 
45
  args = parser.parse_args()
46
 
47
  def readImageFromDirectory(img_dir, w=128, h=256):
@@ -78,8 +83,11 @@ def visualize(results, query_dir, gallery_dir, output_size=(128, 384)):
78
  return results_vis
79
 
80
  if __name__ == '__main__':
 
 
 
81
  # Instantiate YoutuReID for person ReID
82
- net = YoutuReID(modelPath=args.model, backendId=args.backend, targetId=args.target)
83
 
84
  # Read images from dir
85
  query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
 
12
 
13
  from youtureid import YoutuReID
14
 
15
+ # Check OpenCV version
16
+ assert cv.__version__ >= "4.7.0", \
17
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
18
+
19
+ # Valid combinations of backends and targets
20
+ backend_target_pairs = [
21
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
23
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
24
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
25
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
26
+ ]
 
 
 
 
 
 
 
27
 
28
  parser = argparse.ArgumentParser(
29
  description="ReID baseline models from Tencent Youtu Lab")
30
+ parser.add_argument('--query_dir', '-q', type=str,
31
+ help='Query directory.')
32
+ parser.add_argument('--gallery_dir', '-g', type=str,
33
+ help='Gallery directory.')
34
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
35
+ help='''Choose one of the backend-target pairs to run this demo:
36
+ {:d}: (default) OpenCV implementation + CPU,
37
+ {:d}: CUDA + GPU (CUDA),
38
+ {:d}: CUDA + GPU (CUDA FP16),
39
+ {:d}: TIM-VX + NPU,
40
+ {:d}: CANN + NPU
41
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
42
+ parser.add_argument('--topk', type=int, default=10,
43
+ help='Top-K closest from gallery for each query.')
44
+ parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx',
45
+ help='Path to the model.')
46
+ parser.add_argument('--save', '-s', action='store_true',
47
+ help='Specify to save results. This flag is invalid when using camera.')
48
+ parser.add_argument('--vis', '-v', action='store_true',
49
+ help='Specify to open a window for result visualization. This flag is invalid when using camera.')
50
  args = parser.parse_args()
51
 
52
  def readImageFromDirectory(img_dir, w=128, h=256):
 
83
  return results_vis
84
 
85
  if __name__ == '__main__':
86
+ backend_id = backend_target_pairs[args.backend_target][0]
87
+ target_id = backend_target_pairs[args.backend_target][1]
88
+
89
  # Instantiate YoutuReID for person ReID
90
+ net = YoutuReID(modelPath=args.model, backendId=backend_id, targetId=target_id)
91
 
92
  # Read images from dir
93
  query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
models/person_reid_youtureid/youtureid.py CHANGED
@@ -26,12 +26,10 @@ class YoutuReID:
26
  def name(self):
27
  return self.__class__.__name__
28
 
29
- def setBackend(self, backend_id):
30
- self._backendId = backend_id
 
31
  self._model.setPreferableBackend(self._backendId)
32
-
33
- def setTarget(self, target_id):
34
- self._targetId = target_id
35
  self._model.setPreferableTarget(self._targetId)
36
 
37
  def _preprocess(self, image):
@@ -67,4 +65,3 @@ class YoutuReID:
67
  dist = np.matmul(query_arr, gallery_arr.T)
68
  idx = np.argsort(-dist, axis=1)
69
  return [i[0:topK] for i in idx]
70
-
 
26
  def name(self):
27
  return self.__class__.__name__
28
 
29
+ def setBackendAndTarget(self, backendId, targetId):
30
+ self._backendId = backendId
31
+ self._targetId = targetId
32
  self._model.setPreferableBackend(self._backendId)
 
 
 
33
  self._model.setPreferableTarget(self._targetId)
34
 
35
  def _preprocess(self, image):
 
65
  dist = np.matmul(query_arr, gallery_arr.T)
66
  idx = np.argsort(-dist, axis=1)
67
  return [i[0:topK] for i in idx]
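
The query method above ranks every gallery image by the dot product of feature vectors and keeps the topK indices per query. A self-contained sketch with fabricated data, assuming the features are L2-normalized so the dot product acts as cosine similarity:

import numpy as np

rng = np.random.default_rng(0)
query_arr = rng.normal(size=(2, 4)).astype(np.float32)    # 2 queries
gallery_arr = rng.normal(size=(5, 4)).astype(np.float32)  # 5 gallery entries
query_arr /= np.linalg.norm(query_arr, axis=1, keepdims=True)
gallery_arr /= np.linalg.norm(gallery_arr, axis=1, keepdims=True)

dist = np.matmul(query_arr, gallery_arr.T)  # (2, 5) similarity matrix
idx = np.argsort(-dist, axis=1)             # best match first
print([i[0:3] for i in idx])                # top-3 gallery indices per query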
 
models/qrcode_wechatqrcode/demo.py CHANGED
@@ -11,23 +11,43 @@ import cv2 as cv
11
 
12
  from wechatqrcode import WeChatQRCode
13
 
14
- def str2bool(v):
15
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
16
- return True
17
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
18
- return False
19
- else:
20
- raise NotImplementedError
 
 
 
 
 
21
 
22
  parser = argparse.ArgumentParser(
23
  description="WeChat QR code detector for detecting and parsing QR code (https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode)")
24
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
25
- parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt', help='Usage: Set path to detect.prototxt.')
26
- parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel', help='Usage: Set path to detect.caffemodel.')
27
- parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt', help='Usage: Set path to sr.prototxt.')
28
- parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel', help='Usage: Set path to sr.caffemodel.')
29
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
30
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  args = parser.parse_args()
32
 
33
  def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255, 0), fps=None):
@@ -56,11 +76,16 @@ def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255,
56
 
57
 
58
  if __name__ == '__main__':
 
 
 
59
  # Instantiate WeChatQRCode
60
  model = WeChatQRCode(args.detect_prototxt_path,
61
  args.detect_model_path,
62
  args.sr_prototxt_path,
63
- args.sr_model_path)
 
 
64
 
65
  # If input is an image:
66
  if args.input is not None:
@@ -107,4 +132,4 @@ if __name__ == '__main__':
107
  # Visualize results in a new window
108
  cv.imshow('WeChatQRCode Demo', frame)
109
 
110
- tm.reset()
 
11
 
12
  from wechatqrcode import WeChatQRCode
13
 
14
+ # Check OpenCV version
15
+ assert cv.__version__ >= "4.7.0", \
16
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
17
+
18
+ # Valid combinations of backends and targets
19
+ backend_target_pairs = [
20
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
21
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
23
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
24
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
25
+ ]
26
 
27
  parser = argparse.ArgumentParser(
28
  description="WeChat QR code detector for detecting and parsing QR code (https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode)")
29
+ parser.add_argument('--input', '-i', type=str,
30
+ help='Usage: Set path to the input image. Omit for using default camera.')
31
+ parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt',
32
+ help='Usage: Set path to detect.prototxt.')
33
+ parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel',
34
+ help='Usage: Set path to detect.caffemodel.')
35
+ parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt',
36
+ help='Usage: Set path to sr.prototxt.')
37
+ parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel',
38
+ help='Usage: Set path to sr.caffemodel.')
39
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
40
+ help='''Choose one of the backend-target pairs to run this demo:
41
+ {:d}: (default) OpenCV implementation + CPU,
42
+ {:d}: CUDA + GPU (CUDA),
43
+ {:d}: CUDA + GPU (CUDA FP16),
44
+ {:d}: TIM-VX + NPU,
45
+ {:d}: CANN + NPU
46
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
47
+ parser.add_argument('--save', '-s', action='store_true',
48
+ help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
49
+ parser.add_argument('--vis', '-v', action='store_true',
50
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
51
  args = parser.parse_args()
52
 
53
  def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255, 0), fps=None):
 
76
 
77
 
78
  if __name__ == '__main__':
79
+ backend_id = backend_target_pairs[args.backend_target][0]
80
+ target_id = backend_target_pairs[args.backend_target][1]
81
+
82
  # Instantiate WeChatQRCode
83
  model = WeChatQRCode(args.detect_prototxt_path,
84
  args.detect_model_path,
85
  args.sr_prototxt_path,
86
+ args.sr_model_path,
87
+ backendId=backend_id,
88
+ targetId=target_id)
89
 
90
  # If input is an image:
91
  if args.input is not None:
 
132
  # Visualize results in a new window
133
  cv.imshow('WeChatQRCode Demo', frame)
134
 
135
+ tm.reset()
models/qrcode_wechatqrcode/wechatqrcode.py CHANGED
@@ -8,27 +8,27 @@ import numpy as np
8
  import cv2 as cv # needs to have cv.wechat_qrcode_WeChatQRCode, which requires compile from source with opencv_contrib/modules/wechat_qrcode
9
 
10
  class WeChatQRCode:
11
- def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path):
12
  self._model = cv.wechat_qrcode_WeChatQRCode(
13
  detect_prototxt_path,
14
  detect_model_path,
15
  sr_prototxt_path,
16
  sr_model_path
17
  )
 
 
 
 
18
 
19
  @property
20
  def name(self):
21
  return self.__class__.__name__
22
 
23
- def setBackend(self, backend_id):
24
- # self._model.setPreferableBackend(backend_id)
25
- if backend_id != 0:
26
- raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
27
-
28
- def setTarget(self, target_id):
29
- # self._model.setPreferableTarget(target_id)
30
- if target_id != 0:
31
  raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
32
 
33
  def infer(self, image):
34
- return self._model.detectAndDecode(image)
 
8
  import cv2 as cv # needs to have cv.wechat_qrcode_WeChatQRCode, which requires compile from source with opencv_contrib/modules/wechat_qrcode
9
 
10
  class WeChatQRCode:
11
+ def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path, backendId=0, targetId=0):
12
  self._model = cv.wechat_qrcode_WeChatQRCode(
13
  detect_prototxt_path,
14
  detect_model_path,
15
  sr_prototxt_path,
16
  sr_model_path
17
  )
18
+ if backendId != 0:
19
+ raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
20
+ if targetId != 0:
21
+ raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
22
 
23
  @property
24
  def name(self):
25
  return self.__class__.__name__
26
 
27
+ def setBackendAndTarget(self, backendId, targetId):
28
+ if backendId != 0:
29
+ raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
30
+ if targetId != 0:
 
 
 
 
31
  raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
32
 
33
  def infer(self, image):
34
+ return self._model.detectAndDecode(image)
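
WeChatQRCode is the one wrapper here that cannot honor non-default pairs at all, since cv.wechat_qrcode_WeChatQRCode (from opencv_contrib's wechat_qrcode module) exposes no backend or target controls; hence the NotImplementedError guards. A hedged usage sketch with the demo's default model paths (assumed present) and a hypothetical input image:

import numpy as np
import cv2 as cv

detector = cv.wechat_qrcode_WeChatQRCode(
    'detect_2021sep.prototxt', 'detect_2021sep.caffemodel',
    'sr_2021sep.prototxt', 'sr_2021sep.caffemodel')
image = cv.imread('qrcode.jpg')  # hypothetical input path
res, points = detector.detectAndDecode(image)
for text, corners in zip(res, points):
    print(text, np.asarray(corners).reshape(-1, 2))  # decoded string + corner coordinates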
models/text_detection_db/db.py CHANGED
@@ -38,12 +38,10 @@ class DB:
38
  def name(self):
39
  return self.__class__.__name__
40
 
41
- def setBackend(self, backend):
42
- self._backendId = backend
 
43
  self._model.setPreferableBackend(self._backendId)
44
-
45
- def setTarget(self, target):
46
- self._targetId = target
47
  self._model.setPreferableTarget(self._targetId)
48
 
49
  def setInputSize(self, input_size):
@@ -55,4 +53,3 @@ class DB:
55
  assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
56
 
57
  return self._model.detect(image)
58
-
 
38
  def name(self):
39
  return self.__class__.__name__
40
 
41
+ def setBackendAndTarget(self, backendId, targetId):
42
+ self._backendId = backendId
43
+ self._targetId = targetId
44
  self._model.setPreferableBackend(self._backendId)
 
 
 
45
  self._model.setPreferableTarget(self._targetId)
46
 
47
  def setInputSize(self, input_size):
 
53
  assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
54
 
55
  return self._model.detect(image)
 
models/text_detection_db/demo.py CHANGED
@@ -11,41 +11,48 @@ import cv2 as cv
11
 
12
  from db import DB
13
 
14
- def str2bool(v):
15
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
16
- return True
17
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
18
- return False
19
- else:
20
- raise NotImplementedError
21
-
22
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
23
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
24
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
25
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
26
- try:
27
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
28
- targets += [cv.dnn.DNN_TARGET_NPU]
29
- help_msg_backends += "; {:d}: TIMVX"
30
- help_msg_targets += "; {:d}: NPU"
31
- except:
32
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
33
 
34
  parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
35
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
36
- parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
37
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
38
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 
 
 
 
 
 
 
 
39
  parser.add_argument('--width', type=int, default=736,
40
  help='Usage: Resize input image to a certain width, default = 736. It should be a multiple of 32.')
41
  parser.add_argument('--height', type=int, default=736,
42
  help='Usage: Resize input image to a certain height, default = 736. It should be a multiple of 32.')
43
- parser.add_argument('--binary_threshold', type=float, default=0.3, help='Usage: Threshold of the binary map, default = 0.3.')
44
- parser.add_argument('--polygon_threshold', type=float, default=0.5, help='Usage: Threshold of polygons, default = 0.5.')
45
- parser.add_argument('--max_candidates', type=int, default=200, help='Usage: Set maximum number of polygon candidates, default = 200.')
46
- parser.add_argument('--unclip_ratio', type=np.float64, default=2.0, help=' Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
47
- parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
48
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
49
  args = parser.parse_args()
50
 
51
  def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
@@ -60,6 +67,9 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isC
60
  return output
61
 
62
  if __name__ == '__main__':
63
  # Instantiate DB
64
  model = DB(modelPath=args.model,
65
  inputSize=[args.width, args.height],
@@ -67,9 +77,8 @@ if __name__ == '__main__':
67
  polygonThreshold=args.polygon_threshold,
68
  maxCandidates=args.max_candidates,
69
  unclipRatio=args.unclip_ratio,
70
- backendId=args.backend,
71
- targetId=args.target
72
- )
73
 
74
  # If input is an image
75
  if args.input is not None:
@@ -143,4 +152,3 @@ if __name__ == '__main__':
143
  cv.imshow('{} Demo'.format(model.name), original_image)
144
 
145
  tm.reset()
146
-
 
11
 
12
  from db import DB
13
 
14
+ # Check OpenCV version
15
+ assert cv.__version__ >= "4.7.0", \
16
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
17
+
18
+ # Valid combinations of backends and targets
19
+ backend_target_pairs = [
20
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
21
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
23
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
24
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
25
+ ]
26
 
27
  parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
28
+ parser.add_argument('--input', '-i', type=str,
29
+ help='Usage: Set path to the input image. Omit to use the default camera.')
30
+ parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx',
31
+ help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
32
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
33
+ help='''Choose one of the backend-target pairs to run this demo:
34
+ {:d}: (default) OpenCV implementation + CPU,
35
+ {:d}: CUDA + GPU (CUDA),
36
+ {:d}: CUDA + GPU (CUDA FP16),
37
+ {:d}: TIM-VX + NPU,
38
+ {:d}: CANN + NPU
39
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
40
  parser.add_argument('--width', type=int, default=736,
41
  help='Usage: Resize input image to a certain width, default = 736. It should be a multiple of 32.')
42
  parser.add_argument('--height', type=int, default=736,
43
  help='Usage: Resize input image to a certain height, default = 736. It should be a multiple of 32.')
44
+ parser.add_argument('--binary_threshold', type=float, default=0.3,
45
+ help='Usage: Threshold of the binary map, default = 0.3.')
46
+ parser.add_argument('--polygon_threshold', type=float, default=0.5,
47
+ help='Usage: Threshold of polygons, default = 0.5.')
48
+ parser.add_argument('--max_candidates', type=int, default=200,
49
+ help='Usage: Set maximum number of polygon candidates, default = 200.')
50
+ parser.add_argument('--unclip_ratio', type=np.float64, default=2.0,
51
+ help='Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
52
+ parser.add_argument('--save', '-s', action='store_true',
53
+ help='Usage: Specify to save a file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
54
+ parser.add_argument('--vis', '-v', action='store_true',
55
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
56
  args = parser.parse_args()
57
 
58
  def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
 
67
  return output
68
 
69
  if __name__ == '__main__':
70
+ backend_id = backend_target_pairs[args.backend_target][0]
71
+ target_id = backend_target_pairs[args.backend_target][1]
72
+
73
  # Instantiate DB
74
  model = DB(modelPath=args.model,
75
  inputSize=[args.width, args.height],
 
77
  polygonThreshold=args.polygon_threshold,
78
  maxCandidates=args.max_candidates,
79
  unclipRatio=args.unclip_ratio,
80
+ backendId=backend_id,
81
+ targetId=target_id)
 
82
 
83
  # If input is an image
84
  if args.input is not None:
 
152
  cv.imshow('{} Demo'.format(model.name), original_image)
153
 
154
  tm.reset()
 
models/text_recognition_crnn/crnn.py CHANGED
@@ -43,12 +43,10 @@ class CRNN:
43
  def _load_charset(self, charset):
44
  return ''.join(charset.splitlines())
45
 
46
- def setBackend(self, backend_id):
47
- self._backendId = backend_id
 
48
  self._model.setPreferableBackend(self._backendId)
49
-
50
- def setTarget(self, target_id):
51
- self._targetId = target_id
52
  self._model.setPreferableTarget(self._targetId)
53
 
54
  def _preprocess(self, image, rbbox):
 
43
  def _load_charset(self, charset):
44
  return ''.join(charset.splitlines())
45
 
46
+ def setBackendAndTarget(self, backendId, targetId):
47
+ self._backendId = backendId
48
+ self._targetId = targetId
49
  self._model.setPreferableBackend(self._backendId)
50
  self._model.setPreferableTarget(self._targetId)
51
 
52
  def _preprocess(self, image, rbbox):
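The demos index into backend_target_pairs with an unchecked --backend_target value, so an out-of-range index raises IndexError. A small guard sketch before applying the setter above; select_backend_target is a hypothetical helper, not part of this commit:

    def select_backend_target(pairs, index):
        # Fall back to the default OpenCV + CPU pair on an out-of-range index
        if not 0 <= index < len(pairs):
            index = 0
        return pairs[index]

    backend_id, target_id = select_backend_target(backend_target_pairs, args.backend_target)
    recognizer.setBackendAndTarget(backend_id, target_id)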
models/text_recognition_crnn/demo.py CHANGED
@@ -15,38 +15,41 @@ from crnn import CRNN
15
  sys.path.append('../text_detection_db')
16
  from db import DB
17
 
18
- def str2bool(v):
19
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
20
- return True
21
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
22
- return False
23
- else:
24
- raise NotImplementedError
25
-
26
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
27
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
28
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
29
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
30
- try:
31
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
32
- targets += [cv.dnn.DNN_TARGET_NPU]
33
- help_msg_backends += "; {:d}: TIMVX"
34
- help_msg_targets += "; {:d}: NPU"
35
- except:
36
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
37
 
38
  parser = argparse.ArgumentParser(
39
  description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
40
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
41
- parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
42
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
43
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
44
- parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
45
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
46
  parser.add_argument('--width', type=int, default=736,
47
  help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
48
  parser.add_argument('--height', type=int, default=736,
49
  help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
50
  args = parser.parse_args()
51
 
52
  def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
@@ -59,8 +62,9 @@ def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2
59
  return output
60
 
61
  if __name__ == '__main__':
62
- # Instantiate CRNN for text recognition
63
- recognizer = CRNN(modelPath=args.model)
 
64
  # Instantiate DB for text detection
65
  detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
66
  inputSize=[args.width, args.height],
@@ -68,9 +72,10 @@ if __name__ == '__main__':
68
  polygonThreshold=0.5,
69
  maxCandidates=200,
70
  unclipRatio=2.0,
71
- backendId=args.backend,
72
- targetId=args.target
73
- )
 
74
 
75
  # If input is an image
76
  if args.input is not None:
@@ -161,4 +166,3 @@ if __name__ == '__main__':
161
 
162
  # Visualize results in a new Window
163
  cv.imshow('{} Demo'.format(recognizer.name), original_image)
164
-
 
15
  sys.path.append('../text_detection_db')
16
  from db import DB
17
 
18
+ # Check OpenCV version
19
+ assert cv.__version__ >= "4.7.0", \
20
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
21
+
22
+ # Valid combinations of backends and targets
23
+ backend_target_pairs = [
24
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
25
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
26
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
27
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
28
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
29
+ ]
30
 
31
  parser = argparse.ArgumentParser(
32
  description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
33
+ parser.add_argument('--input', '-i', type=str,
34
+ help='Usage: Set path to the input image. Omit to use the default camera.')
35
+ parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx',
36
+ help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
37
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
38
+ help='''Choose one of the backend-target pairs to run this demo:
39
+ {:d}: (default) OpenCV implementation + CPU,
40
+ {:d}: CUDA + GPU (CUDA),
41
+ {:d}: CUDA + GPU (CUDA FP16),
42
+ {:d}: TIM-VX + NPU,
43
+ {:d}: CANN + NPU
44
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
45
  parser.add_argument('--width', type=int, default=736,
46
  help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
47
  parser.add_argument('--height', type=int, default=736,
48
  help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
49
+ parser.add_argument('--save', '-s', action='store_true',
50
+ help='Usage: Specify to save a file with results. Invalid in case of camera input.')
51
+ parser.add_argument('--vis', '-v', action='store_true',
52
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
53
  args = parser.parse_args()
54
 
55
  def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
 
62
  return output
63
 
64
  if __name__ == '__main__':
65
+ backend_id = backend_target_pairs[args.backend_target][0]
66
+ target_id = backend_target_pairs[args.backend_target][1]
67
+
68
  # Instantiate DB for text detection
69
  detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
70
  inputSize=[args.width, args.height],
 
72
  polygonThreshold=0.5,
73
  maxCandidates=200,
74
  unclipRatio=2.0,
75
+ backendId=backend_id,
76
+ targetId=target_id)
77
+ # Instantiate CRNN for text recognition
78
+ recognizer = CRNN(modelPath=args.model, backendId=backend_id, targetId=target_id)
79
 
80
  # If input is an image
81
  if args.input is not None:
 
166
 
167
  # Visualize results in a new Window
168
  cv.imshow('{} Demo'.format(recognizer.name), original_image)
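Illustrative invocations of the updated demos (image paths are examples; -bt values index into the backend_target_pairs table defined above):

    # default: OpenCV backend on CPU, show results in a window
    python3 demo.py --input sample.jpg -v
    # CUDA backend with the FP16 target, save the result instead
    python3 demo.py --input sample.jpg -bt 2 -s

Since --save and --vis are now store_true flags they take no value; argparse would reject a trailing "True"/"False" as an unrecognized argument.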