ytfeng committed
Commit a07f7bd · 1 parent: 584bcfa

Limit combinations of backends and targets in demos and benchmark (#145)

* limit backend and target combination in demos and benchmark

* simpler version checking

Files changed (33)
  1. benchmark/benchmark.py +35 -14
  2. models/face_detection_yunet/demo.py +39 -31
  3. models/face_detection_yunet/yunet.py +1 -13
  4. models/face_recognition_sface/demo.py +35 -30
  5. models/face_recognition_sface/sface.py +1 -11
  6. models/facial_expression_recognition/demo.py +33 -30
  7. models/facial_expression_recognition/facial_fer_model.py +3 -5
  8. models/handpose_estimation_mediapipe/demo.py +36 -30
  9. models/handpose_estimation_mediapipe/mp_handpose.py +3 -6
  10. models/human_segmentation_pphumanseg/demo.py +31 -26
  11. models/human_segmentation_pphumanseg/pphumanseg.py +3 -6
  12. models/image_classification_mobilenet/demo.py +26 -30
  13. models/image_classification_mobilenet/mobilenet.py +3 -5
  14. models/image_classification_ppresnet/demo.py +27 -24
  15. models/image_classification_ppresnet/ppresnet.py +3 -5
  16. models/license_plate_detection_yunet/demo.py +41 -32
  17. models/license_plate_detection_yunet/lpd_yunet.py +2 -4
  18. models/object_detection_nanodet/demo.py +56 -49
  19. models/object_detection_nanodet/nanodet.py +3 -5
  20. models/object_detection_yolox/demo.py +58 -50
  21. models/object_detection_yolox/yolox.py +3 -5
  22. models/object_tracking_dasiamrpn/dasiamrpn.py +4 -12
  23. models/object_tracking_dasiamrpn/demo.py +37 -14
  24. models/palm_detection_mediapipe/demo.py +37 -30
  25. models/palm_detection_mediapipe/mp_palmdet.py +4 -6
  26. models/person_reid_youtureid/demo.py +36 -28
  27. models/person_reid_youtureid/youtureid.py +3 -6
  28. models/qrcode_wechatqrcode/demo.py +41 -16
  29. models/qrcode_wechatqrcode/wechatqrcode.py +10 -10
  30. models/text_detection_db/db.py +3 -6
  31. models/text_detection_db/demo.py +41 -33
  32. models/text_recognition_crnn/crnn.py +3 -5
  33. models/text_recognition_crnn/demo.py +35 -31
benchmark/benchmark.py CHANGED
@@ -8,9 +8,31 @@ import cv2 as cv
 from models import MODELS
 from utils import METRICS, DATALOADERS
 
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
+
 parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
 parser.add_argument('--cfg', '-c', type=str,
                     help='Benchmarking on the given config.')
+parser.add_argument('--cfg_overwrite_backend_target', type=int, default=-1,
+                    help='''Choose one of the backend-target pairs to run this benchmark:
+                        others: (default) use the one from config,
+                        {:d}: OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument("--fp32", action="store_true", help="Runs models of float32 precision only.")
 parser.add_argument("--fp16", action="store_true", help="Runs models of float16 precision only.")
 parser.add_argument("--int8", action="store_true", help="Runs models of int8 precision only.")
@@ -56,6 +78,8 @@ class Benchmark:
             opencv=cv.dnn.DNN_BACKEND_OPENCV,
             # vkcom=cv.dnn.DNN_BACKEND_VKCOM,
             cuda=cv.dnn.DNN_BACKEND_CUDA,
+            timvx=cv.dnn.DNN_BACKEND_TIMVX,
+            cann=cv.dnn.DNN_BACKEND_CANN,
         )
 
         target_id = kwargs.pop('target', 'cpu')
@@ -69,28 +93,20 @@ class Benchmark:
             cuda=cv.dnn.DNN_TARGET_CUDA,
             cuda_fp16=cv.dnn.DNN_TARGET_CUDA_FP16,
             # hddl=cv.dnn.DNN_TARGET_HDDL,
+            npu=cv.dnn.DNN_TARGET_NPU,
         )
 
-        # add extra backends & targets
-        try:
-            available_backends['timvx'] = cv.dnn.DNN_BACKEND_TIMVX
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with TIM-VX backend enbaled. See https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more details on how to enable TIM-VX backend.')
-        try:
-            available_backends['cann'] = cv.dnn.DNN_BACKEND_CANN
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with CANN backend enabled. See https://github.com/opencv/opencv/wiki/Huawei-CANN-Backend for more details on how to enable CANN backend.')
-
         self._backend = available_backends[backend_id]
         self._target = available_targets[target_id]
 
         self._benchmark_results = dict()
 
+    def setBackendAndTarget(self, backend_id, target_id):
+        self._backend = backend_id
+        self._target = target_id
+
     def run(self, model):
-        model.setBackend(self._backend)
-        model.setTarget(self._target)
+        model.setBackendAndTarget(self._backend, self._target)
 
         for idx, data in enumerate(self._dataloader):
             filename, input_data = data[:2]
@@ -118,6 +134,11 @@ if __name__ == '__main__':
     # Instantiate benchmark
     benchmark = Benchmark(**cfg['Benchmark'])
 
+    if args.cfg_overwrite_backend_target >= 0:
+        backend_id = backend_target_pairs[args.cfg_overwrite_backend_target][0]
+        target_id = backend_target_pairs[args.cfg_overwrite_backend_target][1]
+        benchmark.setBackendAndTarget(backend_id, target_id)
+
     # Instantiate model
     model_config = cfg['Model']
     model_handler, model_paths = MODELS.get(model_config.pop('name'))
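
Note on the simplified version check: the assert compares cv.__version__ as a string, which is lexicographic, so "4.7.0" correctly beats "4.6.x" but a hypothetical "4.10.0" would sort before "4.7.0". A numeric tuple comparison avoids that edge case; a minimal sketch, not part of this commit:

# Sketch only (not from the commit): numeric version comparison.
import cv2 as cv

def version_tuple(v):
    # "4.7.0-dev" -> (4, 7, 0); keep only the leading numeric fields
    return tuple(int(x) for x in v.split('-')[0].split('.')[:3])

assert version_tuple(cv.__version__) >= (4, 7, 0), \
       "Please install latest opencv-python: python3 -m pip install --upgrade opencv-python"
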
models/face_detection_yunet/demo.py CHANGED
@@ -11,36 +11,42 @@ import cv2 as cv
 
 from yunet import YuNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx', help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a face, defauts to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
-parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
-parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx',
+                    help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Usage: Set the minimum needed confidence for the model to identify a face, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--top_k', type=int, default=5000,
+                    help='Usage: Keep top_k bounding boxes before NMS.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
@@ -70,14 +76,17 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate YuNet
     model = YuNet(modelPath=args.model,
                   inputSize=[320, 320],
                   confThreshold=args.conf_threshold,
                   nmsThreshold=args.nms_threshold,
                   topK=args.top_k,
-                  backendId=args.backend,
-                  targetId=args.target)
+                  backendId=backend_id,
+                  targetId=target_id)
 
     # If input is an image
     if args.input is not None:
@@ -134,4 +143,3 @@ if __name__ == '__main__':
         cv.imshow('YuNet Demo', frame)
 
         tm.reset()
-
 
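Every demo in this commit resolves both IDs from one index into backend_target_pairs. A slightly more defensive lookup, a sketch rather than part of the commit, validates the index and unpacks the row in one step:

# Sketch, assuming backend_target_pairs and args as defined in the demo above.
def resolve_backend_target(idx, pairs):
    if not 0 <= idx < len(pairs):
        raise SystemExit('--backend_target must be in [0, {}], got {}'.format(len(pairs) - 1, idx))
    backend_id, target_id = pairs[idx]  # unpack the [backend, target] row
    return backend_id, target_id

# backend_id, target_id = resolve_backend_target(args.backend_target, backend_target_pairs)
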
models/face_detection_yunet/yunet.py CHANGED
@@ -33,19 +33,8 @@ class YuNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceDetectorYN.create(
-            model=self._modelPath,
-            config="",
-            input_size=self._inputSize,
-            score_threshold=self._confThreshold,
-            nms_threshold=self._nmsThreshold,
-            top_k=self._topK,
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceDetectorYN.create(
             model=self._modelPath,
@@ -64,4 +53,3 @@ class YuNet:
         # Forward
         faces = self._model.detect(image)
         return faces[1]
-
 
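Unlike the dnn.Net-based wrappers below, YuNet wraps cv.FaceDetectorYN, which takes backend_id and target_id at creation time and exposes no preference setters; the merged setter therefore has to recreate the handle, and merging means it is recreated once instead of twice. A usage sketch with assumed values:

# Hypothetical usage: reconfigure an existing YuNet for CUDA in one call.
import cv2 as cv
from yunet import YuNet  # the wrapper defined above

model = YuNet(modelPath='face_detection_yunet_2022mar.onnx')
model.setBackendAndTarget(cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA)
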
models/face_recognition_sface/demo.py CHANGED
@@ -15,49 +15,55 @@ from sface import SFace
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA \n Usage: Set backend DNN model, defaults to cv.dnn.DNN_BACKEND_OPENCV (int = 0). Based on your OpenCV version, it may or may not support cv.dnn.DNN_BACKEND_TIMVX. More details: [https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f]"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(
     description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
-parser.add_argument('--input1', '-i1', type=str, help='Usage: Set path to the input image 1 (original face).')
-parser.add_argument('--input2', '-i2', type=str, help='Usage: Set path to the input image 2 (comparison face).')
-parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input1', '-i1', type=str,
+                    help='Usage: Set path to the input image 1 (original face).')
+parser.add_argument('--input2', '-i2', type=str,
+                    help='Usage: Set path to the input image 2 (comparison face).')
+parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx',
+                    help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0,
+                    help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate SFace for face recognition
-    recognizer = SFace(modelPath=args.model, disType=args.dis_type, backendId=args.backend, targetId=args.target)
+    recognizer = SFace(modelPath=args.model,
+                       disType=args.dis_type,
+                       backendId=backend_id,
+                       targetId=target_id)
     # Instantiate YuNet for face detection
     detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx',
                      inputSize=[320, 320],
                      confThreshold=0.9,
                      nmsThreshold=0.3,
                      topK=5000,
-                     backendId=args.backend,
-                     targetId=args.target)
+                     backendId=backend_id,
+                     targetId=target_id)
 
     img1 = cv.imread(args.input1)
     img2 = cv.imread(args.input2)
@@ -73,4 +79,3 @@ if __name__ == '__main__':
     # Match
     result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
     print('Result: {}.'.format('same identity' if result else 'different identities'))
-
 
models/face_recognition_sface/sface.py CHANGED
@@ -7,8 +7,6 @@
 import numpy as np
 import cv2 as cv
 
-from _testcapi import FLT_MIN
-
 class SFace:
     def __init__(self, modelPath, disType=0, backendId=0, targetId=0):
         self._modelPath = modelPath
@@ -30,15 +28,8 @@ class SFace:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceRecognizerSF.create(
-            model=self._modelPath,
-            config="",
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceRecognizerSF.create(
             model=self._modelPath,
@@ -70,4 +61,3 @@ class SFace:
         else: # NORM_L2
             norml2_distance = self._model.match(feature1, feature2, self._disType)
             return 1 if norml2_distance <= self._threshold_norml2 else 0
-
 
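The dropped import pulled FLT_MIN from _testcapi, a CPython-internal module built only for interpreter tests; it was unused here. If a smallest-positive-float constant were ever needed, the portable source is sys.float_info — a sketch, not part of the commit:

# Sketch only: portable replacement for the removed _testcapi import.
import sys

FLT_MIN = sys.float_info.min  # smallest positive normalized float
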
models/facial_expression_recognition/demo.py CHANGED
@@ -11,38 +11,38 @@ from facial_fer_model import FacialExpressionRecog
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
-
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Facial Expression Recognition')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx', help='Path to the facial expression recognition model.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx',
+                    help='Path to the facial expression recognition model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
-
 def visualize(image, det_res, fer_res, box_color=(0, 255, 0), text_color=(0, 0, 255)):
 
     print('%s %3d faces detected.' % (datetime.datetime.now(), len(det_res)))
@@ -83,11 +83,14 @@ def process(detect_model, fer_model, frame):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     detect_model = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')
 
     fer_model = FacialExpressionRecog(modelPath=args.model,
-                                      backendId=args.backend,
-                                      targetId=args.target)
+                                      backendId=backend_id,
+                                      targetId=target_id)
 
     # If input is an image
     if args.input is not None:
models/facial_expression_recognition/facial_fer_model.py CHANGED
@@ -29,12 +29,10 @@ class FacialExpressionRecog:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image, bbox):
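
This two-line setter is now duplicated across the dnn.Net-based wrappers (FacialExpressionRecog, MPHandPose, PPHumanSeg, MobileNet, PPResNet, and others below). A shared mixin would be one way to keep a single copy; a sketch under the assumption that each wrapper stores its net in self._model — the zoo keeps the classes standalone instead:

# Hypothetical mixin, not how the repo is actually organized.
class BackendTargetMixin:
    def setBackendAndTarget(self, backendId, targetId):
        self._backendId = backendId
        self._targetId = targetId
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)
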
models/handpose_estimation_mediapipe/demo.py CHANGED
@@ -9,34 +9,38 @@ from mp_handpose import MPHandPose
 sys.path.append('../palm_detection_mediapipe')
 from mp_palmdet import MPPalmDet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Hand Pose Estimation from MediaPipe')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./handpose_estimation_mediapipe_2023feb.onnx', help='Path to the model.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out hands of confidence < conf_threshold.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./handpose_estimation_mediapipe_2023feb.onnx',
+                    help='Path to the model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Filter out hands of confidence < conf_threshold.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
 
@@ -147,17 +151,19 @@ def visualize(image, hands, print_result=False):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # palm detector
     palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
                               nmsThreshold=0.3,
                               scoreThreshold=0.6,
-                              backendId=args.backend,
-                              targetId=args.target)
+                              backendId=backend_id,
+                              targetId=target_id)
     # handpose detector
     handpose_detector = MPHandPose(modelPath=args.model,
                                    confThreshold=args.conf_threshold,
-                                   backendId=args.backend,
-                                   targetId=args.target)
+                                   backendId=backend_id,
+                                   targetId=target_id)
 
     # If input is an image
     if args.input is not None:
models/handpose_estimation_mediapipe/mp_handpose.py CHANGED
@@ -1,7 +1,6 @@
 import numpy as np
 import cv2 as cv
 
-
 class MPHandPose:
     def __init__(self, modelPath, confThreshold=0.8, backendId=0, targetId=0):
         self.model_path = modelPath
@@ -28,12 +27,10 @@ class MPHandPose:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _cropAndPadFromPalm(self, image, palm_bbox, for_rotation = False):
models/human_segmentation_pphumanseg/demo.py CHANGED
@@ -11,33 +11,36 @@ import cv2 as cv
 
 from pphumanseg import PPHumanSeg
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2023mar.onnx', help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2023mar.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2023mar.onnx',
+                    help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2023mar.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def get_color_map_list(num_classes):
@@ -97,8 +100,10 @@ def visualize(image, result, weight=0.6, fps=None):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate PPHumanSeg
-    model = PPHumanSeg(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = PPHumanSeg(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     if args.input is not None:
         # Read image and resize to 192x192
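
Replacing type=str2bool with action='store_true' also changes the CLI contract across these demos: --save no longer takes a value ('--save True'); the flag's presence alone enables it. A minimal illustration, for exposition only and not from the commit:

# Minimal argparse illustration of store_true semantics.
import argparse

p = argparse.ArgumentParser()
p.add_argument('--save', '-s', action='store_true')

print(p.parse_args([]).save)          # False: flag omitted
print(p.parse_args(['--save']).save)  # True: bare flag, no value needed
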
models/human_segmentation_pphumanseg/pphumanseg.py CHANGED
@@ -28,12 +28,10 @@ class PPHumanSeg:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
@@ -69,4 +67,3 @@ class PPHumanSeg:
 
         result = np.argmax(outputBlob, axis=1).astype(np.uint8)
         return result
-
 
models/image_classification_mobilenet/demo.py CHANGED
@@ -5,43 +5,39 @@ import cv2 as cv
 
 from mobilenet import MobileNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
-
-all_mobilenets = [
-    'image_classification_mobilenetv1_2022apr.onnx',
-    'image_classification_mobilenetv2_2022apr.onnx',
-    'image_classification_mobilenetv1_2022apr-int8-quantized.onnx',
-    'image_classification_mobilenetv2_2022apr-int8-quantized.onnx'
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
 ]
 
 parser = argparse.ArgumentParser(description='Demo for MobileNet V1 & V2.')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, choices=all_mobilenets, default=all_mobilenets[0], help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_mobilenetv1_2022apr.onnx',
+                    help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate MobileNet
-    model = MobileNet(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = MobileNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
models/image_classification_mobilenet/mobilenet.py CHANGED
@@ -33,12 +33,10 @@ class MobileNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _preprocess(self, image):
models/image_classification_ppresnet/demo.py CHANGED
@@ -11,36 +11,39 @@ import cv2 as cv
 
 from ppresnet import PPResNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx',
+                    help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate ResNet
-    model = PPResNet(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = PPResNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
models/image_classification_ppresnet/ppresnet.py CHANGED
@@ -36,12 +36,10 @@ class PPResNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
models/license_plate_detection_yunet/demo.py CHANGED
@@ -5,37 +5,44 @@ import cv2 as cv
5
 
6
  from lpd_yunet import LPD_YuNet
7
 
8
- def str2bool(v):
9
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
10
- return True
11
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
12
- return False
13
- else:
14
- raise NotImplementedError
15
-
16
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
17
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
18
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
19
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
20
- try:
21
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
27
 
28
  parser = argparse.ArgumentParser(description='LPD-YuNet for License Plate Detection')
29
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
30
- parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2022may.onnx', help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2022may.onnx.')
31
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
32
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
33
- parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
34
- parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3. Suppress bounding boxes of iou >= nms_threshold.')
35
- parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.')
36
- parser.add_argument('--keep_top_k', type=int, default=750, help='Usage: Keep keep_top_k bounding boxes after NMS.')
37
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
38
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  args = parser.parse_args()
40
 
41
  def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
@@ -57,14 +64,17 @@ def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=N
57
  return output
58
 
59
  if __name__ == '__main__':
 
 
 
60
  # Instantiate LPD-YuNet
61
  model = LPD_YuNet(modelPath=args.model,
62
  confThreshold=args.conf_threshold,
63
  nmsThreshold=args.nms_threshold,
64
  topK=args.top_k,
65
  keepTopK=args.keep_top_k,
66
- backendId=args.backend,
67
- targetId=args.target)
68
 
69
  # If input is an image
70
  if args.input is not None:
@@ -117,4 +127,3 @@ if __name__ == '__main__':
117
  cv.imshow('LPD-YuNet Demo', frame)
118
 
119
  tm.reset()
120
-
 
5
 
6
  from lpd_yunet import LPD_YuNet
7
 
8
+ # Check OpenCV version
9
+ assert cv.__version__ >= "4.7.0", \
10
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
11
+
12
+ # Valid combinations of backends and targets
13
+ backend_target_pairs = [
14
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
16
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
17
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
18
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
19
+ ]
 
 
 
 
 
 
 
20
 
21
  parser = argparse.ArgumentParser(description='LPD-YuNet for License Plate Detection')
22
+ parser.add_argument('--input', '-i', type=str,
23
+ help='Usage: Set path to the input image. Omit for using default camera.')
24
+ parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2023mar.onnx',
25
+ help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2023mar.onnx.')
26
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
27
+ help='''Choose one of the backend-target pair to run this demo:
28
+ {:d}: (default) OpenCV implementation + CPU,
29
+ {:d}: CUDA + GPU (CUDA),
30
+ {:d}: CUDA + GPU (CUDA FP16),
31
+ {:d}: TIM-VX + NPU,
32
+ {:d}: CANN + NPU
33
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
34
+ parser.add_argument('--conf_threshold', type=float, default=0.9,
35
+ help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
36
+ parser.add_argument('--nms_threshold', type=float, default=0.3,
37
+ help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3. Suppress bounding boxes of iou >= nms_threshold.')
38
+ parser.add_argument('--top_k', type=int, default=5000,
39
+ help='Usage: Keep top_k bounding boxes before NMS.')
40
+ parser.add_argument('--keep_top_k', type=int, default=750,
41
+ help='Usage: Keep keep_top_k bounding boxes after NMS.')
42
+ parser.add_argument('--save', '-s', action='store_true',
43
+ help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
44
+ parser.add_argument('--vis', '-v', action='store_true',
45
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
46
  args = parser.parse_args()
47
 
48
  def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
 
64
  return output
65
 
66
  if __name__ == '__main__':
67
+ backend_id = backend_target_pairs[args.backend_target][0]
68
+ target_id = backend_target_pairs[args.backend_target][1]
69
+
70
  # Instantiate LPD-YuNet
71
  model = LPD_YuNet(modelPath=args.model,
72
  confThreshold=args.conf_threshold,
73
  nmsThreshold=args.nms_threshold,
74
  topK=args.top_k,
75
  keepTopK=args.keep_top_k,
76
+ backendId=backend_id,
77
+ targetId=target_id)
78
 
79
  # If input is an image
80
  if args.input is not None:
 
127
  cv.imshow('LPD-YuNet Demo', frame)
128
 
129
  tm.reset()
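
A note on the version gate added above: cv.__version__ >= "4.7.0" is a plain string comparison, which is lexicographic, so a hypothetical "4.10.0" would compare as older than "4.7.0". A sketch of a numeric check, assuming the usual major.minor.revision form of cv.__version__ (the helper name is illustrative, not part of this commit):

import cv2 as cv

def version_tuple(v):
    # Keep only the leading numeric fields, e.g. "4.7.0-dev" -> (4, 7, 0).
    return tuple(int(part) for part in v.split('-')[0].split('.'))

assert version_tuple(cv.__version__) >= (4, 7, 0), \
    "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"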
 
models/license_plate_detection_yunet/lpd_yunet.py CHANGED
@@ -28,12 +28,10 @@ class LPD_YuNet:
28
  def name(self):
29
  return self.__class__.__name__
30
 
31
- def setBackend(self, backendId):
32
  self.backend_id = backendId
33
- self.model.setPreferableBackend(self.backend_id)
34
-
35
- def setTarget(self, targetId):
36
  self.target_id = targetId
 
37
  self.model.setPreferableTarget(self.target_id)
38
 
39
  def setInputSize(self, inputSize):
 
28
  def name(self):
29
  return self.__class__.__name__
30
 
31
+ def setBackendAndTarget(self, backendId, targetId):
32
  self.backend_id = backendId
 
 
 
33
  self.target_id = targetId
34
+ self.model.setPreferableBackend(self.backend_id)
35
  self.model.setPreferableTarget(self.target_id)
36
 
37
  def setInputSize(self, inputSize):
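
The two setters are merged because a backend is only valid together with a matching target, which the old independent setBackend/setTarget calls could not guarantee. A minimal usage sketch, mirroring the constructor arguments from the demo above (the CUDA pair is only an example and requires a CUDA-enabled OpenCV build):

import cv2 as cv
from lpd_yunet import LPD_YuNet

model = LPD_YuNet(modelPath='license_plate_detection_lpd_yunet_2023mar.onnx',
                  confThreshold=0.9, nmsThreshold=0.3, topK=5000, keepTopK=750,
                  backendId=cv.dnn.DNN_BACKEND_OPENCV, targetId=cv.dnn.DNN_TARGET_CPU)
# Backend and target now switch together in a single call.
model.setBackendAndTarget(cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA)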
models/object_detection_nanodet/demo.py CHANGED
@@ -1,29 +1,21 @@
1
  import numpy as np
2
- import cv2
3
  import argparse
4
 
5
  from nanodet import NanoDet
6
 
7
- def str2bool(v):
8
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
9
- return True
10
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
11
- return False
12
- else:
13
- raise NotImplementedError
14
-
15
- backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
16
- targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
17
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
18
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
19
 
20
- try:
21
- backends += [cv2.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv2.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
27
 
28
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
29
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
@@ -48,16 +40,16 @@ def letterbox(srcimg, target_size=(416, 416)):
48
  hw_scale = img.shape[0] / img.shape[1]
49
  if hw_scale > 1:
50
  newh, neww = target_size[0], int(target_size[1] / hw_scale)
51
- img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
52
  left = int((target_size[1] - neww) * 0.5)
53
- img = cv2.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv2.BORDER_CONSTANT, value=0) # add border
54
  else:
55
  newh, neww = int(target_size[0] * hw_scale), target_size[1]
56
- img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
57
  top = int((target_size[0] - newh) * 0.5)
58
- img = cv2.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=0)
59
  else:
60
- img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
61
 
62
  letterbox_scale = [top, left, newh, neww]
63
  return img, letterbox_scale
@@ -87,7 +79,7 @@ def vis(preds, res_img, letterbox_scale, fps=None):
87
  # draw FPS
88
  if fps is not None:
89
  fps_label = "FPS: %.2f" % fps
90
- cv2.putText(ret, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
91
 
92
  # draw bboxes and labels
93
  for pred in preds:
@@ -97,37 +89,52 @@ def vis(preds, res_img, letterbox_scale, fps=None):
97
 
98
  # bbox
99
  xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
100
- cv2.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
101
 
102
  # label
103
  label = "{:s}: {:.2f}".format(classes[classid], conf)
104
- cv2.putText(ret, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
105
 
106
  return ret
107
 
108
  if __name__=='__main__':
109
  parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
110
- parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
111
- parser.add_argument('--model', '-m', type=str, default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
112
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
113
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
114
- parser.add_argument('--confidence', default=0.35, type=float, help='Class confidence')
115
- parser.add_argument('--nms', default=0.6, type=float, help='Enter nms IOU threshold')
116
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
117
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
 
 
 
 
 
 
 
 
 
 
 
118
  args = parser.parse_args()
119
 
 
 
 
120
  model = NanoDet(modelPath=args.model,
121
  prob_threshold=args.confidence,
122
  iou_threshold=args.nms,
123
- backend_id=args.backend,
124
- target_id=args.target)
125
 
126
- tm = cv2.TickMeter()
127
  tm.reset()
128
  if args.input is not None:
129
- image = cv2.imread(args.input)
130
- input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
131
 
132
  # Letterbox transformation
133
  input_blob, letterbox_scale = letterbox(input_blob)
@@ -142,25 +149,25 @@ if __name__=='__main__':
142
 
143
  if args.save:
144
  print('Results saved to result.jpg\n')
145
- cv2.imwrite('result.jpg', img)
146
 
147
  if args.vis:
148
- cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
149
- cv2.imshow(args.input, img)
150
- cv2.waitKey(0)
151
 
152
  else:
153
  print("Press any key to stop video capture")
154
  deviceId = 0
155
- cap = cv2.VideoCapture(deviceId)
156
 
157
- while cv2.waitKey(1) < 0:
158
  hasFrame, frame = cap.read()
159
  if not hasFrame:
160
  print('No frames grabbed!')
161
  break
162
 
163
- input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
164
  input_blob, letterbox_scale = letterbox(input_blob)
165
  # Inference
166
  tm.start()
@@ -169,6 +176,6 @@ if __name__=='__main__':
169
 
170
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
171
 
172
- cv2.imshow("NanoDet Demo", img)
173
 
174
  tm.reset()
 
1
  import numpy as np
2
+ import cv2 as cv
3
  import argparse
4
 
5
  from nanodet import NanoDet
6
 
7
+ # Check OpenCV version
8
+ assert cv.__version__ >= "4.7.0", \
9
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
 
 
 
 
 
 
 
 
10
 
11
+ # Valid combinations of backends and targets
12
+ backend_target_pairs = [
13
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
14
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
16
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
17
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
18
+ ]
19
 
20
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
21
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
 
40
  hw_scale = img.shape[0] / img.shape[1]
41
  if hw_scale > 1:
42
  newh, neww = target_size[0], int(target_size[1] / hw_scale)
43
+ img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
44
  left = int((target_size[1] - neww) * 0.5)
45
+ img = cv.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv.BORDER_CONSTANT, value=0) # add border
46
  else:
47
  newh, neww = int(target_size[0] * hw_scale), target_size[1]
48
+ img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
49
  top = int((target_size[0] - newh) * 0.5)
50
+ img = cv.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv.BORDER_CONSTANT, value=0)
51
  else:
52
+ img = cv.resize(img, target_size, interpolation=cv.INTER_AREA)
53
 
54
  letterbox_scale = [top, left, newh, neww]
55
  return img, letterbox_scale
 
79
  # draw FPS
80
  if fps is not None:
81
  fps_label = "FPS: %.2f" % fps
82
+ cv.putText(ret, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
83
 
84
  # draw bboxes and labels
85
  for pred in preds:
 
89
 
90
  # bbox
91
  xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
92
+ cv.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
93
 
94
  # label
95
  label = "{:s}: {:.2f}".format(classes[classid], conf)
96
+ cv.putText(ret, label, (xmin, ymin - 10), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
97
 
98
  return ret
99
 
100
  if __name__=='__main__':
101
  parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
102
+ parser.add_argument('--input', '-i', type=str,
103
+ help='Path to the input image. Omit for using default camera.')
104
+ parser.add_argument('--model', '-m', type=str,
105
+ default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
106
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
107
+ help='''Choose one of the backend-target pairs to run this demo:
108
+ {:d}: (default) OpenCV implementation + CPU,
109
+ {:d}: CUDA + GPU (CUDA),
110
+ {:d}: CUDA + GPU (CUDA FP16),
111
+ {:d}: TIM-VX + NPU,
112
+ {:d}: CANN + NPU
113
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
114
+ parser.add_argument('--confidence', default=0.35, type=float,
115
+ help='Class confidence')
116
+ parser.add_argument('--nms', default=0.6, type=float,
117
+ help='NMS IoU threshold')
118
+ parser.add_argument('--save', '-s', action='store_true',
119
+ help='Specify to save results. This flag is invalid when using camera.')
120
+ parser.add_argument('--vis', '-v', action='store_true',
121
+ help='Specify to open a window for result visualization. This flag is invalid when using camera.')
122
  args = parser.parse_args()
123
 
124
+ backend_id = backend_target_pairs[args.backend_target][0]
125
+ target_id = backend_target_pairs[args.backend_target][1]
126
+
127
  model = NanoDet(modelPath=args.model,
128
  prob_threshold=args.confidence,
129
  iou_threshold=args.nms,
130
+ backend_id=backend_id,
131
+ target_id=target_id)
132
 
133
+ tm = cv.TickMeter()
134
  tm.reset()
135
  if args.input is not None:
136
+ image = cv.imread(args.input)
137
+ input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
138
 
139
  # Letterbox transformation
140
  input_blob, letterbox_scale = letterbox(input_blob)
 
149
 
150
  if args.save:
151
  print('Results saved to result.jpg\n')
152
+ cv.imwrite('result.jpg', img)
153
 
154
  if args.vis:
155
+ cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
156
+ cv.imshow(args.input, img)
157
+ cv.waitKey(0)
158
 
159
  else:
160
  print("Press any key to stop video capture")
161
  deviceId = 0
162
+ cap = cv.VideoCapture(deviceId)
163
 
164
+ while cv.waitKey(1) < 0:
165
  hasFrame, frame = cap.read()
166
  if not hasFrame:
167
  print('No frames grabbed!')
168
  break
169
 
170
+ input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
171
  input_blob, letterbox_scale = letterbox(input_blob)
172
  # Inference
173
  tm.start()
 
176
 
177
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
178
 
179
+ cv.imshow("NanoDet Demo", img)
180
 
181
  tm.reset()
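
The letterbox transform above records letterbox_scale = [top, left, newh, neww]; unletterbox (not shown in this hunk) has to invert it. A sketch of a plausible inverse under that convention, with an illustrative name (the demo's actual implementation may differ):

import numpy as np

def unletterbox_sketch(bbox, original_hw, letterbox_scale):
    top, left, newh, neww = letterbox_scale
    h, w = original_hw
    xmin, ymin, xmax, ymax = bbox
    # Remove the padding offsets, then undo the aspect-preserving resize.
    xmin = int(np.clip(xmin - left, 0, neww) / neww * w)
    xmax = int(np.clip(xmax - left, 0, neww) / neww * w)
    ymin = int(np.clip(ymin - top, 0, newh) / newh * h)
    ymax = int(np.clip(ymax - top, 0, newh) / newh * h)
    return xmin, ymin, xmax, ymax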
models/object_detection_nanodet/nanodet.py CHANGED
@@ -37,12 +37,10 @@ class NanoDet:
37
  def name(self):
38
  return self.__class__.__name__
39
 
40
- def setBackend(self, backendId):
41
- self.backend_id = backendId
 
42
  self.net.setPreferableBackend(self.backend_id)
43
-
44
- def setTarget(self, targetId):
45
- self.target_id = targetId
46
  self.net.setPreferableTarget(self.target_id)
47
 
48
  def pre_process(self, img):
 
37
  def name(self):
38
  return self.__class__.__name__
39
 
40
+ def setBackendAndTarget(self, backendId, targetId):
41
+ self.backend_id = backendId
42
+ self.target_id = targetId
43
  self.net.setPreferableBackend(self.backend_id)
 
 
 
44
  self.net.setPreferableTarget(self.target_id)
45
 
46
  def pre_process(self, img):
models/object_detection_yolox/demo.py CHANGED
@@ -1,29 +1,21 @@
1
  import numpy as np
2
- import cv2
3
  import argparse
4
 
5
  from yolox import YoloX
6
 
7
- def str2bool(v):
8
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
9
- return True
10
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
11
- return False
12
- else:
13
- raise NotImplementedError
14
-
15
- backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
16
- targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
17
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
18
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
19
 
20
- try:
21
- backends += [cv2.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv2.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
27
 
28
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
29
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
@@ -43,8 +35,8 @@ classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
43
  def letterbox(srcimg, target_size=(640, 640)):
44
  padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
45
  ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
46
- resized_img = cv2.resize(
47
- srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv2.INTER_LINEAR
48
  ).astype(np.float32)
49
  padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img
50
 
@@ -58,7 +50,7 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
58
 
59
  if fps is not None:
60
  fps_label = "FPS: %.2f" % fps
61
- cv2.putText(res_img, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
62
 
63
  for det in dets:
64
  box = unletterbox(det[:4], letterbox_scale).astype(np.int32)
@@ -68,39 +60,55 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
68
  x0, y0, x1, y1 = box
69
 
70
  text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
71
- font = cv2.FONT_HERSHEY_SIMPLEX
72
- txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
73
- cv2.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
74
- cv2.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
75
- cv2.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)
76
 
77
  return res_img
78
 
79
  if __name__=='__main__':
80
  parser = argparse.ArgumentParser(description='YoloX inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
81
- parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
82
- parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx', help="Path to the model")
83
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
84
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
85
- parser.add_argument('--confidence', default=0.5, type=float, help='Class confidence')
86
- parser.add_argument('--nms', default=0.5, type=float, help='Enter nms IOU threshold')
87
- parser.add_argument('--obj', default=0.5, type=float, help='Enter object threshold')
88
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
89
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  args = parser.parse_args()
91
 
 
 
 
92
  model_net = YoloX(modelPath=args.model,
93
  confThreshold=args.confidence,
94
  nmsThreshold=args.nms,
95
  objThreshold=args.obj,
96
- backendId=args.backend,
97
- targetId=args.target)
98
 
99
- tm = cv2.TickMeter()
100
  tm.reset()
101
  if args.input is not None:
102
- image = cv2.imread(args.input)
103
- input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
104
  input_blob, letterbox_scale = letterbox(input_blob)
105
 
106
  # Inference
@@ -113,25 +121,25 @@ if __name__=='__main__':
113
 
114
  if args.save:
115
  print('Results saved to result.jpg\n')
116
- cv2.imwrite('result.jpg', img)
117
 
118
  if args.vis:
119
- cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
120
- cv2.imshow(args.input, img)
121
- cv2.waitKey(0)
122
 
123
  else:
124
  print("Press any key to stop video capture")
125
  deviceId = 0
126
- cap = cv2.VideoCapture(deviceId)
127
 
128
- while cv2.waitKey(1) < 0:
129
  hasFrame, frame = cap.read()
130
  if not hasFrame:
131
  print('No frames grabbed!')
132
  break
133
 
134
- input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
135
  input_blob, letterbox_scale = letterbox(input_blob)
136
 
137
  # Inference
@@ -141,6 +149,6 @@ if __name__=='__main__':
141
 
142
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
143
 
144
- cv2.imshow("YoloX Demo", img)
145
 
146
  tm.reset()
 
1
  import numpy as np
2
+ import cv2 as cv
3
  import argparse
4
 
5
  from yolox import YoloX
6
 
7
+ # Check OpenCV version
8
+ assert cv.__version__ >= "4.7.0", \
9
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
 
 
 
 
 
 
 
 
10
 
11
+ # Valid combinations of backends and targets
12
+ backend_target_pairs = [
13
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
14
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
16
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
17
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
18
+ ]
19
 
20
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
21
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
 
35
  def letterbox(srcimg, target_size=(640, 640)):
36
  padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
37
  ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
38
+ resized_img = cv.resize(
39
+ srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR
40
  ).astype(np.float32)
41
  padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img
42
 
 
50
 
51
  if fps is not None:
52
  fps_label = "FPS: %.2f" % fps
53
+ cv.putText(res_img, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
54
 
55
  for det in dets:
56
  box = unletterbox(det[:4], letterbox_scale).astype(np.int32)
 
60
  x0, y0, x1, y1 = box
61
 
62
  text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
63
+ font = cv.FONT_HERSHEY_SIMPLEX
64
+ txt_size = cv.getTextSize(text, font, 0.4, 1)[0]
65
+ cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
66
+ cv.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
67
+ cv.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)
68
 
69
  return res_img
70
 
71
  if __name__=='__main__':
72
  parser = argparse.ArgumentParser(description='YoloX inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
73
+ parser.add_argument('--input', '-i', type=str,
74
+ help='Path to the input image. Omit for using default camera.')
75
+ parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx',
76
+ help="Path to the model")
77
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
78
+ help='''Choose one of the backend-target pairs to run this demo:
79
+ {:d}: (default) OpenCV implementation + CPU,
80
+ {:d}: CUDA + GPU (CUDA),
81
+ {:d}: CUDA + GPU (CUDA FP16),
82
+ {:d}: TIM-VX + NPU,
83
+ {:d}: CANN + NPU
84
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
85
+ parser.add_argument('--confidence', default=0.5, type=float,
86
+ help='Class confidence')
87
+ parser.add_argument('--nms', default=0.5, type=float,
88
+ help='NMS IoU threshold')
89
+ parser.add_argument('--obj', default=0.5, type=float,
90
+ help='Objectness score threshold')
91
+ parser.add_argument('--save', '-s', action='store_true',
92
+ help='Specify to save results. This flag is invalid when using camera.')
93
+ parser.add_argument('--vis', '-v', action='store_true',
94
+ help='Specify to open a window for result visualization. This flag is invalid when using camera.')
95
  args = parser.parse_args()
96
 
97
+ backend_id = backend_target_pairs[args.backend_target][0]
98
+ target_id = backend_target_pairs[args.backend_target][1]
99
+
100
  model_net = YoloX(modelPath=args.model,
101
  confThreshold=args.confidence,
102
  nmsThreshold=args.nms,
103
  objThreshold=args.obj,
104
+ backendId=backend_id,
105
+ targetId=target_id)
106
 
107
+ tm = cv.TickMeter()
108
  tm.reset()
109
  if args.input is not None:
110
+ image = cv.imread(args.input)
111
+ input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
112
  input_blob, letterbox_scale = letterbox(input_blob)
113
 
114
  # Inference
 
121
 
122
  if args.save:
123
  print('Resutls saved to result.jpg\n')
124
+ cv.imwrite('result.jpg', img)
125
 
126
  if args.vis:
127
+ cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
128
+ cv.imshow(args.input, img)
129
+ cv.waitKey(0)
130
 
131
  else:
132
  print("Press any key to stop video capture")
133
  deviceId = 0
134
+ cap = cv.VideoCapture(deviceId)
135
 
136
+ while cv.waitKey(1) < 0:
137
  hasFrame, frame = cap.read()
138
  if not hasFrame:
139
  print('No frames grabbed!')
140
  break
141
 
142
+ input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
143
  input_blob, letterbox_scale = letterbox(input_blob)
144
 
145
  # Inference
 
149
 
150
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
151
 
152
+ cv.imshow("YoloX Demo", img)
153
 
154
  tm.reset()
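
The indexing in vis() above (det[:4] for the box, classes[cls_id] and score for the label) suggests each row of the prediction array is laid out as [x0, y0, x1, y1, score, class_id]. A self-contained sketch of consuming it under that assumption, with fabricated data:

import numpy as np

dets = np.array([[10.0, 20.0, 110.0, 220.0, 0.87, 0.0]])  # one fabricated detection
for det in dets:
    box = det[:4].astype(np.int32)
    score, cls_id = det[-2], int(det[-1])
    print(box, score, cls_id)  # -> [ 10  20 110 220] 0.87 0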
models/object_detection_yolox/yolox.py CHANGED
@@ -23,12 +23,10 @@ class YoloX:
23
  def name(self):
24
  return self.__class__.__name__
25
 
26
- def setBackend(self, backendId):
27
- self.backendId = backendId
 
28
  self.net.setPreferableBackend(self.backendId)
29
-
30
- def setTarget(self, targetId):
31
- self.targetId = targetId
32
  self.net.setPreferableTarget(self.targetId)
33
 
34
  def preprocess(self, img):
 
23
  def name(self):
24
  return self.__class__.__name__
25
 
26
+ def setBackendAndTarget(self, backendId, targetId):
27
+ self.backendId = backendId
28
+ self.targetId = targetId
29
  self.net.setPreferableBackend(self.backendId)
 
 
 
30
  self.net.setPreferableTarget(self.targetId)
31
 
32
  def preprocess(self, img):
models/object_tracking_dasiamrpn/dasiamrpn.py CHANGED
@@ -27,18 +27,10 @@ class DaSiamRPN:
27
  def name(self):
28
  return self.__class__.__name__
29
 
30
- def setBackend(self, backend_id):
31
- self._backend_id = backend_id
32
- self._param = cv.TrackerDaSiamRPN_Params()
33
- self._param.model = self._model_path
34
- self._param.kernel_cls1 = self._kernel_cls1_path
35
- self._param.kernel_r1 = self._kernel_r1_path
36
- self._param.backend = self._backend_id
37
- self._param.target = self._target_id
38
- self._model = cv.TrackerDaSiamRPN.create(self._param)
39
 
40
- def setTarget(self, target_id):
41
- self._target_id = target_id
42
  self._param = cv.TrackerDaSiamRPN_Params()
43
  self._param.model = self._model_path
44
  self._param.kernel_cls1 = self._kernel_cls1_path
@@ -53,4 +45,4 @@ class DaSiamRPN:
53
  def infer(self, image):
54
  isLocated, bbox = self._model.update(image)
55
  score = self._model.getTrackingScore()
56
- return isLocated, bbox, score
 
27
  def name(self):
28
  return self.__class__.__name__
29
 
30
+ def setBackendAndTarget(self, backendId, targetId):
31
+ self._backend_id = backendId
32
+ self._target_id = targetId
 
 
 
 
 
 
33
 
 
 
34
  self._param = cv.TrackerDaSiamRPN_Params()
35
  self._param.model = self._model_path
36
  self._param.kernel_cls1 = self._kernel_cls1_path
 
45
  def infer(self, image):
46
  isLocated, bbox = self._model.update(image)
47
  score = self._model.getTrackingScore()
48
+ return isLocated, bbox, score
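
Unlike the dnn-based models, DaSiamRPN wraps cv.TrackerDaSiamRPN, whose backend and target are constructor parameters, so setBackendAndTarget has to rebuild the tracker rather than flip two flags. A condensed sketch of that rebuild, using the default model paths from the demo (the files are assumed to be present locally):

import cv2 as cv

param = cv.TrackerDaSiamRPN_Params()
param.model = 'object_tracking_dasiamrpn_model_2021nov.onnx'
param.kernel_cls1 = 'object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx'
param.kernel_r1 = 'object_tracking_dasiamrpn_kernel_r1_2021nov.onnx'
param.backend = cv.dnn.DNN_BACKEND_OPENCV
param.target = cv.dnn.DNN_TARGET_CPU
tracker = cv.TrackerDaSiamRPN.create(param)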
models/object_tracking_dasiamrpn/demo.py CHANGED
@@ -11,22 +11,41 @@ import cv2 as cv
11
 
12
  from dasiamrpn import DaSiamRPN
13
 
14
- def str2bool(v):
15
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
16
- return True
17
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
18
- return False
19
- else:
20
- raise NotImplementedError
 
 
 
 
 
21
 
22
  parser = argparse.ArgumentParser(
23
  description="Distractor-aware Siamese Networks for Visual Object Tracking (https://arxiv.org/abs/1808.06048)")
24
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input video. Omit for using default camera.')
25
- parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx', help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.')
26
- parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.')
27
- parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_r1.onnx.')
28
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
29
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  args = parser.parse_args()
31
 
32
  def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0), text_color=(0, 255, 0), fontScale=1, fontSize=1):
@@ -50,12 +69,16 @@ def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),tex
50
  return output
51
 
52
  if __name__ == '__main__':
 
 
 
53
  # Instantiate DaSiamRPN
54
  model = DaSiamRPN(
55
  kernel_cls1_path=args.kernel_cls1_path,
56
  kernel_r1_path=args.kernel_r1_path,
57
  model_path=args.model_path,
58
- )
 
59
 
60
  # Read from args.input
61
  _input = args.input
 
11
 
12
  from dasiamrpn import DaSiamRPN
13
 
14
+ # Check OpenCV version
15
+ assert cv.__version__ >= "4.7.0", \
16
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
17
+
18
+ # Valid combinations of backends and targets
19
+ backend_target_pairs = [
20
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
21
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
23
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
24
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
25
+ ]
26
 
27
  parser = argparse.ArgumentParser(
28
  description="Distractor-aware Siamese Networks for Visual Object Tracking (https://arxiv.org/abs/1808.06048)")
29
+ parser.add_argument('--input', '-i', type=str,
30
+ help='Usage: Set path to the input video. Omit for using default camera.')
31
+ parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx',
32
+ help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.')
33
+ parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx',
34
+ help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.')
35
+ parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx',
36
+ help='Usage: Set path to dasiamrpn_kernel_r1.onnx.')
37
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
38
+ help='''Choose one of the backend-target pairs to run this demo:
39
+ {:d}: (default) OpenCV implementation + CPU,
40
+ {:d}: CUDA + GPU (CUDA),
41
+ {:d}: CUDA + GPU (CUDA FP16),
42
+ {:d}: TIM-VX + NPU,
43
+ {:d}: CANN + NPU
44
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
45
+ parser.add_argument('--save', '-s', action='store_true',
46
+ help='Usage: Specify to save a file with results. Invalid in case of camera input.')
47
+ parser.add_argument('--vis', '-v', action='store_true',
48
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
49
  args = parser.parse_args()
50
 
51
  def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0), text_color=(0, 255, 0), fontScale=1, fontSize=1):
 
69
  return output
70
 
71
  if __name__ == '__main__':
72
+ backend_id = backend_target_pairs[args.backend_target][0]
73
+ target_id = backend_target_pairs[args.backend_target][1]
74
+
75
  # Instantiate DaSiamRPN
76
  model = DaSiamRPN(
77
  kernel_cls1_path=args.kernel_cls1_path,
78
  kernel_r1_path=args.kernel_r1_path,
79
  model_path=args.model_path,
80
+ backend_id=backend_id,
81
+ target_id=target_id)
82
 
83
  # Read from args.input
84
  _input = args.input
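
The same selection pattern recurs in every demo touched by this commit: one --backend_target index picks a pre-validated pair instead of two independently chosen flags. A minimal standalone sketch (the two-entry list is trimmed for brevity):

import cv2 as cv

backend_target_pairs = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
]
choice = 0  # would come from args.backend_target in the demos
backend_id, target_id = backend_target_pairs[choice]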
models/palm_detection_mediapipe/demo.py CHANGED
@@ -5,35 +5,40 @@ import cv2 as cv
5
 
6
  from mp_palmdet import MPPalmDet
7
 
8
- def str2bool(v):
9
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
10
- return True
11
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
12
- return False
13
- else:
14
- raise NotImplementedError
15
-
16
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
17
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
18
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
19
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
20
- try:
21
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
27
 
28
  parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
29
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
30
- parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx', help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
31
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
32
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
33
- parser.add_argument('--score_threshold', type=float, default=0.8, help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.')
34
- parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
35
- parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
36
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
37
  args = parser.parse_args()
38
 
39
  def visualize(image, results, print_results=False, fps=None):
@@ -71,12 +76,15 @@ def visualize(image, results, print_results=False, fps=None):
71
  return output
72
 
73
  if __name__ == '__main__':
 
 
 
74
  # Instantiate MPPalmDet
75
  model = MPPalmDet(modelPath=args.model,
76
  nmsThreshold=args.nms_threshold,
77
  scoreThreshold=args.score_threshold,
78
- backendId=args.backend,
79
- targetId=args.target)
80
 
81
  # If input is an image
82
  if args.input is not None:
@@ -123,4 +131,3 @@ if __name__ == '__main__':
123
  cv.imshow('MPPalmDet Demo', frame)
124
 
125
  tm.reset()
126
-
 
5
 
6
  from mp_palmdet import MPPalmDet
7
 
8
+ # Check OpenCV version
9
+ assert cv.__version__ >= "4.7.0", \
10
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
11
+
12
+ # Valid combinations of backends and targets
13
+ backend_target_pairs = [
14
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
16
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
17
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
18
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
19
+ ]
 
 
 
 
 
 
 
20
 
21
  parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
22
+ parser.add_argument('--input', '-i', type=str,
23
+ help='Usage: Set path to the input image. Omit for using default camera.')
24
+ parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx',
25
+ help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
26
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
27
+ help='''Choose one of the backend-target pairs to run this demo:
28
+ {:d}: (default) OpenCV implementation + CPU,
29
+ {:d}: CUDA + GPU (CUDA),
30
+ {:d}: CUDA + GPU (CUDA FP16),
31
+ {:d}: TIM-VX + NPU,
32
+ {:d}: CANN + NPU
33
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
34
+ parser.add_argument('--score_threshold', type=float, default=0.8,
35
+ help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out palms of confidence < score_threshold. An empirical score threshold for the quantized model is 0.49.')
36
+ parser.add_argument('--nms_threshold', type=float, default=0.3,
37
+ help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
38
+ parser.add_argument('--save', '-s', action='store_true',
39
+ help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
40
+ parser.add_argument('--vis', '-v', action='store_true',
41
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
42
  args = parser.parse_args()
43
 
44
  def visualize(image, results, print_results=False, fps=None):
 
76
  return output
77
 
78
  if __name__ == '__main__':
79
+ backend_id = backend_target_pairs[args.backend_target][0]
80
+ target_id = backend_target_pairs[args.backend_target][1]
81
+
82
  # Instantiate MPPalmDet
83
  model = MPPalmDet(modelPath=args.model,
84
  nmsThreshold=args.nms_threshold,
85
  scoreThreshold=args.score_threshold,
86
+ backendId=backend_id,
87
+ targetId=target_id)
88
 
89
  # If input is an image
90
  if args.input is not None:
 
131
  cv.imshow('MPPalmDet Demo', frame)
132
 
133
  tm.reset()
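
Switching --save and --vis from type=str2bool to action='store_true' also changes behavior, not just style: the flags now default to False, so showing results becomes opt-in via -v, where the old --vis defaulted to True. A minimal illustration:

import argparse

p = argparse.ArgumentParser()
p.add_argument('--vis', '-v', action='store_true')
print(p.parse_args([]).vis)      # False: visualization is now opt-in
print(p.parse_args(['-v']).vis)  # True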
 
models/palm_detection_mediapipe/mp_palmdet.py CHANGED
@@ -22,12 +22,10 @@ class MPPalmDet:
22
  def name(self):
23
  return self.__class__.__name__
24
 
25
- def setBackend(self, backendId):
26
  self.backend_id = backendId
27
- self.model.setPreferableBackend(self.backend_id)
28
-
29
- def setTarget(self, targetId):
30
  self.target_id = targetId
 
31
  self.model.setPreferableTarget(self.target_id)
32
 
33
  def _preprocess(self, image):
@@ -35,7 +33,7 @@ class MPPalmDet:
35
  ratio = min(self.input_size / image.shape[:2])
36
  if image.shape[0] != self.input_size[0] or image.shape[1] != self.input_size[1]:
37
  # keep aspect ratio when resize
38
- ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.int)
39
  image = cv.resize(image, (ratio_size[1], ratio_size[0]))
40
  pad_h = self.input_size[0] - ratio_size[0]
41
  pad_w = self.input_size[1] - ratio_size[1]
@@ -46,7 +44,7 @@ class MPPalmDet:
46
  image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
47
  image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
48
  image = image.astype(np.float32) / 255.0 # norm
49
- pad_bias = (pad_bias / ratio).astype(np.int)
50
  return image[np.newaxis, :, :, :], pad_bias # hwc -> nhwc
51
 
52
  def infer(self, image):
 
22
  def name(self):
23
  return self.__class__.__name__
24
 
25
+ def setBackendAndTarget(self, backendId, targetId):
26
  self.backend_id = backendId
 
 
 
27
  self.target_id = targetId
28
+ self.model.setPreferableBackend(self.backend_id)
29
  self.model.setPreferableTarget(self.target_id)
30
 
31
  def _preprocess(self, image):
 
33
  ratio = min(self.input_size / image.shape[:2])
34
  if image.shape[0] != self.input_size[0] or image.shape[1] != self.input_size[1]:
35
  # keep aspect ratio when resize
36
+ ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.int32)
37
  image = cv.resize(image, (ratio_size[1], ratio_size[0]))
38
  pad_h = self.input_size[0] - ratio_size[0]
39
  pad_w = self.input_size[1] - ratio_size[1]
 
44
  image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
45
  image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
46
  image = image.astype(np.float32) / 255.0 # norm
47
+ pad_bias = (pad_bias / ratio).astype(np.int32)
48
  return image[np.newaxis, :, :, :], pad_bias # hwc -> nhwc
49
 
50
  def infer(self, image):
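
The astype(np.int) to astype(np.int32) changes above are not cosmetic: np.int was a deprecated alias for the built-in int and was removed in NumPy 1.24, so the old spelling raises AttributeError on current NumPy. For example:

import numpy as np

ratio_size = (np.array([720, 1280]) * 0.325).astype(np.int32)
print(ratio_size)  # [234 416]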
models/person_reid_youtureid/demo.py CHANGED
@@ -12,36 +12,41 @@ import cv2 as cv
12
 
13
  from youtureid import YoutuReID
14
 
15
- def str2bool(v):
16
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
17
- return True
18
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
19
- return False
20
- else:
21
- raise NotImplementedError
22
-
23
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
24
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
25
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
26
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
27
- try:
28
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
29
- targets += [cv.dnn.DNN_TARGET_NPU]
30
- help_msg_backends += "; {:d}: TIMVX"
31
- help_msg_targets += "; {:d}: NPU"
32
- except:
33
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
34
 
35
  parser = argparse.ArgumentParser(
36
  description="ReID baseline models from Tencent Youtu Lab")
37
- parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
38
- parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
39
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
40
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
41
- parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
42
- parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
43
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
44
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
 
 
 
 
 
 
 
 
 
 
 
45
  args = parser.parse_args()
46
 
47
  def readImageFromDirectory(img_dir, w=128, h=256):
@@ -78,8 +83,11 @@ def visualize(results, query_dir, gallery_dir, output_size=(128, 384)):
78
  return results_vis
79
 
80
  if __name__ == '__main__':
 
 
 
81
  # Instantiate YoutuReID for person ReID
82
- net = YoutuReID(modelPath=args.model, backendId=args.backend, targetId=args.target)
83
 
84
  # Read images from dir
85
  query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
 
12
 
13
  from youtureid import YoutuReID
14
 
15
+ # Check OpenCV version
16
+ assert cv.__version__ >= "4.7.0", \
17
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
18
+
19
+ # Valid combinations of backends and targets
20
+ backend_target_pairs = [
21
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
23
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
24
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
25
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
26
+ ]
 
 
 
 
 
 
 
27
 
28
  parser = argparse.ArgumentParser(
29
  description="ReID baseline models from Tencent Youtu Lab")
30
+ parser.add_argument('--query_dir', '-q', type=str,
31
+ help='Query directory.')
32
+ parser.add_argument('--gallery_dir', '-g', type=str,
33
+ help='Gallery directory.')
34
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
35
+ help='''Choose one of the backend-target pairs to run this demo:
36
+ {:d}: (default) OpenCV implementation + CPU,
37
+ {:d}: CUDA + GPU (CUDA),
38
+ {:d}: CUDA + GPU (CUDA FP16),
39
+ {:d}: TIM-VX + NPU,
40
+ {:d}: CANN + NPU
41
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
42
+ parser.add_argument('--topk', type=int, default=10,
43
+ help='Top-K closest from gallery for each query.')
44
+ parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx',
45
+ help='Path to the model.')
46
+ parser.add_argument('--save', '-s', action='store_true',
47
+ help='Specify to save results. This flag is invalid when using camera.')
48
+ parser.add_argument('--vis', '-v', action='store_true',
49
+ help='Specify to open a window for result visualization. This flag is invalid when using camera.')
50
  args = parser.parse_args()
51
 
52
  def readImageFromDirectory(img_dir, w=128, h=256):
 
83
  return results_vis
84
 
85
  if __name__ == '__main__':
86
+ backend_id = backend_target_pairs[args.backend_target][0]
87
+ target_id = backend_target_pairs[args.backend_target][1]
88
+
89
  # Instantiate YoutuReID for person ReID
90
+ net = YoutuReID(modelPath=args.model, backendId=backend_id, targetId=target_id)
91
 
92
  # Read images from dir
93
  query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
models/person_reid_youtureid/youtureid.py CHANGED
@@ -26,12 +26,10 @@ class YoutuReID:
26
  def name(self):
27
  return self.__class__.__name__
28
 
29
- def setBackend(self, backend_id):
30
- self._backendId = backend_id
 
31
  self._model.setPreferableBackend(self._backendId)
32
-
33
- def setTarget(self, target_id):
34
- self._targetId = target_id
35
  self._model.setPreferableTarget(self._targetId)
36
 
37
  def _preprocess(self, image):
@@ -67,4 +65,3 @@ class YoutuReID:
67
  dist = np.matmul(query_arr, gallery_arr.T)
68
  idx = np.argsort(-dist, axis=1)
69
  return [i[0:topK] for i in idx]
70
-
 
26
  def name(self):
27
  return self.__class__.__name__
28
 
29
+ def setBackendAndTarget(self, backendId, targetId):
30
+ self._backendId = backendId
31
+ self._targetId = targetId
32
  self._model.setPreferableBackend(self._backendId)
 
 
 
33
  self._model.setPreferableTarget(self._targetId)
34
 
35
  def _preprocess(self, image):
 
65
  dist = np.matmul(query_arr, gallery_arr.T)
66
  idx = np.argsort(-dist, axis=1)
67
  return [i[0:topK] for i in idx]
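
The query method above ranks every gallery image by the dot product of feature vectors and keeps the topK indices per query. A self-contained sketch with fabricated data, assuming the features are L2-normalized so the dot product acts as cosine similarity:

import numpy as np

rng = np.random.default_rng(0)
query_arr = rng.normal(size=(2, 4)).astype(np.float32)    # 2 queries
gallery_arr = rng.normal(size=(5, 4)).astype(np.float32)  # 5 gallery entries
query_arr /= np.linalg.norm(query_arr, axis=1, keepdims=True)
gallery_arr /= np.linalg.norm(gallery_arr, axis=1, keepdims=True)

dist = np.matmul(query_arr, gallery_arr.T)  # (2, 5) similarity matrix
idx = np.argsort(-dist, axis=1)             # best match first
print([i[0:3] for i in idx])                # top-3 gallery indices per query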
 
models/qrcode_wechatqrcode/demo.py CHANGED
@@ -11,23 +11,43 @@ import cv2 as cv
11
 
12
  from wechatqrcode import WeChatQRCode
13
 
14
- def str2bool(v):
15
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
16
- return True
17
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
18
- return False
19
- else:
20
- raise NotImplementedError
 
 
 
 
 
21
 
22
  parser = argparse.ArgumentParser(
23
  description="WeChat QR code detector for detecting and parsing QR code (https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode)")
24
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
25
- parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt', help='Usage: Set path to detect.prototxt.')
26
- parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel', help='Usage: Set path to detect.caffemodel.')
27
- parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt', help='Usage: Set path to sr.prototxt.')
28
- parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel', help='Usage: Set path to sr.caffemodel.')
29
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
30
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  args = parser.parse_args()
32
 
33
  def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255, 0), fps=None):
@@ -56,11 +76,16 @@ def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255,
56
 
57
 
58
  if __name__ == '__main__':
 
 
 
59
  # Instantiate WeChatQRCode
60
  model = WeChatQRCode(args.detect_prototxt_path,
61
  args.detect_model_path,
62
  args.sr_prototxt_path,
63
- args.sr_model_path)
 
 
64
 
65
  # If input is an image:
66
  if args.input is not None:
@@ -107,4 +132,4 @@ if __name__ == '__main__':
107
  # Visualize results in a new window
108
  cv.imshow('WeChatQRCode Demo', frame)
109
 
110
- tm.reset()
 
11
 
12
  from wechatqrcode import WeChatQRCode
13
 
14
+ # Check OpenCV version
15
+ assert cv.__version__ >= "4.7.0", \
16
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
17
+
18
+ # Valid combinations of backends and targets
19
+ backend_target_pairs = [
20
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
21
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
23
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
24
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
25
+ ]
26
 
27
  parser = argparse.ArgumentParser(
28
  description="WeChat QR code detector for detecting and parsing QR code (https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode)")
29
+ parser.add_argument('--input', '-i', type=str,
30
+ help='Usage: Set path to the input image. Omit for using default camera.')
31
+ parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt',
32
+ help='Usage: Set path to detect.prototxt.')
33
+ parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel',
34
+ help='Usage: Set path to detect.caffemodel.')
35
+ parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt',
36
+ help='Usage: Set path to sr.prototxt.')
37
+ parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel',
38
+ help='Usage: Set path to sr.caffemodel.')
39
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
40
+ help='''Choose one of the backend-target pairs to run this demo:
41
+ {:d}: (default) OpenCV implementation + CPU,
42
+ {:d}: CUDA + GPU (CUDA),
43
+ {:d}: CUDA + GPU (CUDA FP16),
44
+ {:d}: TIM-VX + NPU,
45
+ {:d}: CANN + NPU
46
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
47
+ parser.add_argument('--save', '-s', action='store_true',
48
+ help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
49
+ parser.add_argument('--vis', '-v', action='store_true',
50
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
51
  args = parser.parse_args()
52
 
53
  def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255, 0), fps=None):
 
76
 
77
 
78
  if __name__ == '__main__':
79
+ backend_id = backend_target_pairs[args.backend_target][0]
80
+ target_id = backend_target_pairs[args.backend_target][1]
81
+
82
  # Instantiate WeChatQRCode
83
  model = WeChatQRCode(args.detect_prototxt_path,
84
  args.detect_model_path,
85
  args.sr_prototxt_path,
86
+ args.sr_model_path,
87
+ backendId=backend_id,
88
+ targetId=target_id)
89
 
90
  # If input is an image:
91
  if args.input is not None:
 
132
  # Visualize results in a new window
133
  cv.imshow('WeChatQRCode Demo', frame)
134
 
135
+ tm.reset()
models/qrcode_wechatqrcode/wechatqrcode.py CHANGED
@@ -8,27 +8,27 @@ import numpy as np
8
  import cv2 as cv # needs to have cv.wechat_qrcode_WeChatQRCode, which requires compile from source with opencv_contrib/modules/wechat_qrcode
9
 
10
  class WeChatQRCode:
11
- def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path):
12
  self._model = cv.wechat_qrcode_WeChatQRCode(
13
  detect_prototxt_path,
14
  detect_model_path,
15
  sr_prototxt_path,
16
  sr_model_path
17
  )
 
 
 
 
18
 
19
  @property
20
  def name(self):
21
  return self.__class__.__name__
22
 
23
- def setBackend(self, backend_id):
24
- # self._model.setPreferableBackend(backend_id)
25
- if backend_id != 0:
26
- raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
27
-
28
- def setTarget(self, target_id):
29
- # self._model.setPreferableTarget(target_id)
30
- if target_id != 0:
31
  raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
32
 
33
  def infer(self, image):
34
- return self._model.detectAndDecode(image)
 
8
  import cv2 as cv # needs to have cv.wechat_qrcode_WeChatQRCode, which requires compile from source with opencv_contrib/modules/wechat_qrcode
9
 
10
  class WeChatQRCode:
11
+ def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path, backendId=0, targetId=0):
12
  self._model = cv.wechat_qrcode_WeChatQRCode(
13
  detect_prototxt_path,
14
  detect_model_path,
15
  sr_prototxt_path,
16
  sr_model_path
17
  )
18
+ if backendId != 0:
19
+ raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
20
+ if targetId != 0:
21
+ raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
22
 
23
  @property
24
  def name(self):
25
  return self.__class__.__name__
26
 
27
+ def setBackendAndTarget(self, backendId, targetId):
28
+ if backendId != 0:
29
+ raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
30
+ if targetId != 0:
 
 
 
 
31
  raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
32
 
33
  def infer(self, image):
34
+ return self._model.detectAndDecode(image)
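
WeChatQRCode is the one wrapper here that cannot honor non-default pairs at all, since cv.wechat_qrcode_WeChatQRCode (from opencv_contrib's wechat_qrcode module) exposes no backend or target controls; hence the NotImplementedError guards. A hedged usage sketch with the demo's default model paths (assumed present) and a hypothetical input image:

import numpy as np
import cv2 as cv

detector = cv.wechat_qrcode_WeChatQRCode(
    'detect_2021sep.prototxt', 'detect_2021sep.caffemodel',
    'sr_2021sep.prototxt', 'sr_2021sep.caffemodel')
image = cv.imread('qrcode.jpg')  # hypothetical input path
res, points = detector.detectAndDecode(image)
for text, corners in zip(res, points):
    print(text, np.asarray(corners).reshape(-1, 2))  # decoded string + corner coordinates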
models/text_detection_db/db.py CHANGED
@@ -38,12 +38,10 @@ class DB:
38
  def name(self):
39
  return self.__class__.__name__
40
 
41
- def setBackend(self, backend):
42
- self._backendId = backend
 
43
  self._model.setPreferableBackend(self._backendId)
44
-
45
- def setTarget(self, target):
46
- self._targetId = target
47
  self._model.setPreferableTarget(self._targetId)
48
 
49
  def setInputSize(self, input_size):
@@ -55,4 +53,3 @@ class DB:
55
  assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
56
 
57
  return self._model.detect(image)
58
-
 
38
  def name(self):
39
  return self.__class__.__name__
40
 
41
+ def setBackendAndTarget(self, backendId, targetId):
42
+ self._backendId = backendId
43
+ self._targetId = targetId
44
  self._model.setPreferableBackend(self._backendId)
 
 
 
45
  self._model.setPreferableTarget(self._targetId)
46
 
47
  def setInputSize(self, input_size):
 
53
  assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
54
 
55
  return self._model.detect(image)
 
models/text_detection_db/demo.py CHANGED
@@ -11,41 +11,48 @@ import cv2 as cv
11
 
12
  from db import DB
13
 
14
- def str2bool(v):
15
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
16
- return True
17
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
18
- return False
19
- else:
20
- raise NotImplementedError
21
-
22
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
23
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
24
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
25
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
26
- try:
27
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
28
- targets += [cv.dnn.DNN_TARGET_NPU]
29
- help_msg_backends += "; {:d}: TIMVX"
30
- help_msg_targets += "; {:d}: NPU"
31
- except:
32
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
33
 
34
  parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
35
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
36
- parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
37
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
38
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 
 
 
 
 
 
 
 
39
  parser.add_argument('--width', type=int, default=736,
40
  help='Usage: Resize input image to a certain width, default = 736. It should be a multiple of 32.')
41
  parser.add_argument('--height', type=int, default=736,
42
  help='Usage: Resize input image to a certain height, default = 736. It should be a multiple of 32.')
43
- parser.add_argument('--binary_threshold', type=float, default=0.3, help='Usage: Threshold of the binary map, default = 0.3.')
44
- parser.add_argument('--polygon_threshold', type=float, default=0.5, help='Usage: Threshold of polygons, default = 0.5.')
45
- parser.add_argument('--max_candidates', type=int, default=200, help='Usage: Set maximum number of polygon candidates, default = 200.')
46
- parser.add_argument('--unclip_ratio', type=np.float64, default=2.0, help=' Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
47
- parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
48
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
49
  args = parser.parse_args()
50
 
51
  def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
@@ -60,6 +67,9 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isC
60
  return output
61
 
62
  if __name__ == '__main__':
63
  # Instantiate DB
64
  model = DB(modelPath=args.model,
65
  inputSize=[args.width, args.height],
@@ -67,9 +77,8 @@ if __name__ == '__main__':
67
  polygonThreshold=args.polygon_threshold,
68
  maxCandidates=args.max_candidates,
69
  unclipRatio=args.unclip_ratio,
70
- backendId=args.backend,
71
- targetId=args.target
72
- )
73
 
74
  # If input is an image
75
  if args.input is not None:
@@ -143,4 +152,3 @@ if __name__ == '__main__':
143
  cv.imshow('{} Demo'.format(model.name), original_image)
144
 
145
  tm.reset()
146
-
 
11
 
12
  from db import DB
13
 
14
+ # Check OpenCV version
15
+ assert cv.__version__ >= "4.7.0", \
16
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
17
+
18
+ # Valid combinations of backends and targets
19
+ backend_target_pairs = [
20
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
21
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
22
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
23
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
24
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
25
+ ]
26
 
27
  parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
28
+ parser.add_argument('--input', '-i', type=str,
29
+ help='Usage: Set path to the input image. Omit to use the default camera.')
30
+ parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx',
31
+ help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
32
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
33
+ help='''Choose one of the backend-target pairs to run this demo:
34
+ {:d}: (default) OpenCV implementation + CPU,
35
+ {:d}: CUDA + GPU (CUDA),
36
+ {:d}: CUDA + GPU (CUDA FP16),
37
+ {:d}: TIM-VX + NPU,
38
+ {:d}: CANN + NPU
39
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
40
  parser.add_argument('--width', type=int, default=736,
41
  help='Usage: Resize input image to a certain width, default = 736. It should be a multiple of 32.')
42
  parser.add_argument('--height', type=int, default=736,
43
  help='Usage: Resize input image to a certain height, default = 736. It should be a multiple of 32.')
44
+ parser.add_argument('--binary_threshold', type=float, default=0.3,
45
+ help='Usage: Threshold of the binary map, default = 0.3.')
46
+ parser.add_argument('--polygon_threshold', type=float, default=0.5,
47
+ help='Usage: Threshold of polygons, default = 0.5.')
48
+ parser.add_argument('--max_candidates', type=int, default=200,
49
+ help='Usage: Set maximum number of polygon candidates, default = 200.')
50
+ parser.add_argument('--unclip_ratio', type=np.float64, default=2.0,
51
+ help='Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
52
+ parser.add_argument('--save', '-s', action='store_true',
53
+ help='Usage: Specify to save a file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
54
+ parser.add_argument('--vis', '-v', action='store_true',
55
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
56
  args = parser.parse_args()
57
 
58
  def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
 
67
  return output
68
 
69
  if __name__ == '__main__':
70
+ backend_id = backend_target_pairs[args.backend_target][0]
71
+ target_id = backend_target_pairs[args.backend_target][1]
72
+
73
  # Instantiate DB
74
  model = DB(modelPath=args.model,
75
  inputSize=[args.width, args.height],
 
77
  polygonThreshold=args.polygon_threshold,
78
  maxCandidates=args.max_candidates,
79
  unclipRatio=args.unclip_ratio,
80
+ backendId=backend_id,
81
+ targetId=target_id)
 
82
 
83
  # If input is an image
84
  if args.input is not None:
 
152
  cv.imshow('{} Demo'.format(model.name), original_image)
153
 
154
  tm.reset()
 
models/text_recognition_crnn/crnn.py CHANGED
@@ -43,12 +43,10 @@ class CRNN:
43
  def _load_charset(self, charset):
44
  return ''.join(charset.splitlines())
45
 
46
- def setBackend(self, backend_id):
47
- self._backendId = backend_id
 
48
  self._model.setPreferableBackend(self._backendId)
49
-
50
- def setTarget(self, target_id):
51
- self._targetId = target_id
52
  self._model.setPreferableTarget(self._targetId)
53
 
54
  def _preprocess(self, image, rbbox):
 
43
  def _load_charset(self, charset):
44
  return ''.join(charset.splitlines())
45
 
46
+ def setBackendAndTarget(self, backendId, targetId):
47
+ self._backendId = backendId
48
+ self._targetId = targetId
49
  self._model.setPreferableBackend(self._backendId)
50
  self._model.setPreferableTarget(self._targetId)
51
 
52
  def _preprocess(self, image, rbbox):
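The demos index into backend_target_pairs with an unchecked --backend_target value, so an out-of-range index raises IndexError. A small guard sketch before applying the setter above; select_backend_target is a hypothetical helper, not part of this commit:

    def select_backend_target(pairs, index):
        # Fall back to the default OpenCV + CPU pair on an out-of-range index
        if not 0 <= index < len(pairs):
            index = 0
        return pairs[index]

    backend_id, target_id = select_backend_target(backend_target_pairs, args.backend_target)
    recognizer.setBackendAndTarget(backend_id, target_id)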
models/text_recognition_crnn/demo.py CHANGED
@@ -15,38 +15,41 @@ from crnn import CRNN
15
  sys.path.append('../text_detection_db')
16
  from db import DB
17
 
18
- def str2bool(v):
19
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
20
- return True
21
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
22
- return False
23
- else:
24
- raise NotImplementedError
25
-
26
- backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
27
- targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
28
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
29
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
30
- try:
31
- backends += [cv.dnn.DNN_BACKEND_TIMVX]
32
- targets += [cv.dnn.DNN_TARGET_NPU]
33
- help_msg_backends += "; {:d}: TIMVX"
34
- help_msg_targets += "; {:d}: NPU"
35
- except:
36
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
37
 
38
  parser = argparse.ArgumentParser(
39
  description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
40
- parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
41
- parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
42
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
43
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
44
- parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
45
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
46
  parser.add_argument('--width', type=int, default=736,
47
  help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
48
  parser.add_argument('--height', type=int, default=736,
49
  help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
50
  args = parser.parse_args()
51
 
52
  def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
@@ -59,8 +62,9 @@ def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2
59
  return output
60
 
61
  if __name__ == '__main__':
62
- # Instantiate CRNN for text recognition
63
- recognizer = CRNN(modelPath=args.model)
 
64
  # Instantiate DB for text detection
65
  detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
66
  inputSize=[args.width, args.height],
@@ -68,9 +72,10 @@ if __name__ == '__main__':
68
  polygonThreshold=0.5,
69
  maxCandidates=200,
70
  unclipRatio=2.0,
71
- backendId=args.backend,
72
- targetId=args.target
73
- )
 
74
 
75
  # If input is an image
76
  if args.input is not None:
@@ -161,4 +166,3 @@ if __name__ == '__main__':
161
 
162
  # Visualize results in a new Window
163
  cv.imshow('{} Demo'.format(recognizer.name), original_image)
164
-
 
15
  sys.path.append('../text_detection_db')
16
  from db import DB
17
 
18
+ # Check OpenCV version
19
+ assert cv.__version__ >= "4.7.0", \
20
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
21
+
22
+ # Valid combinations of backends and targets
23
+ backend_target_pairs = [
24
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
25
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
26
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
27
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
28
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
29
+ ]
30
 
31
  parser = argparse.ArgumentParser(
32
  description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
33
+ parser.add_argument('--input', '-i', type=str,
34
+ help='Usage: Set path to the input image. Omit to use the default camera.')
35
+ parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx',
36
+ help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
37
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
38
+ help='''Choose one of the backend-target pairs to run this demo:
39
+ {:d}: (default) OpenCV implementation + CPU,
40
+ {:d}: CUDA + GPU (CUDA),
41
+ {:d}: CUDA + GPU (CUDA FP16),
42
+ {:d}: TIM-VX + NPU,
43
+ {:d}: CANN + NPU
44
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
45
  parser.add_argument('--width', type=int, default=736,
46
  help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
47
  parser.add_argument('--height', type=int, default=736,
48
  help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
49
+ parser.add_argument('--save', '-s', action='store_true',
50
+ help='Usage: Specify to save a file with results. Invalid in case of camera input.')
51
+ parser.add_argument('--vis', '-v', action='store_true',
52
+ help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
53
  args = parser.parse_args()
54
 
55
  def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
 
62
  return output
63
 
64
  if __name__ == '__main__':
65
+ backend_id = backend_target_pairs[args.backend_target][0]
66
+ target_id = backend_target_pairs[args.backend_target][1]
67
+
68
  # Instantiate DB for text detection
69
  detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
70
  inputSize=[args.width, args.height],
 
72
  polygonThreshold=0.5,
73
  maxCandidates=200,
74
  unclipRatio=2.0,
75
+ backendId=backend_id,
76
+ targetId=target_id)
77
+ # Instantiate CRNN for text recognition
78
+ recognizer = CRNN(modelPath=args.model, backendId=backend_id, targetId=target_id)
79
 
80
  # If input is an image
81
  if args.input is not None:
 
166
 
167
  # Visualize results in a new Window
168
  cv.imshow('{} Demo'.format(recognizer.name), original_image)
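Illustrative invocations of the updated demos (image paths are examples; -bt values index into the backend_target_pairs table defined above):

    # default: OpenCV backend on CPU, show results in a window
    python3 demo.py --input sample.jpg -v
    # CUDA backend with the FP16 target, save the result instead
    python3 demo.py --input sample.jpg -bt 2 -s

Since --save and --vis are now store_true flags they take no value; argparse would reject a trailing "True"/"False" as an unrecognized argument.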