ytfeng committed on
Commit
ec17a5b
·
1 Parent(s): de9c40f

Add options for demo scripts to select backend & targets (#43)

Browse files

* add options for selecting backend & targets

* add eol

models/face_detection_yunet/demo.py CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
19
  else:
20
  raise NotImplementedError
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
23
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
24
  parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the model.')
 
 
25
  parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
26
  parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
27
  parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
@@ -61,7 +75,9 @@ if __name__ == '__main__':
61
  inputSize=[320, 320],
62
  confThreshold=args.conf_threshold,
63
  nmsThreshold=args.nms_threshold,
64
- topK=args.top_k)
 
 
65
 
66
  # If input is an image
67
  if args.input is not None:
@@ -117,4 +133,5 @@ if __name__ == '__main__':
117
  # Visualize results in a new Window
118
  cv.imshow('YuNet Demo', frame)
119
 
120
- tm.reset()
 
 
19
  else:
20
  raise NotImplementedError
21
 
22
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
23
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
24
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
25
+ help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
26
+ try:
27
+ backends += [cv.dnn.DNN_BACKEND_TIMVX]
28
+ targets += [cv.dnn.DNN_TARGET_NPU]
29
+ help_msg_backends += "; {:d}: TIMVX"
30
+ help_msg_targets += "; {:d}: NPU"
31
+ except:
32
+ print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
33
+
34
  parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
35
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
36
  parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the model.')
37
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
38
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
39
  parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
40
  parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
41
  parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
 
75
  inputSize=[320, 320],
76
  confThreshold=args.conf_threshold,
77
  nmsThreshold=args.nms_threshold,
78
+ topK=args.top_k,
79
+ backendId=args.backend,
80
+ targetId=args.target)
81
 
82
  # If input is an image
83
  if args.input is not None:
 
133
  # Visualize results in a new Window
134
  cv.imshow('YuNet Demo', frame)
135
 
136
+ tm.reset()
137
+
models/face_detection_yunet/yunet.py CHANGED
@@ -63,4 +63,5 @@ class YuNet:
63
  def infer(self, image):
64
  # Forward
65
  faces = self._model.detect(image)
66
- return faces[1]
 
 
63
  def infer(self, image):
64
  # Forward
65
  faces = self._model.detect(image)
66
+ return faces[1]
67
+
models/face_recognition_sface/demo.py CHANGED
@@ -23,11 +23,25 @@ def str2bool(v):
23
  else:
24
  raise NotImplementedError
25
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  parser = argparse.ArgumentParser(
27
  description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
28
  parser.add_argument('--input1', '-i1', type=str, help='Path to the input image 1.')
29
  parser.add_argument('--input2', '-i2', type=str, help='Path to the input image 2.')
30
  parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the model.')
 
 
31
  parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Distance type. \'0\': cosine, \'1\': norm_l1.')
32
  parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
33
  parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
@@ -35,13 +49,15 @@ args = parser.parse_args()
35
 
36
  if __name__ == '__main__':
37
  # Instantiate SFace for face recognition
38
- recognizer = SFace(modelPath=args.model, disType=args.dis_type)
39
  # Instantiate YuNet for face detection
40
  detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2021dec.onnx',
41
  inputSize=[320, 320],
42
  confThreshold=0.9,
43
  nmsThreshold=0.3,
44
- topK=5000)
 
 
45
 
46
  img1 = cv.imread(args.input1)
47
  img2 = cv.imread(args.input2)
@@ -56,4 +72,5 @@ if __name__ == '__main__':
56
 
57
  # Match
58
  result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
59
- print('Result: {}.'.format('same identity' if result else 'different identities'))
 
 
23
  else:
24
  raise NotImplementedError
25
 
26
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
27
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
28
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
29
+ help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
30
+ try:
31
+ backends += [cv.dnn.DNN_BACKEND_TIMVX]
32
+ targets += [cv.dnn.DNN_TARGET_NPU]
33
+ help_msg_backends += "; {:d}: TIMVX"
34
+ help_msg_targets += "; {:d}: NPU"
35
+ except:
36
+ print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
37
+
38
  parser = argparse.ArgumentParser(
39
  description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
40
  parser.add_argument('--input1', '-i1', type=str, help='Path to the input image 1.')
41
  parser.add_argument('--input2', '-i2', type=str, help='Path to the input image 2.')
42
  parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the model.')
43
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
44
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
45
  parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Distance type. \'0\': cosine, \'1\': norm_l1.')
46
  parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
47
  parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
49
 
50
  if __name__ == '__main__':
51
  # Instantiate SFace for face recognition
52
+ recognizer = SFace(modelPath=args.model, disType=args.dis_type, backendId=args.backend, targetId=args.target)
53
  # Instantiate YuNet for face detection
54
  detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2021dec.onnx',
55
  inputSize=[320, 320],
56
  confThreshold=0.9,
57
  nmsThreshold=0.3,
58
+ topK=5000,
59
+ backendId=args.backend,
60
+ targetId=args.target)
61
 
62
  img1 = cv.imread(args.input1)
63
  img2 = cv.imread(args.input2)
 
72
 
73
  # Match
74
  result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
75
+ print('Result: {}.'.format('same identity' if result else 'different identities'))
76
+
models/human_segmentation_pphumanseg/demo.py CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
19
  else:
20
  raise NotImplementedError
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
23
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
24
  parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Path to the model.')
 
 
25
  parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
26
  parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
27
  args = parser.parse_args()
@@ -84,7 +98,7 @@ def visualize(image, result, weight=0.6, fps=None):
84
 
85
  if __name__ == '__main__':
86
  # Instantiate PPHumanSeg
87
- model = PPHumanSeg(modelPath=args.model)
88
 
89
  if args.input is not None:
90
  # Read image and resize to 192x192
@@ -138,4 +152,5 @@ if __name__ == '__main__':
138
  # Visualize results in a new window
139
  cv.imshow('PPHumanSeg Demo', frame)
140
 
141
- tm.reset()
 
 
19
  else:
20
  raise NotImplementedError
21
 
22
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
23
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
24
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
25
+ help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
26
+ try:
27
+ backends += [cv.dnn.DNN_BACKEND_TIMVX]
28
+ targets += [cv.dnn.DNN_TARGET_NPU]
29
+ help_msg_backends += "; {:d}: TIMVX"
30
+ help_msg_targets += "; {:d}: NPU"
31
+ except:
32
+ print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
33
+
34
  parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
35
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
36
  parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Path to the model.')
37
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
38
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
39
  parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
40
  parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
41
  args = parser.parse_args()
 
98
 
99
  if __name__ == '__main__':
100
  # Instantiate PPHumanSeg
101
+ model = PPHumanSeg(modelPath=args.model, backendId=args.backend, targetId=args.target)
102
 
103
  if args.input is not None:
104
  # Read image and resize to 192x192
 
152
  # Visualize results in a new window
153
  cv.imshow('PPHumanSeg Demo', frame)
154
 
155
+ tm.reset()
156
+
models/human_segmentation_pphumanseg/pphumanseg.py CHANGED
@@ -8,9 +8,14 @@ import numpy as np
8
  import cv2 as cv
9
 
10
  class PPHumanSeg:
11
- def __init__(self, modelPath):
12
  self._modelPath = modelPath
 
 
 
13
  self._model = cv.dnn.readNet(self._modelPath)
 
 
14
 
15
  self._inputNames = ''
16
  self._outputNames = ['save_infer_model/scale_0.tmp_1']
@@ -23,10 +28,12 @@ class PPHumanSeg:
23
  return self.__class__.__name__
24
 
25
  def setBackend(self, backend_id):
26
- self._model.setPreferableBackend(backend_id)
 
27
 
28
  def setTarget(self, target_id):
29
- self._model.setPreferableTarget(target_id)
 
30
 
31
  def _preprocess(self, image):
32
  image = image.astype(np.float32, copy=False) / 255.0
@@ -52,4 +59,5 @@ class PPHumanSeg:
52
 
53
  def _postprocess(self, outputBlob):
54
  result = np.argmax(outputBlob[0], axis=1).astype(np.uint8)
55
- return result
 
 
8
  import cv2 as cv
9
 
10
  class PPHumanSeg:
11
+ def __init__(self, modelPath, backendId=0, targetId=0):
12
  self._modelPath = modelPath
13
+ self._backendId = backendId
14
+ self._targetId = targetId
15
+
16
  self._model = cv.dnn.readNet(self._modelPath)
17
+ self._model.setPreferableBackend(self._backendId)
18
+ self._model.setPreferableTarget(self._targetId)
19
 
20
  self._inputNames = ''
21
  self._outputNames = ['save_infer_model/scale_0.tmp_1']
 
28
  return self.__class__.__name__
29
 
30
  def setBackend(self, backend_id):
31
+ self._backendId = backend_id
32
+ self._model.setPreferableBackend(self._backendId)
33
 
34
  def setTarget(self, target_id):
35
+ self._targetId = target_id
36
+ self._model.setPreferableTarget(self._targetId)
37
 
38
  def _preprocess(self, image):
39
  image = image.astype(np.float32, copy=False) / 255.0
 
59
 
60
  def _postprocess(self, outputBlob):
61
  result = np.argmax(outputBlob[0], axis=1).astype(np.uint8)
62
+ return result
63
+
models/image_classification_ppresnet/demo.py CHANGED
@@ -19,15 +19,29 @@ def str2bool(v):
19
  else:
20
  raise NotImplementedError
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
23
  parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
24
  parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Path to the model.')
 
 
25
  parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
26
  args = parser.parse_args()
27
 
28
  if __name__ == '__main__':
29
  # Instantiate ResNet
30
- model = PPResNet(modelPath=args.model, labelPath=args.label)
31
 
32
  # Read image and get a 224x224 crop from a 256x256 resized
33
  image = cv.imread(args.input)
@@ -40,3 +54,4 @@ if __name__ == '__main__':
40
 
41
  # Print result
42
  print('label: {}'.format(result))
 
 
19
  else:
20
  raise NotImplementedError
21
 
22
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
23
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
24
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
25
+ help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
26
+ try:
27
+ backends += [cv.dnn.DNN_BACKEND_TIMVX]
28
+ targets += [cv.dnn.DNN_TARGET_NPU]
29
+ help_msg_backends += "; {:d}: TIMVX"
30
+ help_msg_targets += "; {:d}: NPU"
31
+ except:
32
+ print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
33
+
34
  parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
35
  parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
36
  parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Path to the model.')
37
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
38
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
39
  parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
40
  args = parser.parse_args()
41
 
42
  if __name__ == '__main__':
43
  # Instantiate ResNet
44
+ model = PPResNet(modelPath=args.model, labelPath=args.label, backendId=args.backend, targetId=args.target)
45
 
46
  # Read image and get a 224x224 crop from a 256x256 resized
47
  image = cv.imread(args.input)
 
54
 
55
  # Print result
56
  print('label: {}'.format(result))
57
+
models/image_classification_ppresnet/ppresnet.py CHANGED
@@ -9,10 +9,15 @@ import numpy as np
9
  import cv2 as cv
10
 
11
  class PPResNet:
12
- def __init__(self, modelPath, labelPath):
13
  self._modelPath = modelPath
14
- self._model = cv.dnn.readNet(self._modelPath)
15
  self._labelPath = labelPath
 
 
 
 
 
 
16
 
17
  self._inputNames = ''
18
  self._outputNames = ['save_infer_model/scale_0.tmp_0']
@@ -35,10 +40,12 @@ class PPResNet:
35
  return self.__class__.__name__
36
 
37
  def setBackend(self, backend_id):
38
- self._model.setPreferableBackend(backend_id)
 
39
 
40
  def setTarget(self, target_id):
41
- self._model.setPreferableTarget(target_id)
 
42
 
43
  def _preprocess(self, image):
44
  image = image.astype(np.float32, copy=False) / 255.0
@@ -64,4 +71,5 @@ class PPResNet:
64
 
65
  def _postprocess(self, outputBlob):
66
  class_id = np.argmax(outputBlob[0])
67
- return self._labels[class_id]
 
 
9
  import cv2 as cv
10
 
11
  class PPResNet:
12
+ def __init__(self, modelPath, labelPath, backendId=0, targetId=0):
13
  self._modelPath = modelPath
 
14
  self._labelPath = labelPath
15
+ self._backendId = backendId
16
+ self._targetId = targetId
17
+
18
+ self._model = cv.dnn.readNet(self._modelPath)
19
+ self._model.setPreferableBackend(self._backendId)
20
+ self._model.setPreferableTarget(self._targetId)
21
 
22
  self._inputNames = ''
23
  self._outputNames = ['save_infer_model/scale_0.tmp_0']
 
40
  return self.__class__.__name__
41
 
42
  def setBackend(self, backend_id):
43
+ self._backendId = backend_id
44
+ self._model.setPreferableBackend(self._backendId)
45
 
46
  def setTarget(self, target_id):
47
+ self._targetId = target_id
48
+ self._model.setPreferableTarget(self._targetId)
49
 
50
  def _preprocess(self, image):
51
  image = image.astype(np.float32, copy=False) / 255.0
 
71
 
72
  def _postprocess(self, outputBlob):
73
  class_id = np.argmax(outputBlob[0])
74
+ return self._labels[class_id]
75
+
models/person_reid_youtureid/demo.py CHANGED
@@ -20,10 +20,24 @@ def str2bool(v):
20
  else:
21
  raise NotImplementedError
22
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  parser = argparse.ArgumentParser(
24
  description="ReID baseline models from Tencent Youtu Lab")
25
  parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
26
  parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
 
 
27
  parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
28
  parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
29
  parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
@@ -65,7 +79,7 @@ def visualize(results, query_dir, gallery_dir, output_size=(128, 384)):
65
 
66
  if __name__ == '__main__':
67
  # Instantiate YoutuReID for person ReID
68
- net = YoutuReID(modelPath=args.model)
69
 
70
  # Read images from dir
71
  query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
@@ -97,4 +111,5 @@ if __name__ == '__main__':
97
  cv.namedWindow('result-{}'.format(f), cv.WINDOW_AUTOSIZE)
98
  cv.imshow('result-{}'.format(f), img)
99
  cv.waitKey(0)
100
- cv.destroyAllWindows()
 
 
20
  else:
21
  raise NotImplementedError
22
 
23
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
24
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
25
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
26
+ help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
27
+ try:
28
+ backends += [cv.dnn.DNN_BACKEND_TIMVX]
29
+ targets += [cv.dnn.DNN_TARGET_NPU]
30
+ help_msg_backends += "; {:d}: TIMVX"
31
+ help_msg_targets += "; {:d}: NPU"
32
+ except:
33
+ print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
34
+
35
  parser = argparse.ArgumentParser(
36
  description="ReID baseline models from Tencent Youtu Lab")
37
  parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
38
  parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
39
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
40
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
41
  parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
42
  parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
43
  parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
 
79
 
80
  if __name__ == '__main__':
81
  # Instantiate YoutuReID for person ReID
82
+ net = YoutuReID(modelPath=args.model, backendId=args.backend, targetId=args.target)
83
 
84
  # Read images from dir
85
  query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
 
111
  cv.namedWindow('result-{}'.format(f), cv.WINDOW_AUTOSIZE)
112
  cv.imshow('result-{}'.format(f), img)
113
  cv.waitKey(0)
114
+ cv.destroyAllWindows()
115
+
models/person_reid_youtureid/youtureid.py CHANGED
@@ -8,8 +8,15 @@ import numpy as np
8
  import cv2 as cv
9
 
10
  class YoutuReID:
11
- def __init__(self, modelPath):
 
 
 
 
12
  self._model = cv.dnn.readNet(modelPath)
 
 
 
13
  self._input_size = (128, 256) # fixed
14
  self._output_dim = 768
15
  self._mean = (0.485, 0.456, 0.406)
@@ -20,10 +27,12 @@ class YoutuReID:
20
  return self.__class__.__name__
21
 
22
  def setBackend(self, backend_id):
23
- self._model.setPreferableBackend(backend_id)
 
24
 
25
  def setTarget(self, target_id):
26
- self._model.setPreferableTarget(target_id)
 
27
 
28
  def _preprocess(self, image):
29
  image = image[:, :, ::-1]
@@ -57,4 +66,5 @@ class YoutuReID:
57
 
58
  dist = np.matmul(query_arr, gallery_arr.T)
59
  idx = np.argsort(-dist, axis=1)
60
- return [i[0:topK] for i in idx]
 
 
8
  import cv2 as cv
9
 
10
  class YoutuReID:
11
+ def __init__(self, modelPath, backendId=0, targetId=0):
12
+ self._modelPath = modelPath
13
+ self._backendId = backendId
14
+ self._targetId = targetId
15
+
16
  self._model = cv.dnn.readNet(modelPath)
17
+ self._model.setPreferableBackend(self._backendId)
18
+ self._model.setPreferableTarget(self._targetId)
19
+
20
  self._input_size = (128, 256) # fixed
21
  self._output_dim = 768
22
  self._mean = (0.485, 0.456, 0.406)
 
27
  return self.__class__.__name__
28
 
29
  def setBackend(self, backend_id):
30
+ self._backendId = backend_id
31
+ self._model.setPreferableBackend(self._backendId)
32
 
33
  def setTarget(self, target_id):
34
+ self._targetId = target_id
35
+ self._model.setPreferableTarget(self._targetId)
36
 
37
  def _preprocess(self, image):
38
  image = image[:, :, ::-1]
 
66
 
67
  dist = np.matmul(query_arr, gallery_arr.T)
68
  idx = np.argsort(-dist, axis=1)
69
+ return [i[0:topK] for i in idx]
70
+
models/text_detection_db/db.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
8
  import cv2 as cv
9
 
10
  class DB:
11
- def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0):
12
  self._modelPath = modelPath
13
  self._model = cv.dnn_TextDetectionModel_DB(
14
  cv.dnn.readNet(self._modelPath)
@@ -21,6 +21,11 @@ class DB:
21
  self._polygonThreshold = polygonThreshold
22
  self._maxCandidates = maxCandidates
23
  self._unclipRatio = unclipRatio
 
 
 
 
 
24
 
25
  self._model.setBinaryThreshold(self._binaryThreshold)
26
  self._model.setPolygonThreshold(self._polygonThreshold)
@@ -34,10 +39,12 @@ class DB:
34
  return self.__class__.__name__
35
 
36
  def setBackend(self, backend):
37
- self._model.setPreferableBackend(backend)
 
38
 
39
  def setTarget(self, target):
40
- self._model.setPreferableTarget(target)
 
41
 
42
  def setInputSize(self, input_size):
43
  self._inputSize = tuple(input_size)
@@ -47,4 +54,5 @@ class DB:
47
  assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
48
  assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
49
 
50
- return self._model.detect(image)
 
 
8
  import cv2 as cv
9
 
10
  class DB:
11
+ def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
12
  self._modelPath = modelPath
13
  self._model = cv.dnn_TextDetectionModel_DB(
14
  cv.dnn.readNet(self._modelPath)
 
21
  self._polygonThreshold = polygonThreshold
22
  self._maxCandidates = maxCandidates
23
  self._unclipRatio = unclipRatio
24
+ self._backendId = backendId
25
+ self._targetId = targetId
26
+
27
+ self._model.setPreferableBackend(self._backendId)
28
+ self._model.setPreferableTarget(self._targetId)
29
 
30
  self._model.setBinaryThreshold(self._binaryThreshold)
31
  self._model.setPolygonThreshold(self._polygonThreshold)
 
39
  return self.__class__.__name__
40
 
41
  def setBackend(self, backend):
42
+ self._backendId = backend
43
+ self._model.setPreferableBackend(self._backendId)
44
 
45
  def setTarget(self, target):
46
+ self._targetId = target
47
+ self._model.setPreferableTarget(self._targetId)
48
 
49
  def setInputSize(self, input_size):
50
  self._inputSize = tuple(input_size)
 
54
  assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
55
  assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
56
 
57
+ return self._model.detect(image)
58
+
models/text_detection_db/demo.py CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
19
  else:
20
  raise NotImplementedError
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
23
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
24
  parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Path to the model.')
 
 
25
  parser.add_argument('--width', type=int, default=736,
26
  help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
27
  parser.add_argument('--height', type=int, default=736,
@@ -53,6 +67,8 @@ if __name__ == '__main__':
53
  polygonThreshold=args.polygon_threshold,
54
  maxCandidates=args.max_candidates,
55
  unclipRatio=args.unclip_ratio
 
 
56
  )
57
 
58
  # If input is an image
@@ -104,4 +120,5 @@ if __name__ == '__main__':
104
  # Visualize results in a new Window
105
  cv.imshow('{} Demo'.format(model.name), frame)
106
 
107
- tm.reset()
 
 
19
  else:
20
  raise NotImplementedError
21
 
22
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
23
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
24
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
25
+ help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
26
+ try:
27
+ backends += [cv.dnn.DNN_BACKEND_TIMVX]
28
+ targets += [cv.dnn.DNN_TARGET_NPU]
29
+ help_msg_backends += "; {:d}: TIMVX"
30
+ help_msg_targets += "; {:d}: NPU"
31
+ except:
32
+ print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
33
+
34
  parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
35
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
36
  parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Path to the model.')
37
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
38
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
39
  parser.add_argument('--width', type=int, default=736,
40
  help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
41
  parser.add_argument('--height', type=int, default=736,
 
67
  polygonThreshold=args.polygon_threshold,
68
  maxCandidates=args.max_candidates,
69
  unclipRatio=args.unclip_ratio
70
+ backendId=args.backend,
71
+ targetId=args.target
72
  )
73
 
74
  # If input is an image
 
120
  # Visualize results in a new Window
121
  cv.imshow('{} Demo'.format(model.name), frame)
122
 
123
+ tm.reset()
124
+
models/text_recognition_crnn/crnn.py CHANGED
@@ -8,10 +8,17 @@ import numpy as np
8
  import cv2 as cv
9
 
10
  class CRNN:
11
- def __init__(self, modelPath, charsetPath):
12
  self._model_path = modelPath
 
 
 
 
13
  self._model = cv.dnn.readNet(self._model_path)
14
- self._charset = self._load_charset(charsetPath)
 
 
 
15
  self._inputSize = [100, 32] # Fixed
16
  self._targetVertices = np.array([
17
  [0, self._inputSize[1] - 1],
@@ -33,10 +40,12 @@ class CRNN:
33
  return charset
34
 
35
  def setBackend(self, backend_id):
36
- self._model.setPreferableBackend(backend_id)
 
37
 
38
  def setTarget(self, target_id):
39
- self._model.setPreferableTarget(target_id)
 
40
 
41
  def _preprocess(self, image, rbbox):
42
  # Remove conf, reshape and ensure all is np.float32
@@ -81,4 +90,5 @@ class CRNN:
81
  for i in range(len(text)):
82
  if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
83
  char_list.append(text[i])
84
- return ''.join(char_list)
 
 
8
  import cv2 as cv
9
 
10
  class CRNN:
11
+ def __init__(self, modelPath, charsetPath, backendId=0, targetId=0):
12
  self._model_path = modelPath
13
+ self._charsetPath = charsetPath
14
+ self._backendId = backendId
15
+ self._targetId = targetId
16
+
17
  self._model = cv.dnn.readNet(self._model_path)
18
+ self._model.setPreferableBackend(self._backendId)
19
+ self._model.setPreferableTarget(self._targetId)
20
+
21
+ self._charset = self._load_charset(self._charsetPath)
22
  self._inputSize = [100, 32] # Fixed
23
  self._targetVertices = np.array([
24
  [0, self._inputSize[1] - 1],
 
40
  return charset
41
 
42
  def setBackend(self, backend_id):
43
+ self._backendId = backend_id
44
+ self._model.setPreferableBackend(self._backendId)
45
 
46
  def setTarget(self, target_id):
47
+ self._targetId = target_id
48
+ self._model.setPreferableTarget(self._targetId)
49
 
50
  def _preprocess(self, image, rbbox):
51
  # Remove conf, reshape and ensure all is np.float32
 
90
  for i in range(len(text)):
91
  if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
92
  char_list.append(text[i])
93
+ return ''.join(char_list)
94
+
models/text_recognition_crnn/demo.py CHANGED
@@ -23,10 +23,24 @@ def str2bool(v):
23
  else:
24
  raise NotImplementedError
25
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  parser = argparse.ArgumentParser(
27
  description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
28
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
29
  parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
 
 
30
  parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
31
  parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
32
  parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
@@ -50,7 +64,9 @@ if __name__ == '__main__':
50
  binaryThreshold=0.3,
51
  polygonThreshold=0.5,
52
  maxCandidates=200,
53
- unclipRatio=2.0
 
 
54
  )
55
 
56
  # If input is an image
@@ -118,4 +134,5 @@ if __name__ == '__main__':
118
  print(texts)
119
 
120
  # Visualize results in a new Window
121
- cv.imshow('{} Demo'.format(recognizer.name), frame)
 
 
23
  else:
24
  raise NotImplementedError
25
 
26
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
27
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
28
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
29
+ help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
30
+ try:
31
+ backends += [cv.dnn.DNN_BACKEND_TIMVX]
32
+ targets += [cv.dnn.DNN_TARGET_NPU]
33
+ help_msg_backends += "; {:d}: TIMVX"
34
+ help_msg_targets += "; {:d}: NPU"
35
+ except:
36
+ print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
37
+
38
  parser = argparse.ArgumentParser(
39
  description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
40
  parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
41
  parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
42
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
43
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
44
  parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
45
  parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
46
  parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
64
  binaryThreshold=0.3,
65
  polygonThreshold=0.5,
66
  maxCandidates=200,
67
+ unclipRatio=2.0,
68
+ backendId=args.backend,
69
+ targetId=args.target
70
  )
71
 
72
  # If input is an image
 
134
  print(texts)
135
 
136
  # Visualize results in a new Window
137
+ cv.imshow('{} Demo'.format(recognizer.name), frame)
138
+