Add options for demo scripts to select backend & targets (#43)
* add options for selecting backend & targets
* add eol
- models/face_detection_yunet/demo.py +19 -2
- models/face_detection_yunet/yunet.py +2 -1
- models/face_recognition_sface/demo.py +20 -3
- models/human_segmentation_pphumanseg/demo.py +17 -2
- models/human_segmentation_pphumanseg/pphumanseg.py +12 -4
- models/image_classification_ppresnet/demo.py +16 -1
- models/image_classification_ppresnet/ppresnet.py +13 -5
- models/person_reid_youtureid/demo.py +17 -2
- models/person_reid_youtureid/youtureid.py +14 -4
- models/text_detection_db/db.py +12 -4
- models/text_detection_db/demo.py +18 -1
- models/text_recognition_crnn/crnn.py +15 -5
- models/text_recognition_crnn/demo.py +19 -2
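
Every script gains the same selection pattern; condensed below as a sketch for reference (the flag wiring mirrors the diffs that follow; 'model.onnx' is a placeholder, not a file from this repo):

import argparse
import cv2 as cv

# Backends and targets offered to the user; the CUDA entries assume a CUDA-enabled build.
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]

parser = argparse.ArgumentParser()
parser.add_argument('--backend', '-b', type=int, default=backends[0])
parser.add_argument('--target', '-t', type=int, default=targets[0])
args = parser.parse_args()

net = cv.dnn.readNet('model.onnx')      # placeholder model path
net.setPreferableBackend(args.backend)  # apply preferences before the first forward pass
net.setPreferableTarget(args.target)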
models/face_detection_yunet/demo.py
CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
@@ -61,7 +75,9 @@ if __name__ == '__main__':
                   inputSize=[320, 320],
                   confThreshold=args.conf_threshold,
                   nmsThreshold=args.nms_threshold,
-                  topK=args.top_k)
+                  topK=args.top_k,
+                  backendId=args.backend,
+                  targetId=args.target)
 
     # If input is an image
     if args.input is not None:
@@ -117,4 +133,5 @@ if __name__ == '__main__':
             # Visualize results in a new Window
             cv.imshow('YuNet Demo', frame)
 
-            tm.reset()
+            tm.reset()
+
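
The try/except above works because referencing a constant that a given build does not define raises AttributeError on the cv.dnn module; a minimal illustration, assuming a build compiled without TIM-VX:

import cv2 as cv

try:
    timvx = cv.dnn.DNN_BACKEND_TIMVX  # missing attribute on builds without TIM-VX
except AttributeError:
    timvx = None

print('TIM-VX backend available:', timvx is not None)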
models/face_detection_yunet/yunet.py
CHANGED
@@ -63,4 +63,5 @@ class YuNet:
     def infer(self, image):
         # Forward
         faces = self._model.detect(image)
-        return faces[1]
+        return faces[1]
+
models/face_recognition_sface/demo.py
CHANGED
@@ -23,11 +23,25 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(
     description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
 parser.add_argument('--input1', '-i1', type=str, help='Path to the input image 1.')
 parser.add_argument('--input2', '-i2', type=str, help='Path to the input image 2.')
 parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Distance type. \'0\': cosine, \'1\': norm_l1.')
 parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
@@ -35,13 +49,15 @@ args = parser.parse_args()
 
 if __name__ == '__main__':
     # Instantiate SFace for face recognition
-    recognizer = SFace(modelPath=args.model, disType=args.dis_type)
+    recognizer = SFace(modelPath=args.model, disType=args.dis_type, backendId=args.backend, targetId=args.target)
     # Instantiate YuNet for face detection
     detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2021dec.onnx',
                      inputSize=[320, 320],
                      confThreshold=0.9,
                      nmsThreshold=0.3,
-                     topK=5000)
+                     topK=5000,
+                     backendId=args.backend,
+                     targetId=args.target)
 
     img1 = cv.imread(args.input1)
     img2 = cv.imread(args.input2)
@@ -56,4 +72,5 @@ if __name__ == '__main__':
 
     # Match
     result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
-    print('Result: {}.'.format('same identity' if result else 'different identities'))
+    print('Result: {}.'.format('same identity' if result else 'different identities'))
+
models/human_segmentation_pphumanseg/demo.py
CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
@@ -84,7 +98,7 @@ def visualize(image, result, weight=0.6, fps=None):
 
 if __name__ == '__main__':
     # Instantiate PPHumanSeg
-    model = PPHumanSeg(modelPath=args.model)
+    model = PPHumanSeg(modelPath=args.model, backendId=args.backend, targetId=args.target)
 
     if args.input is not None:
         # Read image and resize to 192x192
@@ -138,4 +152,5 @@ if __name__ == '__main__':
             # Visualize results in a new window
             cv.imshow('PPHumanSeg Demo', frame)
 
-            tm.reset()
+            tm.reset()
+
models/human_segmentation_pphumanseg/pphumanseg.py
CHANGED
@@ -8,9 +8,14 @@ import numpy as np
 import cv2 as cv
 
 class PPHumanSeg:
-    def __init__(self, modelPath):
+    def __init__(self, modelPath, backendId=0, targetId=0):
         self._modelPath = modelPath
+        self._backendId = backendId
+        self._targetId = targetId
+
         self._model = cv.dnn.readNet(self._modelPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
 
         self._inputNames = ''
         self._outputNames = ['save_infer_model/scale_0.tmp_1']
@@ -23,10 +28,12 @@ class PPHumanSeg:
         return self.__class__.__name__
 
     def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
         image = image.astype(np.float32, copy=False) / 255.0
@@ -52,4 +59,5 @@ class PPHumanSeg:
 
     def _postprocess(self, outputBlob):
         result = np.argmax(outputBlob[0], axis=1).astype(np.uint8)
-        return result
+        return result
+
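
Because the setters now store the id and re-apply the preference to the loaded net, a caller can retarget an existing instance instead of rebuilding it; a small sketch using the class above (assumes a CUDA-enabled OpenCV build):

import cv2 as cv
from pphumanseg import PPHumanSeg

model = PPHumanSeg(modelPath='human_segmentation_pphumanseg_2021oct.onnx')
# Switch device later without reconstructing the model:
model.setBackend(cv.dnn.DNN_BACKEND_CUDA)
model.setTarget(cv.dnn.DNN_TARGET_CUDA)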
models/image_classification_ppresnet/demo.py
CHANGED
@@ -19,15 +19,29 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
 parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
 args = parser.parse_args()
 
 if __name__ == '__main__':
     # Instantiate ResNet
-    model = PPResNet(modelPath=args.model, labelPath=args.label)
+    model = PPResNet(modelPath=args.model, labelPath=args.label, backendId=args.backend, targetId=args.target)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
@@ -40,3 +54,4 @@ if __name__ == '__main__':
 
     # Print result
     print('label: {}'.format(result))
+
models/image_classification_ppresnet/ppresnet.py
CHANGED
@@ -9,10 +9,15 @@ import numpy as np
 import cv2 as cv
 
 class PPResNet:
-    def __init__(self, modelPath, labelPath):
+    def __init__(self, modelPath, labelPath, backendId=0, targetId=0):
         self._modelPath = modelPath
-        self._model = cv.dnn.readNet(self._modelPath)
         self._labelPath = labelPath
+        self._backendId = backendId
+        self._targetId = targetId
+
+        self._model = cv.dnn.readNet(self._modelPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
 
         self._inputNames = ''
         self._outputNames = ['save_infer_model/scale_0.tmp_0']
@@ -35,10 +40,12 @@ class PPResNet:
         return self.__class__.__name__
 
     def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
         image = image.astype(np.float32, copy=False) / 255.0
@@ -64,4 +71,5 @@ class PPResNet:
 
     def _postprocess(self, outputBlob):
         class_id = np.argmax(outputBlob[0])
-        return self._labels[class_id]
+        return self._labels[class_id]
+
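
The new backendId=0 and targetId=0 defaults map to OpenCV's DNN_BACKEND_DEFAULT and DNN_TARGET_CPU, so callers that pass nothing keep the previous CPU behavior:

import cv2 as cv

assert cv.dnn.DNN_BACKEND_DEFAULT == 0  # backendId=0 -> default backend
assert cv.dnn.DNN_TARGET_CPU == 0       # targetId=0 -> CPU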
models/person_reid_youtureid/demo.py
CHANGED
@@ -20,10 +20,24 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(
     description="ReID baseline models from Tencent Youtu Lab")
 parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
 parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
 parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
 parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
@@ -65,7 +79,7 @@ def visualize(results, query_dir, gallery_dir, output_size=(128, 384)):
 
 if __name__ == '__main__':
     # Instantiate YoutuReID for person ReID
-    net = YoutuReID(modelPath=args.model)
+    net = YoutuReID(modelPath=args.model, backendId=args.backend, targetId=args.target)
 
     # Read images from dir
     query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
@@ -97,4 +111,5 @@ if __name__ == '__main__':
         cv.namedWindow('result-{}'.format(f), cv.WINDOW_AUTOSIZE)
         cv.imshow('result-{}'.format(f), img)
         cv.waitKey(0)
-    cv.destroyAllWindows()
+    cv.destroyAllWindows()
+
models/person_reid_youtureid/youtureid.py
CHANGED
@@ -8,8 +8,15 @@ import numpy as np
 import cv2 as cv
 
 class YoutuReID:
-    def __init__(self, modelPath):
+    def __init__(self, modelPath, backendId=0, targetId=0):
+        self._modelPath = modelPath
+        self._backendId = backendId
+        self._targetId = targetId
+
         self._model = cv.dnn.readNet(modelPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
+
         self._input_size = (128, 256) # fixed
         self._output_dim = 768
         self._mean = (0.485, 0.456, 0.406)
@@ -20,10 +27,12 @@ class YoutuReID:
         return self.__class__.__name__
 
     def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
         image = image[:, :, ::-1]
@@ -57,4 +66,5 @@ class YoutuReID:
 
         dist = np.matmul(query_arr, gallery_arr.T)
         idx = np.argsort(-dist, axis=1)
-        return [i[0:topK] for i in idx]
+        return [i[0:topK] for i in idx]
+
models/text_detection_db/db.py
CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import cv2 as cv
 
 class DB:
-    def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0):
+    def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
         self._modelPath = modelPath
         self._model = cv.dnn_TextDetectionModel_DB(
             cv.dnn.readNet(self._modelPath)
@@ -21,6 +21,11 @@ class DB:
         self._polygonThreshold = polygonThreshold
         self._maxCandidates = maxCandidates
         self._unclipRatio = unclipRatio
+        self._backendId = backendId
+        self._targetId = targetId
+
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
 
         self._model.setBinaryThreshold(self._binaryThreshold)
         self._model.setPolygonThreshold(self._polygonThreshold)
@@ -34,10 +39,12 @@ class DB:
         return self.__class__.__name__
 
     def setBackend(self, backend):
-        self._model.setPreferableBackend(backend)
+        self._backendId = backend
+        self._model.setPreferableBackend(self._backendId)
 
     def setTarget(self, target):
-        self._model.setPreferableTarget(target)
+        self._targetId = target
+        self._model.setPreferableTarget(self._targetId)
 
     def setInputSize(self, input_size):
         self._inputSize = tuple(input_size)
@@ -47,4 +54,5 @@ class DB:
         assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
         assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
 
-        return self._model.detect(image)
+        return self._model.detect(image)
+
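
With the added keyword arguments, the detector can be pinned to a device at construction time; a hypothetical example (CUDA-enabled build assumed, other parameters left at their defaults):

import cv2 as cv
from db import DB

detector = DB(modelPath='text_detection_DB_TD500_resnet18_2021sep.onnx',
              backendId=cv.dnn.DNN_BACKEND_CUDA,
              targetId=cv.dnn.DNN_TARGET_CUDA)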
models/text_detection_db/demo.py
CHANGED
@@ -19,9 +19,23 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--width', type=int, default=736,
                     help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736,
@@ -53,6 +67,8 @@ if __name__ == '__main__':
                polygonThreshold=args.polygon_threshold,
                maxCandidates=args.max_candidates,
                unclipRatio=args.unclip_ratio
+               backendId=args.backend,
+               targetId=args.target
     )
 
     # If input is an image
@@ -104,4 +120,5 @@ if __name__ == '__main__':
             # Visualize results in a new Window
             cv.imshow('{} Demo'.format(model.name), frame)
 
-            tm.reset()
+            tm.reset()
+
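
Note that this hunk appends the new arguments after unclipRatio=args.unclip_ratio, which carries no trailing comma, so the merged call is a syntax error; presumably the intended tail of the call reads:

               unclipRatio=args.unclip_ratio,  # trailing comma needed before the new arguments
               backendId=args.backend,
               targetId=args.target
    )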
models/text_recognition_crnn/crnn.py
CHANGED
@@ -8,10 +8,17 @@ import numpy as np
 import cv2 as cv
 
 class CRNN:
-    def __init__(self, modelPath, charsetPath):
+    def __init__(self, modelPath, charsetPath, backendId=0, targetId=0):
         self._model_path = modelPath
+        self._charsetPath = charsetPath
+        self._backendId = backendId
+        self._targetId = targetId
+
         self._model = cv.dnn.readNet(self._model_path)
-        self._charset = self._load_charset(charsetPath)
+        self._model.setPreferableBackend(self._backendId)
+        self._model.setPreferableTarget(self._targetId)
+
+        self._charset = self._load_charset(self._charsetPath)
         self._inputSize = [100, 32] # Fixed
         self._targetVertices = np.array([
             [0, self._inputSize[1] - 1],
@@ -33,10 +40,12 @@ class CRNN:
         return charset
 
    def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
+        self._backendId = backend_id
+        self._model.setPreferableBackend(self._backendId)
 
    def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+        self._targetId = target_id
+        self._model.setPreferableTarget(self._targetId)
 
    def _preprocess(self, image, rbbox):
        # Remove conf, reshape and ensure all is np.float32
@@ -81,4 +90,5 @@ class CRNN:
        for i in range(len(text)):
            if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
                char_list.append(text[i])
-        return ''.join(char_list)
+        return ''.join(char_list)
+
models/text_recognition_crnn/demo.py
CHANGED
@@ -23,10 +23,24 @@ def str2bool(v):
     else:
         raise NotImplementedError
 
+backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+try:
+    backends += [cv.dnn.DNN_BACKEND_TIMVX]
+    targets += [cv.dnn.DNN_TARGET_NPU]
+    help_msg_backends += "; {:d}: TIMVX"
+    help_msg_targets += "; {:d}: NPU"
+except:
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+
 parser = argparse.ArgumentParser(
     description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
 parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
 parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
+parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
 parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
@@ -50,7 +64,9 @@ if __name__ == '__main__':
                binaryThreshold=0.3,
                polygonThreshold=0.5,
                maxCandidates=200,
-               unclipRatio=2.0
+               unclipRatio=2.0,
+               backendId=args.backend,
+               targetId=args.target
     )
 
     # If input is an image
@@ -118,4 +134,5 @@ if __name__ == '__main__':
         print(texts)
 
         # Visualize results in a new Window
-        cv.imshow('{} Demo'.format(recognizer.name), frame)
+        cv.imshow('{} Demo'.format(recognizer.name), frame)
+