Limit combinations of backends and targets in demos and benchmark (#145)
* limit backend and target combination in demos and benchmark
* simpler version checking
- benchmark/benchmark.py +35 -14
- models/face_detection_yunet/demo.py +39 -31
- models/face_detection_yunet/yunet.py +1 -13
- models/face_recognition_sface/demo.py +35 -30
- models/face_recognition_sface/sface.py +1 -11
- models/facial_expression_recognition/demo.py +33 -30
- models/facial_expression_recognition/facial_fer_model.py +3 -5
- models/handpose_estimation_mediapipe/demo.py +36 -30
- models/handpose_estimation_mediapipe/mp_handpose.py +3 -6
- models/human_segmentation_pphumanseg/demo.py +31 -26
- models/human_segmentation_pphumanseg/pphumanseg.py +3 -6
- models/image_classification_mobilenet/demo.py +26 -30
- models/image_classification_mobilenet/mobilenet.py +3 -5
- models/image_classification_ppresnet/demo.py +27 -24
- models/image_classification_ppresnet/ppresnet.py +3 -5
- models/license_plate_detection_yunet/demo.py +41 -32
- models/license_plate_detection_yunet/lpd_yunet.py +2 -4
- models/object_detection_nanodet/demo.py +56 -49
- models/object_detection_nanodet/nanodet.py +3 -5
- models/object_detection_yolox/demo.py +58 -50
- models/object_detection_yolox/yolox.py +3 -5
- models/object_tracking_dasiamrpn/dasiamrpn.py +4 -12
- models/object_tracking_dasiamrpn/demo.py +37 -14
- models/palm_detection_mediapipe/demo.py +37 -30
- models/palm_detection_mediapipe/mp_palmdet.py +4 -6
- models/person_reid_youtureid/demo.py +36 -28
- models/person_reid_youtureid/youtureid.py +3 -6
- models/qrcode_wechatqrcode/demo.py +41 -16
- models/qrcode_wechatqrcode/wechatqrcode.py +10 -10
- models/text_detection_db/db.py +3 -6
- models/text_detection_db/demo.py +41 -33
- models/text_recognition_crnn/crnn.py +3 -5
- models/text_recognition_crnn/demo.py +35 -31
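All of the touched demos converge on the same selection mechanism: a hard-coded table of valid backend-target pairs plus a single --backend_target index argument, replacing the old per-demo try/except probing of optional backends. The sketch below condenses that pattern; it is not any one demo verbatim, and the final line stands in for whichever model wrapper a given demo actually instantiates.

    import argparse
    import cv2 as cv  # opencv-python >= 4.7.0 is needed for the TIM-VX and CANN constants

    # Only the listed combinations are selectable; anything else simply is not offered.
    backend_target_pairs = [
        [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
        [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
        [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
        [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
        [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU],
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument('--backend_target', '-bt', type=int, default=0,
                        help='Index into backend_target_pairs.')
    args = parser.parse_args()

    # A single index picks a known-good pair, which the demos then pass to the
    # model wrapper as backendId/targetId.
    backend_id, target_id = backend_target_pairs[args.backend_target]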
benchmark/benchmark.py
CHANGED
@@ -8,9 +8,31 @@ import cv2 as cv
 from models import MODELS
 from utils import METRICS, DATALOADERS
 
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
+
 parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
 parser.add_argument('--cfg', '-c', type=str,
                     help='Benchmarking on the given config.')
+parser.add_argument('--cfg_overwrite_backend_target', type=int, default=-1,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        others: (default) use the one from config,
+                        {:d}: OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument("--fp32", action="store_true", help="Runs models of float32 precision only.")
 parser.add_argument("--fp16", action="store_true", help="Runs models of float16 precision only.")
 parser.add_argument("--int8", action="store_true", help="Runs models of int8 precision only.")

@@ -56,6 +78,8 @@ class Benchmark:
             opencv=cv.dnn.DNN_BACKEND_OPENCV,
             # vkcom=cv.dnn.DNN_BACKEND_VKCOM,
             cuda=cv.dnn.DNN_BACKEND_CUDA,
+            timvx=cv.dnn.DNN_BACKEND_TIMVX,
+            cann=cv.dnn.DNN_BACKEND_CANN,
         )
 
         target_id = kwargs.pop('target', 'cpu')

@@ -69,28 +93,20 @@ class Benchmark:
             cuda=cv.dnn.DNN_TARGET_CUDA,
             cuda_fp16=cv.dnn.DNN_TARGET_CUDA_FP16,
             # hddl=cv.dnn.DNN_TARGET_HDDL,
+            npu=cv.dnn.DNN_TARGET_NPU,
         )
 
-        # add extra backends & targets
-        try:
-            available_backends['timvx'] = cv.dnn.DNN_BACKEND_TIMVX
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with TIM-VX backend enbaled. See https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more details on how to enable TIM-VX backend.')
-        try:
-            available_backends['cann'] = cv.dnn.DNN_BACKEND_CANN
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with CANN backend enabled. See https://github.com/opencv/opencv/wiki/Huawei-CANN-Backend for more details on how to enable CANN backend.')
-
         self._backend = available_backends[backend_id]
         self._target = available_targets[target_id]
 
         self._benchmark_results = dict()
 
+    def setBackendAndTarget(self, backend_id, target_id):
+        self._backend = backend_id
+        self._target = target_id
+
     def run(self, model):
-        model.setBackend(self._backend)
-        model.setTarget(self._target)
+        model.setBackendAndTarget(self._backend, self._target)
 
         for idx, data in enumerate(self._dataloader):
             filename, input_data = data[:2]

@@ -118,6 +134,11 @@ if __name__ == '__main__':
     # Instantiate benchmark
     benchmark = Benchmark(**cfg['Benchmark'])
 
+    if args.cfg_overwrite_backend_target >= 0:
+        backend_id = backend_target_pairs[args.backend_target][0]
+        target_id = backend_target_pairs[args.backend_target][1]
+        benchmark.setBackendAndTarget(backend_id, target_id)
+
     # Instantiate model
     model_config = cfg['Model']
     model_handler, model_paths = MODELS.get(model_config.pop('name'))
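With this change the benchmark still reads its backend and target from the YAML config by default; the new --cfg_overwrite_backend_target flag overrides both at once using the same pair table. A typical invocation might look like the following (the config filename here is only an example):

    python benchmark.py --cfg ./config/face_detection_yunet.yaml --cfg_overwrite_backend_target 1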
models/face_detection_yunet/demo.py
CHANGED
@@ -11,36 +11,42 @@ import cv2 as cv
 
 from yunet import YuNet
 
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx',
+                    help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Usage: Set the minimum needed confidence for the model to identify a face, defauts to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--top_k', type=int, default=5000,
+                    help='Usage: Keep top_k bounding boxes before NMS.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):

@@ -70,14 +76,17 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate YuNet
     model = YuNet(modelPath=args.model,
                   inputSize=[320, 320],
                   confThreshold=args.conf_threshold,
                   nmsThreshold=args.nms_threshold,
                   topK=args.top_k,
+                  backendId=backend_id,
+                  targetId=target_id)
 
     # If input is an image
     if args.input is not None:

@@ -134,4 +143,3 @@ if __name__ == '__main__':
         cv.imshow('YuNet Demo', frame)
 
         tm.reset()
-
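A single index now replaces the old separate backend and target selection; for example, to try the CUDA FP16 pair (index 2 in the table above), the demo would be launched with something like the following (image path is illustrative):

    python demo.py --input /path/to/image.jpg --backend_target 2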
models/face_detection_yunet/yunet.py
CHANGED
@@ -33,19 +33,8 @@ class YuNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceDetectorYN.create(
-            model=self._modelPath,
-            config="",
-            input_size=self._inputSize,
-            score_threshold=self._confThreshold,
-            nms_threshold=self._nmsThreshold,
-            top_k=self._topK,
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceDetectorYN.create(
             model=self._modelPath,

@@ -64,4 +53,3 @@ class YuNet:
         # Forward
         faces = self._model.detect(image)
         return faces[1]
-
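The same consolidation is applied to every wrapper class in this change: the two separate setters (setTarget() and its backend counterpart), each of which rebuilt the underlying model, are folded into one setBackendAndTarget() call. For a caller holding, say, a YuNet instance named model, the migration is simply:

    # before: two calls, each re-creating the model
    # model.setBackend(cv.dnn.DNN_BACKEND_CUDA)
    # model.setTarget(cv.dnn.DNN_TARGET_CUDA)

    # after: one call, one re-creation
    model.setBackendAndTarget(cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA)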
models/face_recognition_sface/demo.py
CHANGED
@@ -15,49 +15,55 @@ from sface import SFace
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(
     description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
+parser.add_argument('--input1', '-i1', type=str,
+                    help='Usage: Set path to the input image 1 (original face).')
+parser.add_argument('--input2', '-i2', type=str,
+                    help='Usage: Set path to the input image 2 (comparison face).')
+parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx',
+                    help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0,
+                    help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate SFace for face recognition
+    recognizer = SFace(modelPath=args.model,
+                       disType=args.dis_type,
+                       backendId=backend_id,
+                       targetId=target_id)
     # Instantiate YuNet for face detection
     detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx',
                      inputSize=[320, 320],
                      confThreshold=0.9,
                      nmsThreshold=0.3,
                      topK=5000,
+                     backendId=backend_id,
+                     targetId=target_id)
 
     img1 = cv.imread(args.input1)
     img2 = cv.imread(args.input2)

@@ -73,4 +79,3 @@ if __name__ == '__main__':
     # Match
     result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
     print('Result: {}.'.format('same identity' if result else 'different identities'))
-
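The recognition demo is run the same way as before, only with the pair index; e.g. comparing two face images on the default CPU pair (filenames are illustrative):

    python demo.py -i1 face1.jpg -i2 face2.jpg -bt 0 --dis_type 0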
models/face_recognition_sface/sface.py
CHANGED
@@ -7,8 +7,6 @@
 import numpy as np
 import cv2 as cv
 
-from _testcapi import FLT_MIN
-
 class SFace:
     def __init__(self, modelPath, disType=0, backendId=0, targetId=0):
         self._modelPath = modelPath

@@ -30,15 +28,8 @@ class SFace:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceRecognizerSF.create(
-            model=self._modelPath,
-            config="",
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceRecognizerSF.create(
             model=self._modelPath,

@@ -70,4 +61,3 @@ class SFace:
         else: # NORM_L2
             norml2_distance = self._model.match(feature1, feature2, self._disType)
             return 1 if norml2_distance <= self._threshold_norml2 else 0
-
models/facial_expression_recognition/demo.py
CHANGED
@@ -11,38 +11,38 @@ from facial_fer_model import FacialExpressionRecog
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Facial Expression Recognition')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx',
+                    help='Path to the facial expression recognition model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
 def visualize(image, det_res, fer_res, box_color=(0, 255, 0), text_color=(0, 0, 255)):
 
     print('%s %3d faces detected.' % (datetime.datetime.now(), len(det_res)))

@@ -83,11 +83,14 @@ def process(detect_model, fer_model, frame):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     detect_model = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')
 
     fer_model = FacialExpressionRecog(modelPath=args.model,
+                                      backendId=backend_id,
+                                      targetId=target_id)
 
     # If input is an image
     if args.input is not None:
models/facial_expression_recognition/facial_fer_model.py
CHANGED
@@ -29,12 +29,10 @@ class FacialExpressionRecog:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image, bbox):
models/handpose_estimation_mediapipe/demo.py
CHANGED
@@ -9,34 +9,38 @@ from mp_handpose import MPHandPose
 sys.path.append('../palm_detection_mediapipe')
 from mp_palmdet import MPPalmDet
 
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Hand Pose Estimation from MediaPipe')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./handpose_estimation_mediapipe_2023feb.onnx',
+                    help='Path to the model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Filter out hands of confidence < conf_threshold.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 

@@ -147,17 +151,19 @@ def visualize(image, hands, print_result=False):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # palm detector
     palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
                               nmsThreshold=0.3,
                               scoreThreshold=0.6,
+                              backendId=backend_id,
+                              targetId=target_id)
     # handpose detector
     handpose_detector = MPHandPose(modelPath=args.model,
                                    confThreshold=args.conf_threshold,
+                                   backendId=backend_id,
+                                   targetId=target_id)
 
     # If input is an image
     if args.input is not None:
models/handpose_estimation_mediapipe/mp_handpose.py
CHANGED
@@ -1,7 +1,6 @@
 import numpy as np
 import cv2 as cv
 
-
 class MPHandPose:
     def __init__(self, modelPath, confThreshold=0.8, backendId=0, targetId=0):
         self.model_path = modelPath

@@ -28,12 +27,10 @@ class MPHandPose:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _cropAndPadFromPalm(self, image, palm_bbox, for_rotation = False):
models/human_segmentation_pphumanseg/demo.py
CHANGED
@@ -11,33 +11,36 @@ import cv2 as cv
 
 from pphumanseg import PPHumanSeg
 
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2023mar.onnx',
+                    help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2023mar.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def get_color_map_list(num_classes):

@@ -97,8 +100,10 @@ def visualize(image, result, weight=0.6, fps=None):
 
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate PPHumanSeg
+    model = PPHumanSeg(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     if args.input is not None:
         # Read image and resize to 192x192
models/human_segmentation_pphumanseg/pphumanseg.py
CHANGED
@@ -28,12 +28,10 @@ class PPHumanSeg:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):

@@ -69,4 +67,3 @@ class PPHumanSeg:
 
         result = np.argmax(outputBlob, axis=1).astype(np.uint8)
         return result
-
models/image_classification_mobilenet/demo.py
CHANGED
@@ -5,43 +5,39 @@ import cv2 as cv
 
 from mobilenet import MobileNet
 
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
-
-all_mobilenets = [
-    'image_classification_mobilenetv1_2022apr.onnx',
-    'image_classification_mobilenetv2_2022apr.onnx',
-    'image_classification_mobilenetv1_2022apr-int8-quantized.onnx',
-    'image_classification_mobilenetv2_2022apr-int8-quantized.onnx'
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
 ]
 
 parser = argparse.ArgumentParser(description='Demo for MobileNet V1 & V2.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_mobilenetv1_2022apr.onnx',
+                    help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate MobileNet
+    model = MobileNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
models/image_classification_mobilenet/mobilenet.py
CHANGED
@@ -33,12 +33,10 @@ class MobileNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _preprocess(self, image):
models/image_classification_ppresnet/demo.py
CHANGED
@@ -11,36 +11,39 @@ import cv2 as cv
 
 from ppresnet import PPResNet
 
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx',
+                    help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate ResNet
+    model = PPResNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
models/image_classification_ppresnet/ppresnet.py
CHANGED
@@ -36,12 +36,10 @@ class PPResNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
models/license_plate_detection_yunet/demo.py
CHANGED
@@ -5,37 +5,44 @@ import cv2 as cv
 
 from lpd_yunet import LPD_YuNet
 
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='LPD-YuNet for License Plate Detection')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2023mar.onnx',
+                    help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2023mar.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3. Suppress bounding boxes of iou >= nms_threshold.')
+parser.add_argument('--top_k', type=int, default=5000,
+                    help='Usage: Keep top_k bounding boxes before NMS.')
+parser.add_argument('--keep_top_k', type=int, default=750,
+                    help='Usage: Keep keep_top_k bounding boxes after NMS.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):

@@ -57,14 +64,17 @@ def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate LPD-YuNet
     model = LPD_YuNet(modelPath=args.model,
                       confThreshold=args.conf_threshold,
                       nmsThreshold=args.nms_threshold,
                       topK=args.top_k,
                       keepTopK=args.keep_top_k,
+                      backendId=backend_id,
+                      targetId=target_id)
 
     # If input is an image
     if args.input is not None:

@@ -117,4 +127,3 @@ if __name__ == '__main__':
         cv.imshow('LPD-YuNet Demo', frame)
 
         tm.reset()
-
models/license_plate_detection_yunet/lpd_yunet.py
CHANGED
@@ -28,12 +28,10 @@ class LPD_YuNet:
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self.backend_id = backendId
-        self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
         self.target_id = targetId
+        self.model.setPreferableBackend(self.backend_id)
         self.model.setPreferableTarget(self.target_id)
 
     def setInputSize(self, inputSize):
models/object_detection_nanodet/demo.py
CHANGED
@@ -1,29 +1,21 @@
|
|
1 |
import numpy as np
|
2 |
-
import cv2
|
3 |
import argparse
|
4 |
|
5 |
from nanodet import NanoDet
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
|
11 |
-
return False
|
12 |
-
else:
|
13 |
-
raise NotImplementedError
|
14 |
-
|
15 |
-
backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
|
16 |
-
targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
|
17 |
-
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
18 |
-
help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
27 |
|
28 |
classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
|
29 |
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
|
@@ -48,16 +40,16 @@ def letterbox(srcimg, target_size=(416, 416)):
|
|
48 |
hw_scale = img.shape[0] / img.shape[1]
|
49 |
if hw_scale > 1:
|
50 |
newh, neww = target_size[0], int(target_size[1] / hw_scale)
|
51 |
-
img =
|
52 |
left = int((target_size[1] - neww) * 0.5)
|
53 |
-
img =
|
54 |
else:
|
55 |
newh, neww = int(target_size[0] * hw_scale), target_size[1]
|
56 |
-
img =
|
57 |
top = int((target_size[0] - newh) * 0.5)
|
58 |
-
img =
|
59 |
else:
|
60 |
-
img =
|
61 |
|
62 |
letterbox_scale = [top, left, newh, neww]
|
63 |
return img, letterbox_scale
|
@@ -87,7 +79,7 @@ def vis(preds, res_img, letterbox_scale, fps=None):
|
|
87 |
# draw FPS
|
88 |
if fps is not None:
|
89 |
fps_label = "FPS: %.2f" % fps
|
90 |
-
|
91 |
|
92 |
# draw bboxes and labels
|
93 |
for pred in preds:
|
@@ -97,37 +89,52 @@ def vis(preds, res_img, letterbox_scale, fps=None):
|
|
97 |
|
98 |
# bbox
|
99 |
xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
|
100 |
-
|
101 |
|
102 |
# label
|
103 |
label = "{:s}: {:.2f}".format(classes[classid], conf)
|
104 |
-
|
105 |
|
106 |
return ret
|
107 |
|
108 |
if __name__=='__main__':
|
109 |
parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
|
110 |
-
parser.add_argument('--input', '-i', type=str,
|
111 |
-
|
112 |
-
parser.add_argument('--
|
113 |
-
|
114 |
-
parser.add_argument('--
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
args = parser.parse_args()
|
119 |
|
|
|
|
|
|
|
120 |
model = NanoDet(modelPath= args.model,
|
121 |
prob_threshold=args.confidence,
|
122 |
iou_threshold=args.nms,
|
123 |
-
backend_id=
|
124 |
-
target_id=
|
125 |
|
126 |
 import numpy as np
+import cv2 as cv
 import argparse

 from nanodet import NanoDet

+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
...
         hw_scale = img.shape[0] / img.shape[1]
         if hw_scale > 1:
             newh, neww = target_size[0], int(target_size[1] / hw_scale)
+            img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
             left = int((target_size[1] - neww) * 0.5)
+            img = cv.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv.BORDER_CONSTANT, value=0)  # add border
         else:
             newh, neww = int(target_size[0] * hw_scale), target_size[1]
+            img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
             top = int((target_size[0] - newh) * 0.5)
+            img = cv.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv.BORDER_CONSTANT, value=0)
     else:
+        img = cv.resize(img, target_size, interpolation=cv.INTER_AREA)

     letterbox_scale = [top, left, newh, neww]
     return img, letterbox_scale
...
     # draw FPS
     if fps is not None:
         fps_label = "FPS: %.2f" % fps
+        cv.putText(ret, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

     # draw bboxes and labels
     for pred in preds:
...
         # bbox
         xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
+        cv.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)

         # label
         label = "{:s}: {:.2f}".format(classes[classid], conf)
+        cv.putText(ret, label, (xmin, ymin - 10), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)

     return ret

 if __name__=='__main__':
     parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
+    parser.add_argument('--input', '-i', type=str,
+                        help='Path to the input image. Omit for using default camera.')
+    parser.add_argument('--model', '-m', type=str,
+                        default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
+    parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                        help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                        '''.format(*[x for x in range(len(backend_target_pairs))]))
+    parser.add_argument('--confidence', default=0.35, type=float,
+                        help='Class confidence')
+    parser.add_argument('--nms', default=0.6, type=float,
+                        help='Enter nms IOU threshold')
+    parser.add_argument('--save', '-s', action='store_true',
+                        help='Specify to save results. This flag is invalid when using camera.')
+    parser.add_argument('--vis', '-v', action='store_true',
+                        help='Specify to open a window for result visualization. This flag is invalid when using camera.')
     args = parser.parse_args()

+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     model = NanoDet(modelPath= args.model,
                     prob_threshold=args.confidence,
                     iou_threshold=args.nms,
+                    backend_id=backend_id,
+                    target_id=target_id)

+    tm = cv.TickMeter()
     tm.reset()
     if args.input is not None:
+        image = cv.imread(args.input)
+        input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)

         # Letterbox transformation
         input_blob, letterbox_scale = letterbox(input_blob)
...
         if args.save:
             print('Resutls saved to result.jpg\n')
+            cv.imwrite('result.jpg', img)

         if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, img)
+            cv.waitKey(0)

     else:
         print("Press any key to stop video capture")
         deviceId = 0
+        cap = cv.VideoCapture(deviceId)

+        while cv.waitKey(1) < 0:
             hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break

+            input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
             input_blob, letterbox_scale = letterbox(input_blob)
             # Inference
             tm.start()
...
             img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())

+            cv.imshow("NanoDet Demo", img)

             tm.reset()
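Every demo touched by this PR follows the same pattern: the --backend_target integer indexes into backend_target_pairs, and the selected backend and target are handed to the model wrapper together. A minimal sketch of that mapping; the explicit range check is added here only for illustration, the demos index the list directly:

import cv2 as cv

# Same table as in the demos: each row is a backend paired with a target it is known to work with.
backend_target_pairs = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU],
]

def resolve_pair(index):
    # Map the --backend_target integer onto a concrete (backend_id, target_id) pair.
    if not 0 <= index < len(backend_target_pairs):
        raise ValueError("backend_target must be in [0, {})".format(len(backend_target_pairs)))
    backend_id, target_id = backend_target_pairs[index]
    return backend_id, target_id

backend_id, target_id = resolve_pair(0)  # 0: OpenCV backend on CPU, the default in every demo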
models/object_detection_nanodet/nanodet.py
CHANGED

@@ -37,12 +37,10 @@ class NanoDet:
     def name(self):
         return self.__class__.__name__

-    def …
-        self.…
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self.net.setPreferableBackend(self.backend_id)
         self.net.setPreferableTarget(self.target_id)

     def pre_process(self, img):
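The wrapper classes replace the separate setBackend/setTarget setters with a single setBackendAndTarget call, so a backend is never paired with a mismatched target. A hedged sketch of the intended call pattern, using the defaults shown in the demo above:

import cv2 as cv
from nanodet import NanoDet

model = NanoDet(modelPath='object_detection_nanodet_2022nov.onnx',
                prob_threshold=0.35,
                iou_threshold=0.6,
                backend_id=cv.dnn.DNN_BACKEND_OPENCV,
                target_id=cv.dnn.DNN_TARGET_CPU)

# Backend and target are now switched together; the separate setBackend()/setTarget()
# calls no longer exist on the wrappers.
model.setBackendAndTarget(cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA)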
models/object_detection_yolox/demo.py
CHANGED

@@ -1,29 +1,21 @@
 import numpy as np
-import cv2
+import cv2 as cv
 import argparse

 from yolox import YoloX

-…
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
-targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-…
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',

@@ -43,8 +35,8 @@ classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
 def letterbox(srcimg, target_size=(640, 640)):
     padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
     ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
+    resized_img = cv.resize(
+        srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR
     ).astype(np.float32)
     padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img

@@ -58,7 +50,7 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
     if fps is not None:
         fps_label = "FPS: %.2f" % fps
+        cv.putText(res_img, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

     for det in dets:
         box = unletterbox(det[:4], letterbox_scale).astype(np.int32)

@@ -68,39 +60,55 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
         x0, y0, x1, y1 = box

         text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
+        font = cv.FONT_HERSHEY_SIMPLEX
+        txt_size = cv.getTextSize(text, font, 0.4, 1)[0]
+        cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
+        cv.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
+        cv.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)

     return res_img

 if __name__=='__main__':
     parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
+    parser.add_argument('--input', '-i', type=str,
+                        help='Path to the input image. Omit for using default camera.')
+    parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx',
+                        help="Path to the model")
+    parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                        help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                        '''.format(*[x for x in range(len(backend_target_pairs))]))
+    parser.add_argument('--confidence', default=0.5, type=float,
+                        help='Class confidence')
+    parser.add_argument('--nms', default=0.5, type=float,
+                        help='Enter nms IOU threshold')
+    parser.add_argument('--obj', default=0.5, type=float,
+                        help='Enter object threshold')
+    parser.add_argument('--save', '-s', action='store_true',
+                        help='Specify to save results. This flag is invalid when using camera.')
+    parser.add_argument('--vis', '-v', action='store_true',
+                        help='Specify to open a window for result visualization. This flag is invalid when using camera.')
     args = parser.parse_args()

+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     model_net = YoloX(modelPath= args.model,
                       confThreshold=args.confidence,
                       nmsThreshold=args.nms,
                       objThreshold=args.obj,
+                      backendId=backend_id,
+                      targetId=target_id)

+    tm = cv.TickMeter()
     tm.reset()
     if args.input is not None:
+        image = cv.imread(args.input)
+        input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
         input_blob, letterbox_scale = letterbox(input_blob)

         # Inference

@@ -113,25 +121,25 @@ if __name__=='__main__':
         if args.save:
             print('Resutls saved to result.jpg\n')
+            cv.imwrite('result.jpg', img)

         if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, img)
+            cv.waitKey(0)

     else:
         print("Press any key to stop video capture")
         deviceId = 0
+        cap = cv.VideoCapture(deviceId)

+        while cv.waitKey(1) < 0:
             hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break

+            input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
             input_blob, letterbox_scale = letterbox(input_blob)

             # Inference

@@ -141,6 +149,6 @@ if __name__=='__main__':
             img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())

+            cv.imshow("YoloX Demo", img)

             tm.reset()
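Both detection demos letterbox the frame into the 640x640 network input and later undo the scaling on the predicted boxes. A small worked example of that arithmetic, with frame sizes chosen for illustration rather than taken from the PR:

import numpy as np

src_h, src_w = 720, 1280                 # example frame size
target_h, target_w = 640, 640            # network input size used by the YOLOX demo
ratio = min(target_h / src_h, target_w / src_w)   # 0.5: the frame is shrunk to 360x640, then padded to 640x640

# A box predicted on the padded canvas maps back to the original frame by dividing
# its coordinates by the same ratio, which is essentially what unletterbox does here.
box_on_canvas = np.array([64.0, 64.0, 320.0, 320.0])   # x0, y0, x1, y1 on the 640x640 input
box_on_frame = box_on_canvas / ratio                    # -> [128., 128., 640., 640.]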
models/object_detection_yolox/yolox.py
CHANGED

@@ -23,12 +23,10 @@ class YoloX:
     def name(self):
         return self.__class__.__name__

-    def …
-        self.…
-
-    def setTarget(self, targetId):
-        self.targetId = targetId
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self.net.setPreferableBackend(self.backendId)
         self.net.setPreferableTarget(self.targetId)

     def preprocess(self, img):
models/object_tracking_dasiamrpn/dasiamrpn.py
CHANGED

@@ -27,18 +27,10 @@ class DaSiamRPN:
     def name(self):
         return self.__class__.__name__

-    def …
-        self._backend_id = …
-        self.…
-        self._param.model = self._model_path
-        self._param.kernel_cls1 = self._kernel_cls1_path
-        self._param.kernel_r1 = self._kernel_r1_path
-        self._param.backend = self._backend_id
-        self._param.target = self._target_id
-        self._model = cv.TrackerDaSiamRPN.create(self._param)
-
-    def setTarget(self, target_id):
-        self._target_id = target_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backend_id = backendId
+        self._target_id = targetId

         self._param = cv.TrackerDaSiamRPN_Params()
         self._param.model = self._model_path
         self._param.kernel_cls1 = self._kernel_cls1_path

@@ -53,4 +45,4 @@ class DaSiamRPN:
     def infer(self, image):
         isLocated, bbox = self._model.update(image)
         score = self._model.getTrackingScore()
         return isLocated, bbox, score
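DaSiamRPN is driven through cv.TrackerDaSiamRPN rather than cv.dnn, so the chosen pair is forwarded via TrackerDaSiamRPN_Params and the tracker is rebuilt, just as the removed setters above already did. A minimal sketch of that flow, assuming the three ONNX files sit in the working directory:

import cv2 as cv

params = cv.TrackerDaSiamRPN_Params()
params.model = 'object_tracking_dasiamrpn_model_2021nov.onnx'
params.kernel_cls1 = 'object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx'
params.kernel_r1 = 'object_tracking_dasiamrpn_kernel_r1_2021nov.onnx'
params.backend = cv.dnn.DNN_BACKEND_OPENCV   # pair 0 in the demos
params.target = cv.dnn.DNN_TARGET_CPU
tracker = cv.TrackerDaSiamRPN.create(params)  # the wrapper recreates the tracker whenever the pair changes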
models/object_tracking_dasiamrpn/demo.py
CHANGED

@@ -11,22 +11,41 @@ import cv2 as cv
 from dasiamrpn import DaSiamRPN

-…
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(
     description="Distractor-aware Siamese Networks for Visual Object Tracking (https://arxiv.org/abs/1808.06048)")
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input video. Omit for using default camera.')
+parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx',
+                    help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.')
+parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx',
+                    help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.')
+parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx',
+                    help='Usage: Set path to dasiamrpn_kernel_r1.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):

@@ -50,12 +69,16 @@
     return output

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate DaSiamRPN
     model = DaSiamRPN(
         kernel_cls1_path=args.kernel_cls1_path,
         kernel_r1_path=args.kernel_r1_path,
         model_path=args.model_path,
+        backend_id=backend_id,
+        target_id=target_id)

     # Read from args.input
     _input = args.input
models/palm_detection_mediapipe/demo.py
CHANGED

@@ -5,35 +5,40 @@ import cv2 as cv
 from mp_palmdet import MPPalmDet

-…
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx',
+                    help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--score_threshold', type=float, default=0.8,
+                    help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, results, print_results=False, fps=None):

@@ -71,12 +76,15 @@ def visualize(image, results, print_results=False, fps=None):
     return output

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate MPPalmDet
     model = MPPalmDet(modelPath=args.model,
                       nmsThreshold=args.nms_threshold,
                       scoreThreshold=args.score_threshold,
+                      backendId=backend_id,
+                      targetId=target_id)

     # If input is an image
     if args.input is not None:

@@ -123,4 +131,3 @@
         cv.imshow('MPPalmDet Demo', frame)

         tm.reset()
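Each demo now opens with the same assert on cv.__version__, which is a plain string comparison. A tuple-based check such as the sketch below is a slightly more defensive alternative and is not part of this PR; lexicographic comparison would, for example, rank a hypothetical "4.10.0" below "4.7.0":

import re
import cv2 as cv

def opencv_at_least(major, minor, patch=0):
    # Compare numeric components instead of the raw string ("4.7.0-dev" also parses).
    nums = [int(p) for p in re.findall(r'\d+', cv.__version__)[:3]]
    nums += [0] * (3 - len(nums))
    return tuple(nums) >= (major, minor, patch)

assert opencv_at_least(4, 7), \
    "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"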
models/palm_detection_mediapipe/mp_palmdet.py
CHANGED

@@ -22,12 +22,10 @@ class MPPalmDet:
     def name(self):
         return self.__class__.__name__

-    def …
+    def setBackendAndTarget(self, backendId, targetId):
         self.backend_id = backendId
-        self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
         self.target_id = targetId
+        self.model.setPreferableBackend(self.backend_id)
         self.model.setPreferableTarget(self.target_id)

     def _preprocess(self, image):

@@ -35,7 +33,7 @@ class MPPalmDet:
         ratio = min(self.input_size / image.shape[:2])
         if image.shape[0] != self.input_size[0] or image.shape[1] != self.input_size[1]:
             # keep aspect ratio when resize
-            ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.…
+            ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.int32)
             image = cv.resize(image, (ratio_size[1], ratio_size[0]))
             pad_h = self.input_size[0] - ratio_size[0]
             pad_w = self.input_size[1] - ratio_size[1]

@@ -46,7 +44,7 @@ class MPPalmDet:
         image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
         image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
         image = image.astype(np.float32) / 255.0 # norm
-        pad_bias = (pad_bias / ratio).astype(np.…
+        pad_bias = (pad_bias / ratio).astype(np.int32)
         return image[np.newaxis, :, :, :], pad_bias # hwc -> nhwc

     def infer(self, image):
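The two astype(np.int32) casts make the resized size and the pad bias plain integers before they reach cv.resize and the caller. A small numeric sketch of the same arithmetic, assuming a 192x192 network input (an assumption about the 2023feb palm model) and an example 480x640 frame:

import numpy as np

input_size = np.array([192, 192])    # assumed (h, w) of the palm detection network
image_hw = np.array([480, 640])      # example source frame (h, w)

ratio = min(input_size / image_hw)                    # 0.3: scale that fits both sides
ratio_size = (image_hw * ratio).astype(np.int32)      # resized (h, w) -> [144, 192]
pad_h, pad_w = input_size - ratio_size                # padding needed to reach 192x192 -> [48, 0]
# Illustrative only: the pad offset expressed back in source-image pixels, rounded to int32.
pad_bias = (np.array([pad_h, pad_w]) / 2 / ratio).astype(np.int32)  # -> [80, 0]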
models/person_reid_youtureid/demo.py
CHANGED

@@ -12,36 +12,41 @@ import cv2 as cv
 from youtureid import YoutuReID

-…
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(
     description="ReID baseline models from Tencent Youtu Lab")
+parser.add_argument('--query_dir', '-q', type=str,
+                    help='Query directory.')
+parser.add_argument('--gallery_dir', '-g', type=str,
+                    help='Gallery directory.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--topk', type=int, default=10,
+                    help='Top-K closest from gallery for each query.')
+parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx',
+                    help='Path to the model.')
+parser.add_argument('--save', '-s', type=str2bool, default=False,
+                    help='Set true to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', type=str2bool, default=True,
+                    help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()

 def readImageFromDirectory(img_dir, w=128, h=256):

@@ -78,8 +83,11 @@ def visualize(results, query_dir, gallery_dir, output_size=(128, 384)):
     return results_vis

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate YoutuReID for person ReID
-    net = YoutuReID(modelPath=args.model, backendId=…
+    net = YoutuReID(modelPath=args.model, backendId=backend_id, targetId=target_id)

     # Read images from dir
     query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
models/person_reid_youtureid/youtureid.py
CHANGED

@@ -26,12 +26,10 @@ class YoutuReID:
     def name(self):
         return self.__class__.__name__

-    def …
-        self._backendId = …
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
         self._model.setPreferableTarget(self._targetId)

     def _preprocess(self, image):

@@ -67,4 +65,3 @@ class YoutuReID:
         dist = np.matmul(query_arr, gallery_arr.T)
         idx = np.argsort(-dist, axis=1)
         return [i[0:topK] for i in idx]
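The query step kept above boils down to a similarity matrix followed by a per-row argsort. A toy sketch with random feature vectors; the dimensions are illustrative and not taken from the model:

import numpy as np

rng = np.random.default_rng(0)
query_arr = rng.standard_normal((2, 768))     # 2 query feature vectors
gallery_arr = rng.standard_normal((3, 768))   # 3 gallery feature vectors

dist = np.matmul(query_arr, gallery_arr.T)    # (2, 3) similarity matrix
idx = np.argsort(-dist, axis=1)               # gallery indices, most similar first
topk = [row[:2] for row in idx]               # top-2 matches per query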
models/qrcode_wechatqrcode/demo.py
CHANGED

@@ -11,23 +11,43 @@ import cv2 as cv
 from wechatqrcode import WeChatQRCode

-…
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(
     description="WeChat QR code detector for detecting and parsing QR code (https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode)")
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt',
+                    help='Usage: Set path to detect.prototxt.')
+parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel',
+                    help='Usage: Set path to detect.caffemodel.')
+parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt',
+                    help='Usage: Set path to sr.prototxt.')
+parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel',
+                    help='Usage: Set path to sr.caffemodel.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255, 0), fps=None):

@@ -56,11 +76,16 @@
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate WeChatQRCode
     model = WeChatQRCode(args.detect_prototxt_path,
                          args.detect_model_path,
                          args.sr_prototxt_path,
+                         args.sr_model_path,
+                         backendId=backend_id,
+                         targetId=target_id)

     # If input is an image:
     if args.input is not None:

@@ -107,4 +132,4 @@
         # Visualize results in a new window
         cv.imshow('WeChatQRCode Demo', frame)

         tm.reset()
models/qrcode_wechatqrcode/wechatqrcode.py
CHANGED

@@ -8,27 +8,27 @@ import numpy as np
 import cv2 as cv # needs to have cv.wechat_qrcode_WeChatQRCode, which requires compile from source with opencv_contrib/modules/wechat_qrcode

 class WeChatQRCode:
-    def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path):
+    def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path, backendId=0, targetId=0):
         self._model = cv.wechat_qrcode_WeChatQRCode(
             detect_prototxt_path,
             detect_model_path,
             sr_prototxt_path,
             sr_model_path
         )
+        if backendId != 0:
+            raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
+        if targetId != 0:
+            raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")

     @property
     def name(self):
         return self.__class__.__name__

-    def …
-    def setTarget(self, target_id):
-        # self._model.setPreferableTarget(target_id)
-        if target_id != 0:
+    def setBackendAndTarget(self, backendId, targetId):
+        if backendId != 0:
+            raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
+        if targetId != 0:
             raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")

     def infer(self, image):
         return self._model.detectAndDecode(image)
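cv.wechat_qrcode_WeChatQRCode exposes no backend or target selection, so the wrapper accepts the extra arguments only to reject anything but the defaults. A hedged usage sketch, assuming an OpenCV build with the wechat_qrcode contrib module and the four model files next to the script:

from wechatqrcode import WeChatQRCode

# Constructing with the default backendId=0, targetId=0 is the only supported path.
model = WeChatQRCode('detect_2021sep.prototxt', 'detect_2021sep.caffemodel',
                     'sr_2021sep.prototxt', 'sr_2021sep.caffemodel')
try:
    model.setBackendAndTarget(backendId=1, targetId=0)  # any non-default value is refused
except NotImplementedError as e:
    print(e)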
models/text_detection_db/db.py
CHANGED

@@ -38,12 +38,10 @@ class DB:
     def name(self):
         return self.__class__.__name__

-    def …
-        self._backendId = …
-
-    def setTarget(self, target):
-        self._targetId = target
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
         self._model.setPreferableTarget(self._targetId)

     def setInputSize(self, input_size):

@@ -55,4 +53,3 @@ class DB:
         assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])

         return self._model.detect(image)
models/text_detection_db/demo.py
CHANGED

@@ -11,41 +11,48 @@ import cv2 as cv
 from db import DB

-…
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx',
+                    help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument('--width', type=int, default=736,
                     help='Usage: Resize input image to certain width, default = 736. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736,
                     help='Usage: Resize input image to certain height, default = 736. It should be multiple by 32.')
+parser.add_argument('--binary_threshold', type=float, default=0.3,
+                    help='Usage: Threshold of the binary map, default = 0.3.')
+parser.add_argument('--polygon_threshold', type=float, default=0.5,
+                    help='Usage: Threshold of polygons, default = 0.5.')
+parser.add_argument('--max_candidates', type=int, default=200,
+                    help='Usage: Set maximum number of polygon candidates, default = 200.')
+parser.add_argument('--unclip_ratio', type=np.float64, default=2.0,
+                    help=' Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):

@@ -60,6 +67,9 @@
     return output

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate DB
     model = DB(modelPath=args.model,
                inputSize=[args.width, args.height],

@@ -67,9 +77,8 @@
                polygonThreshold=args.polygon_threshold,
                maxCandidates=args.max_candidates,
                unclipRatio=args.unclip_ratio,
-               backendId=…
-               targetId=…
-               )
+               backendId=backend_id,
+               targetId=target_id)

     # If input is an image
     if args.input is not None:

@@ -143,4 +152,3 @@
         cv.imshow('{} Demo'.format(model.name), original_image)

         tm.reset()
models/text_recognition_crnn/crnn.py
CHANGED

@@ -43,12 +43,10 @@ class CRNN:
     def _load_charset(self, charset):
         return ''.join(charset.splitlines())

-    def …
-        self._backendId = …
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
         self._model.setPreferableTarget(self._targetId)

     def _preprocess(self, image, rbbox):
models/text_recognition_crnn/demo.py
CHANGED

@@ -15,38 +15,41 @@ from crnn import CRNN
 sys.path.append('../text_detection_db')
 from db import DB

-…
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(
     description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx',
+                    help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument('--width', type=int, default=736,
                     help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736,
                     help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):

@@ -59,8 +62,9 @@
     return output

 if __name__ == '__main__':
-    …
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate DB for text detection
     detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
                   inputSize=[args.width, args.height],

@@ -68,9 +72,10 @@
                   polygonThreshold=0.5,
                   maxCandidates=200,
                   unclipRatio=2.0,
-                  backendId=…
-                  targetId=…
+                  backendId=backend_id,
+                  targetId=target_id)
+    # Instantiate CRNN for text recognition
+    recognizer = CRNN(modelPath=args.model, backendId=backend_id, targetId=target_id)

     # If input is an image
     if args.input is not None:

@@ -161,4 +166,3 @@
         # Visualize results in a new Window
         cv.imshow('{} Demo'.format(recognizer.name), original_image)
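Taken together, the recognition demo now threads one backend-target pair through both stages: DB locates text regions and CRNN reads each one. A condensed sketch of that pipeline under the demo's assumptions; 'sample.jpg' is a placeholder path, DB's remaining thresholds are left at their defaults (an assumption), and the per-box call mirrors the demo's usage:

import cv2 as cv
from db import DB
from crnn import CRNN

backend_id, target_id = cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU  # pair 0

detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
              inputSize=[736, 736],
              polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0,
              backendId=backend_id, targetId=target_id)
recognizer = CRNN(modelPath='text_recognition_CRNN_EN_2021sep.onnx',
                  backendId=backend_id, targetId=target_id)

image = cv.resize(cv.imread('sample.jpg'), (736, 736))   # placeholder input, resized to DB's input size
results = detector.infer(image)                           # detected text regions and their scores
texts = [recognizer.infer(image, box.reshape(8)) for box in results[0]]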