ONNX
ytfeng committed on
Commit
3a3d327
·
1 Parent(s): 1867cdd

Limit combinations of backends and targets in demos and benchmark (#145)

Browse files

* limit backend and target combination in demos and benchmark

* simpler version checking

Files changed (2) hide show
  1. demo.py +58 -50
  2. yolox.py +3 -5
demo.py CHANGED
@@ -1,29 +1,21 @@
1
  import numpy as np
2
- import cv2
3
  import argparse
4
 
5
  from yolox import YoloX
6
 
7
- def str2bool(v):
8
- if v.lower() in ['on', 'yes', 'true', 'y', 't']:
9
- return True
10
- elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
11
- return False
12
- else:
13
- raise NotImplementedError
14
-
15
- backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
16
- targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
17
- help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
18
- help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
19
 
20
- try:
21
- backends += [cv2.dnn.DNN_BACKEND_TIMVX]
22
- targets += [cv2.dnn.DNN_TARGET_NPU]
23
- help_msg_backends += "; {:d}: TIMVX"
24
- help_msg_targets += "; {:d}: NPU"
25
- except:
26
- print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
27
 
28
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
29
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
@@ -43,8 +35,8 @@ classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
43
  def letterbox(srcimg, target_size=(640, 640)):
44
  padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
45
  ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
46
- resized_img = cv2.resize(
47
- srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv2.INTER_LINEAR
48
  ).astype(np.float32)
49
  padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img
50
 
@@ -58,7 +50,7 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
58
 
59
  if fps is not None:
60
  fps_label = "FPS: %.2f" % fps
61
- cv2.putText(res_img, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
62
 
63
  for det in dets:
64
  box = unletterbox(det[:4], letterbox_scale).astype(np.int32)
@@ -68,39 +60,55 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
68
  x0, y0, x1, y1 = box
69
 
70
  text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
71
- font = cv2.FONT_HERSHEY_SIMPLEX
72
- txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
73
- cv2.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
74
- cv2.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
75
- cv2.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)
76
 
77
  return res_img
78
 
79
  if __name__=='__main__':
80
  parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
81
- parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
82
- parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx', help="Path to the model")
83
- parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
84
- parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
85
- parser.add_argument('--confidence', default=0.5, type=float, help='Class confidence')
86
- parser.add_argument('--nms', default=0.5, type=float, help='Enter nms IOU threshold')
87
- parser.add_argument('--obj', default=0.5, type=float, help='Enter object threshold')
88
- parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
89
- parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  args = parser.parse_args()
91
 
 
 
 
92
  model_net = YoloX(modelPath= args.model,
93
  confThreshold=args.confidence,
94
  nmsThreshold=args.nms,
95
  objThreshold=args.obj,
96
- backendId=args.backend,
97
- targetId=args.target)
98
 
99
- tm = cv2.TickMeter()
100
  tm.reset()
101
  if args.input is not None:
102
- image = cv2.imread(args.input)
103
- input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
104
  input_blob, letterbox_scale = letterbox(input_blob)
105
 
106
  # Inference
@@ -113,25 +121,25 @@ if __name__=='__main__':
113
 
114
  if args.save:
115
  print('Resutls saved to result.jpg\n')
116
- cv2.imwrite('result.jpg', img)
117
 
118
  if args.vis:
119
- cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
120
- cv2.imshow(args.input, img)
121
- cv2.waitKey(0)
122
 
123
  else:
124
  print("Press any key to stop video capture")
125
  deviceId = 0
126
- cap = cv2.VideoCapture(deviceId)
127
 
128
- while cv2.waitKey(1) < 0:
129
  hasFrame, frame = cap.read()
130
  if not hasFrame:
131
  print('No frames grabbed!')
132
  break
133
 
134
- input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
135
  input_blob, letterbox_scale = letterbox(input_blob)
136
 
137
  # Inference
@@ -141,6 +149,6 @@ if __name__=='__main__':
141
 
142
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
143
 
144
- cv2.imshow("YoloX Demo", img)
145
 
146
  tm.reset()
 
1
  import numpy as np
2
+ import cv2 as cv
3
  import argparse
4
 
5
  from yolox import YoloX
6
 
7
+ # Check OpenCV version
8
+ assert cv.__version__ >= "4.7.0", \
9
+ "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
 
 
 
 
 
 
 
 
10
 
11
+ # Valid combinations of backends and targets
12
+ backend_target_pairs = [
13
+ [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
14
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
15
+ [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
16
+ [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
17
+ [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
18
+ ]
19
 
20
  classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
21
  'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
 
35
  def letterbox(srcimg, target_size=(640, 640)):
36
  padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
37
  ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
38
+ resized_img = cv.resize(
39
+ srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR
40
  ).astype(np.float32)
41
  padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img
42
 
 
50
 
51
  if fps is not None:
52
  fps_label = "FPS: %.2f" % fps
53
+ cv.putText(res_img, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
54
 
55
  for det in dets:
56
  box = unletterbox(det[:4], letterbox_scale).astype(np.int32)
 
60
  x0, y0, x1, y1 = box
61
 
62
  text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
63
+ font = cv.FONT_HERSHEY_SIMPLEX
64
+ txt_size = cv.getTextSize(text, font, 0.4, 1)[0]
65
+ cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
66
+ cv.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
67
+ cv.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)
68
 
69
  return res_img
70
 
71
  if __name__=='__main__':
72
  parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
73
+ parser.add_argument('--input', '-i', type=str,
74
+ help='Path to the input image. Omit for using default camera.')
75
+ parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx',
76
+ help="Path to the model")
77
+ parser.add_argument('--backend_target', '-bt', type=int, default=0,
78
+ help='''Choose one of the backend-target pair to run this demo:
79
+ {:d}: (default) OpenCV implementation + CPU,
80
+ {:d}: CUDA + GPU (CUDA),
81
+ {:d}: CUDA + GPU (CUDA FP16),
82
+ {:d}: TIM-VX + NPU,
83
+ {:d}: CANN + NPU
84
+ '''.format(*[x for x in range(len(backend_target_pairs))]))
85
+ parser.add_argument('--confidence', default=0.5, type=float,
86
+ help='Class confidence')
87
+ parser.add_argument('--nms', default=0.5, type=float,
88
+ help='Enter nms IOU threshold')
89
+ parser.add_argument('--obj', default=0.5, type=float,
90
+ help='Enter object threshold')
91
+ parser.add_argument('--save', '-s', action='store_true',
92
+ help='Specify to save results. This flag is invalid when using camera.')
93
+ parser.add_argument('--vis', '-v', action='store_true',
94
+ help='Specify to open a window for result visualization. This flag is invalid when using camera.')
95
  args = parser.parse_args()
96
 
97
+ backend_id = backend_target_pairs[args.backend_target][0]
98
+ target_id = backend_target_pairs[args.backend_target][1]
99
+
100
  model_net = YoloX(modelPath= args.model,
101
  confThreshold=args.confidence,
102
  nmsThreshold=args.nms,
103
  objThreshold=args.obj,
104
+ backendId=backend_id,
105
+ targetId=target_id)
106
 
107
+ tm = cv.TickMeter()
108
  tm.reset()
109
  if args.input is not None:
110
+ image = cv.imread(args.input)
111
+ input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
112
  input_blob, letterbox_scale = letterbox(input_blob)
113
 
114
  # Inference
 
121
 
122
  if args.save:
123
  print('Resutls saved to result.jpg\n')
124
+ cv.imwrite('result.jpg', img)
125
 
126
  if args.vis:
127
+ cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
128
+ cv.imshow(args.input, img)
129
+ cv.waitKey(0)
130
 
131
  else:
132
  print("Press any key to stop video capture")
133
  deviceId = 0
134
+ cap = cv.VideoCapture(deviceId)
135
 
136
+ while cv.waitKey(1) < 0:
137
  hasFrame, frame = cap.read()
138
  if not hasFrame:
139
  print('No frames grabbed!')
140
  break
141
 
142
+ input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
143
  input_blob, letterbox_scale = letterbox(input_blob)
144
 
145
  # Inference
 
149
 
150
  img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
151
 
152
+ cv.imshow("YoloX Demo", img)
153
 
154
  tm.reset()
yolox.py CHANGED
@@ -23,12 +23,10 @@ class YoloX:
23
  def name(self):
24
  return self.__class__.__name__
25
 
26
- def setBackend(self, backendId):
27
- self.backendId = backendId
 
28
  self.net.setPreferableBackend(self.backendId)
29
-
30
- def setTarget(self, targetId):
31
- self.targetId = targetId
32
  self.net.setPreferableTarget(self.targetId)
33
 
34
  def preprocess(self, img):
 
23
  def name(self):
24
  return self.__class__.__name__
25
 
26
+ def setBackendAndTarget(self, backendId, targetId):
27
+ self._backendId = backendId
28
+ self._targetId = targetId
29
  self.net.setPreferableBackend(self.backendId)
 
 
 
30
  self.net.setPreferableTarget(self.targetId)
31
 
32
  def preprocess(self, img):