Ryan Lee committed
Commit ea4ac47 · 1 Parent(s): 44fe25c

C++ Demo - Object Detection (NanoDet) (#232)


* Functional and refactored demo.cpp for NanoDet

* Fixed inference timer text; added timer reset

* Updated README.md; removed FPS text for single-image processing

* Added matching saved-image message

* Removed inference-time printout for video inputs

* Updated FPS text to 2 decimal places

* Updated coding style for braces

* Addressed PR comments; adjusted to the C++11 standard

* Addressed PR comments: added C++11 CMake configuration, extracted the class-ID and confidence lookup into its own step, and switched to NMSBoxesBatched
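The switch to NMSBoxesBatched gives class-aware NMS in a single call: a box is suppressed only by higher-scoring boxes of the same class. A minimal, self-contained sketch of the call (the boxes, scores, and class ids below are made up for illustration; the thresholds match the demo's defaults):

```cpp
#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Two heavily overlapping boxes of the same class (x, y, width, height):
    // the lower-scoring one is suppressed.
    std::vector<cv::Rect2d> boxes = { cv::Rect2d(10, 10, 50, 50), cv::Rect2d(12, 12, 50, 50) };
    std::vector<float> scores = { 0.9f, 0.8f };
    std::vector<int> classIds = { 0, 0 };

    std::vector<int> keep;
    // scoreThreshold = 0.35, iouThreshold = 0.6, as in the demo's defaults
    cv::dnn::NMSBoxesBatched(boxes, scores, classIds, 0.35f, 0.6f, keep);

    for (int idx : keep)
        std::cout << "kept box " << idx << std::endl; // prints "kept box 0"
    return 0;
}
```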

models/object_detection_nanodet/CMakeLists.txt ADDED
@@ -0,0 +1,32 @@
+ cmake_minimum_required(VERSION 3.24)
+ set(project_name "opencv_zoo_object_detection_nanodet")
+
+ project(${project_name})
+
+ set(OPENCV_VERSION "4.9.0")
+ set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
+ # Find OpenCV; you may need to set the OpenCV_DIR variable to the absolute
+ # path of the directory containing OpenCVConfig.cmake, via the command line or GUI
+ find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
+
+ file(GLOB SourceFile
+     "demo.cpp")
+
+ # Once the package is found, several variables are set; the full list with
+ # descriptions is in the OpenCVConfig.cmake file. Print some of them
+ message(STATUS "OpenCV library status:")
+ message(STATUS "    config: ${OpenCV_DIR}")
+ message(STATUS "    version: ${OpenCV_VERSION}")
+ message(STATUS "    libraries: ${OpenCV_LIBS}")
+ message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")
+
+ # Set the C++ standard to C++11 before declaring targets so they inherit it
+ set(CMAKE_CXX_STANDARD 11)
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+ # Declare the executable target built from your sources
+ add_executable(${project_name} ${SourceFile})
+
+ # Link your application with OpenCV libraries
+ target_link_libraries(${project_name} PRIVATE ${OpenCV_LIBS})
models/object_detection_nanodet/README.md CHANGED
@@ -7,6 +7,8 @@ Note:
 
 ## Demo
 
+ ### Python
+
 Run the following command to try the demo:
 ```shell
 # detect on camera input
@@ -17,6 +19,23 @@ python demo.py --input /path/to/image -v
 Note:
 - image result saved as "result.jpg"
 
+ ### C++
+
+ Install the latest OpenCV and CMake >= 3.24.0, then build and run the demo with:
+
+ ```shell
+ # A typical and default installation path of OpenCV is /usr/local
+ cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
+ cmake --build build
+
+ # detect on camera input
+ ./build/opencv_zoo_object_detection_nanodet
+ # detect on an image
+ ./build/opencv_zoo_object_detection_nanodet -i=/path/to/image
+ # get help messages
+ ./build/opencv_zoo_object_detection_nanodet -h
+ ```
+
 ## Results
models/object_detection_nanodet/demo.cpp ADDED
@@ -0,0 +1,508 @@
+ #include <vector>
+ #include <string>
+ #include <iostream>
+ #include <sstream>   // stringstream / ostringstream used in visualize()
+ #include <iomanip>   // setprecision
+ #include <tuple>
+ #include <utility>   // pair
+
+ #include <opencv2/opencv.hpp>
+
+ using namespace std;
+ using namespace cv;
+ using namespace dnn;
+
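+ // Each entry pairs a DNN compute backend with its target device; the
+ // "backend" command-line option selects an index into this table.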
+ const auto backendTargetPairs = vector<pair<Backend, Target>>
+ {
+     {DNN_BACKEND_OPENCV, DNN_TARGET_CPU},
+     {DNN_BACKEND_CUDA, DNN_TARGET_CUDA},
+     {DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16},
+     {DNN_BACKEND_TIMVX, DNN_TARGET_NPU},
+     {DNN_BACKEND_CANN, DNN_TARGET_NPU}
+ };
+
+ const vector<string> nanodetClassLabels =
+ {
+     "person", "bicycle", "car", "motorcycle", "airplane", "bus",
+     "train", "truck", "boat", "traffic light", "fire hydrant",
+     "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+     "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
+     "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+     "skis", "snowboard", "sports ball", "kite", "baseball bat",
+     "baseball glove", "skateboard", "surfboard", "tennis racket",
+     "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
+     "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
+     "hot dog", "pizza", "donut", "cake", "chair", "couch",
+     "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
+     "mouse", "remote", "keyboard", "cell phone", "microwave",
+     "oven", "toaster", "sink", "refrigerator", "book", "clock",
+     "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
+ };
+
+ class NanoDet
+ {
+ public:
+     NanoDet(const String& modelPath, const float probThresh = 0.35f, const float iouThresh = 0.6f,
+             const Backend bId = DNN_BACKEND_DEFAULT, const Target tId = DNN_TARGET_CPU) :
+         modelPath(modelPath), probThreshold(probThresh),
+         iouThreshold(iouThresh), backendId(bId), targetId(tId),
+         imageShape(416, 416), regMax(7)
+     {
+         this->strides = { 8, 16, 32, 64 };
+         this->net = readNet(modelPath);
+         setBackendAndTarget(bId, tId);
+         // project holds the bin indices 0..regMax used to decode box distances
+         this->project = Mat::zeros(1, this->regMax + 1, CV_32F);
+         for (int i = 0; i <= this->regMax; ++i)
+         {
+             this->project.at<float>(0, i) = static_cast<float>(i);
+         }
+         this->mean = Scalar(103.53, 116.28, 123.675);
+         this->std = Scalar(1.0 / 57.375, 1.0 / 57.12, 1.0 / 58.395);
+         this->generateAnchors();
+     }
+
+     void setBackendAndTarget(Backend bId, Target tId)
+     {
+         this->net.setPreferableBackend(bId);
+         this->net.setPreferableTarget(tId);
+     }
+
+     Mat preProcess(const Mat& inputImage)
+     {
+         Image2BlobParams paramNanodet;
+         paramNanodet.datalayout = DNN_LAYOUT_NCHW;
+         paramNanodet.ddepth = CV_32F;
+         paramNanodet.mean = this->mean;
+         paramNanodet.scalefactor = this->std;
+         paramNanodet.size = this->imageShape;
+         Mat blob;
+         blobFromImageWithParams(inputImage, blob, paramNanodet);
+         return blob;
+     }
+
+     Mat infer(const Mat& sourceImage)
+     {
+         Mat blob = this->preProcess(sourceImage);
+         this->net.setInput(blob);
+         vector<Mat> modelOutput;
+         this->net.forward(modelOutput, this->net.getUnconnectedOutLayersNames());
+         Mat preds = this->postProcess(modelOutput);
+         return preds;
+     }
+
+     Mat reshapeIfNeeded(const Mat& input)
+     {
+         if (input.dims == 3)
+         {
+             return input.reshape(0, input.size[1]);
+         }
+         return input;
+     }
+
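+     // NanoDet predicts each box edge as a discrete distribution over
+     // (regMax + 1) distance bins: softmaxActivation normalizes the bins and
+     // applyProjection takes their expectation to recover scalar distances.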
+     Mat softmaxActivation(const Mat& input)
+     {
+         Mat x_exp, x_sum, x_repeat_sum, result;
+         exp(input.reshape(0, static_cast<int>(input.total()) / (this->regMax + 1)), x_exp);
+         reduce(x_exp, x_sum, 1, REDUCE_SUM, CV_32F);
+         repeat(x_sum, 1, this->regMax + 1, x_repeat_sum);
+         divide(x_exp, x_repeat_sum, result);
+         return result;
+     }
+
+     Mat applyProjection(Mat& input)
+     {
+         Mat repeat_project;
+         repeat(this->project, input.rows, 1, repeat_project);
+         multiply(input, repeat_project, input);
+         reduce(input, input, 1, REDUCE_SUM, CV_32F);
+         Mat projection = input.col(0).clone();
+         return projection.reshape(0, static_cast<int>(projection.total()) / 4);
+     }
+
+     void preNMS(Mat& anchors, Mat& bbox_pred, Mat& cls_score, const int nms_pre = 1000)
+     {
+         Mat max_scores;
+         reduce(cls_score, max_scores, 1, REDUCE_MAX);
+
+         Mat indices;
+         sortIdx(max_scores.t(), indices, SORT_EVERY_ROW | SORT_DESCENDING);
+
+         // Keep only the nms_pre highest-scoring candidates
+         Mat top_indices = indices.colRange(0, nms_pre);
+         Mat selected_anchors, selected_bbox_pred, selected_cls_score;
+         for (int j = 0; j < top_indices.cols; ++j)
+         {
+             selected_anchors.push_back(anchors.row(top_indices.at<int>(j)));
+             selected_bbox_pred.push_back(bbox_pred.row(top_indices.at<int>(j)));
+             selected_cls_score.push_back(cls_score.row(top_indices.at<int>(j)));
+         }
+
+         anchors = selected_anchors;
+         bbox_pred = selected_bbox_pred;
+         cls_score = selected_cls_score;
+     }
+
+     void clipBoundingBoxes(Mat& x1, Mat& y1, Mat& x2, Mat& y2)
+     {
+         Mat zeros = Mat::zeros(x1.size(), x1.type());
+         x1 = min(max(x1, zeros), Scalar(this->imageShape.width - 1));
+         y1 = min(max(y1, zeros), Scalar(this->imageShape.height - 1));
+         x2 = min(max(x2, zeros), Scalar(this->imageShape.width - 1));
+         y2 = min(max(y2, zeros), Scalar(this->imageShape.height - 1));
+     }
+
+     Mat calculateBoundingBoxes(const Mat& anchors, const Mat& bbox_pred)
+     {
+         Mat x1 = anchors.col(0) - bbox_pred.col(0);
+         Mat y1 = anchors.col(1) - bbox_pred.col(1);
+         Mat x2 = anchors.col(0) + bbox_pred.col(2);
+         Mat y2 = anchors.col(1) + bbox_pred.col(3);
+
+         clipBoundingBoxes(x1, y1, x2, y2);
+
+         Mat bboxes;
+         hconcat(vector<Mat>{x1, y1, x2, y2}, bboxes);
+
+         return bboxes;
+     }
+
+     vector<Rect2d> bboxMatToRect2d(const Mat& bboxes)
+     {
+         // Convert (x1, y1, x2, y2) rows into the (x, y, width, height)
+         // rectangles expected by NMSBoxesBatched
+         Mat bboxes_wh(bboxes.clone());
+         bboxes_wh.colRange(2, 4) -= bboxes_wh.colRange(0, 2);
+         vector<Rect2d> boxesXYWH;
+         for (int i = 0; i < bboxes_wh.rows; i++)
+         {
+             boxesXYWH.emplace_back(bboxes_wh.at<float>(i, 0),
+                                    bboxes_wh.at<float>(i, 1),
+                                    bboxes_wh.at<float>(i, 2),
+                                    bboxes_wh.at<float>(i, 3));
+         }
+         return boxesXYWH;
+     }
+
+     Mat postProcess(const vector<Mat>& preds)
+     {
+         // Model outputs come in (class score, box distribution) pairs, one per stride
+         vector<Mat> cls_scores, bbox_preds;
+         for (size_t i = 0; i < preds.size(); i += 2)
+         {
+             cls_scores.push_back(preds[i]);
+             bbox_preds.push_back(preds[i + 1]);
+         }
+
+         vector<Mat> bboxes_mlvl;
+         vector<Mat> scores_mlvl;
+
+         for (size_t i = 0; i < strides.size(); ++i)
+         {
+             if (i >= cls_scores.size() || i >= bbox_preds.size()) continue;
+             // Extract necessary data
+             int stride = strides[i];
+             Mat cls_score = reshapeIfNeeded(cls_scores[i]);
+             Mat bbox_pred = reshapeIfNeeded(bbox_preds[i]);
+             Mat anchors = anchorsMlvl[i].t();
+
+             // Softmax activation, projection, and calculate bounding boxes
+             bbox_pred = softmaxActivation(bbox_pred);
+             bbox_pred = applyProjection(bbox_pred);
+             bbox_pred = stride * bbox_pred;
+
+             const int nms_pre = 1000;
+             if (nms_pre > 0 && cls_score.rows > nms_pre)
+             {
+                 preNMS(anchors, bbox_pred, cls_score, nms_pre);
+             }
+
+             Mat bboxes = calculateBoundingBoxes(anchors, bbox_pred);
+
+             bboxes_mlvl.push_back(bboxes);
+             scores_mlvl.push_back(cls_score);
+         }
+         Mat bboxes;
+         Mat scores;
+         vconcat(bboxes_mlvl, bboxes);
+         vconcat(scores_mlvl, scores);
+
+         vector<Rect2d> boxesXYWH = bboxMatToRect2d(bboxes);
+         vector<int> classIds;
+         vector<float> confidences;
+         for (int i = 0; i < scores.rows; ++i)
+         {
+             // Keep the best-scoring class for each candidate box
+             Point maxLoc;
+             minMaxLoc(scores.row(i), nullptr, nullptr, nullptr, &maxLoc);
+             classIds.push_back(maxLoc.x);
+             confidences.push_back(scores.at<float>(i, maxLoc.x));
+         }
+
+         vector<int> indices;
+         NMSBoxesBatched(boxesXYWH, confidences, classIds, probThreshold, iouThreshold, indices);
+         if (indices.empty())
+         {
+             return Mat();
+         }
+         // Each result row is (x1, y1, x2, y2, confidence, classId)
+         Mat result(int(indices.size()), 6, CV_32FC1);
+         int row = 0;
+         for (auto idx : indices)
+         {
+             bboxes.rowRange(idx, idx + 1).copyTo(result(Rect(0, row, 4, 1)));
+             result.at<float>(row, 4) = confidences[idx];
+             result.at<float>(row, 5) = static_cast<float>(classIds[idx]);
+             row++;
+         }
+         return result;
+     }
+
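+     // Precompute one anchor point per feature-map cell at every stride
+     // level, placed at the cell centre; stored as a 2 x N matrix and
+     // transposed back to N x 2 in postProcess().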
+     void generateAnchors()
+     {
+         for (const int stride : strides)
+         {
+             int feat_h = this->imageShape.height / stride;
+             int feat_w = this->imageShape.width / stride;
+
+             vector<Mat> anchors;
+
+             for (int y = 0; y < feat_h; ++y)
+             {
+                 for (int x = 0; x < feat_w; ++x)
+                 {
+                     float shift_x = x * stride;
+                     float shift_y = y * stride;
+                     float cx = shift_x + 0.5f * (stride - 1);
+                     float cy = shift_y + 0.5f * (stride - 1);
+                     Mat anchor_point = (Mat_<float>(2, 1) << cx, cy);
+                     anchors.push_back(anchor_point);
+                 }
+             }
+             Mat anchors_mat;
+             hconcat(anchors, anchors_mat);
+             this->anchorsMlvl.push_back(anchors_mat);
+         }
+     }
+
+ private:
+     Net net;
+     String modelPath;
+     vector<int> strides;
+     Size imageShape;
+     int regMax;
+     float probThreshold;
+     float iouThreshold;
+     Backend backendId;
+     Target targetId;
+     Mat project;
+     Scalar mean;
+     Scalar std;
+     vector<Mat> anchorsMlvl;
+ };
+
+ // Function to resize and pad an image and return both the image and scale information
+ tuple<Mat, vector<double>> letterbox(const Mat& sourceImage, const Size& target_size = Size(416, 416))
+ {
+     Mat img = sourceImage.clone();
+
+     double top = 0, left = 0, newh = target_size.height, neww = target_size.width;
+
+     if (img.rows != img.cols)
+     {
+         double hw_scale = static_cast<double>(img.rows) / img.cols;
+         if (hw_scale > 1)
+         {
+             newh = target_size.height;
+             neww = static_cast<int>(target_size.width / hw_scale);
+             resize(img, img, Size(neww, newh), 0, 0, INTER_AREA);
+             left = static_cast<int>((target_size.width - neww) * 0.5);
+             copyMakeBorder(img, img, 0, 0, left, target_size.width - neww - left, BORDER_CONSTANT, Scalar(0));
+         }
+         else
+         {
+             newh = static_cast<int>(target_size.height * hw_scale);
+             neww = target_size.width;
+             resize(img, img, Size(neww, newh), 0, 0, INTER_AREA);
+             top = static_cast<int>((target_size.height - newh) * 0.5);
+             copyMakeBorder(img, img, top, target_size.height - newh - top, 0, 0, BORDER_CONSTANT, Scalar(0));
+         }
+     }
+     else
+     {
+         resize(img, img, target_size, 0, 0, INTER_AREA);
+     }
+     vector<double> letterbox_scale = {top, left, newh, neww};
+
+     return make_tuple(img, letterbox_scale);
+ }
+
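+ // letterbox_scale holds {top, left, newh, neww}: the padding offsets and the
+ // resized content size needed to map detections back onto the source image.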
+ // Function to scale bounding boxes back to original image coordinates
+ vector<int> unletterbox(const Mat& bbox, const Size& original_image_shape, const vector<double>& letterbox_scale)
+ {
+     vector<int> ret(bbox.cols);
+
+     int h = original_image_shape.height;
+     int w = original_image_shape.width;
+     double top = letterbox_scale[0];
+     double left = letterbox_scale[1];
+     double newh = letterbox_scale[2];
+     double neww = letterbox_scale[3];
+
+     if (h == w)
+     {
+         // Square input: only a uniform resize was applied, so scale the
+         // predicted coordinates directly
+         double ratio = static_cast<double>(h) / newh;
+         for (int i = 0; i < bbox.cols; ++i)
+         {
+             ret[i] = static_cast<int>(bbox.at<float>(i) * ratio);
+         }
+         return ret;
+     }
+
+     double ratioh = static_cast<double>(h) / newh;
+     double ratiow = static_cast<double>(w) / neww;
+     ret[0] = max(static_cast<int>((bbox.at<float>(0) - left) * ratiow), 0);
+     ret[1] = max(static_cast<int>((bbox.at<float>(1) - top) * ratioh), 0);
+     ret[2] = min(static_cast<int>((bbox.at<float>(2) - left) * ratiow), w);
+     ret[3] = min(static_cast<int>((bbox.at<float>(3) - top) * ratioh), h);
+
+     return ret;
+ }
+
+ // Function to visualize predictions on an image
+ Mat visualize(const Mat& preds, const Mat& result_image, const vector<double>& letterbox_scale, bool video, double fps = 0.0)
+ {
+     Mat visualized_image = result_image.clone();
+
+     // Draw FPS if provided
+     if (fps > 0.0 && video)
+     {
+         std::ostringstream fps_stream;
+         fps_stream << "FPS: " << std::fixed << std::setprecision(2) << fps;
+         putText(visualized_image, fps_stream.str(), Point(10, 25), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
+     }
+
+     // Draw bounding boxes and labels for each prediction
+     for (int i = 0; i < preds.rows; i++)
+     {
+         Mat pred = preds.row(i);
+         Mat bbox = pred.colRange(0, 4);
+         double conf = pred.at<float>(4);
+         int classid = static_cast<int>(pred.at<float>(5));
+
+         // Convert bbox coordinates back to original image space
+         vector<int> unnormalized_bbox = unletterbox(bbox, visualized_image.size(), letterbox_scale);
+
+         // Draw bounding box
+         rectangle(visualized_image, Point(unnormalized_bbox[0], unnormalized_bbox[1]),
+                   Point(unnormalized_bbox[2], unnormalized_bbox[3]), Scalar(0, 255, 0), 2);
+
+         // Draw label
+         stringstream label;
+         label << nanodetClassLabels[classid] << ": " << fixed << setprecision(2) << conf;
+         putText(visualized_image, label.str(), Point(unnormalized_bbox[0], unnormalized_bbox[1] - 10),
+                 FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 2);
+     }
+
+     return visualized_image;
+ }
+
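+ // Full per-frame pipeline: convert to RGB, letterbox to the network input
+ // size, run inference, then draw, save, and/or display the result.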
+ void processImage(Mat& inputImage, NanoDet& nanodet, TickMeter& tm, bool save, bool vis, bool video)
+ {
+     cvtColor(inputImage, inputImage, COLOR_BGR2RGB);
+     tuple<Mat, vector<double>> w = letterbox(inputImage);
+     Mat inputBlob = get<0>(w);
+     vector<double> letterboxScale = get<1>(w);
+
+     tm.start();
+     Mat predictions = nanodet.infer(inputBlob);
+     tm.stop();
+     if (!video)
+     {
+         cout << "Inference time: " << tm.getTimeMilli() << " ms\n";
+     }
+
+     Mat img = visualize(predictions, inputImage, letterboxScale, video, tm.getFPS());
+     // Convert back to BGR for imwrite/imshow
+     cvtColor(img, img, COLOR_RGB2BGR);
+     if (save)
+     {
+         static const string kOutputName = "result.jpg";
+         imwrite(kOutputName, img);
+         if (!video)
+         {
+             cout << "Results saved to " + kOutputName << endl;
+         }
+     }
+     if (vis)
+     {
+         static const string kWinName = "model";
+         imshow(kWinName, img);
+     }
+ }
+
+
+ const String keys =
+     "{ help h | | Print help message. }"
+     "{ model m | object_detection_nanodet_2022nov.onnx | Path to the model. }"
+     "{ input i | | Path to the input image. Omit for using the default camera. }"
+     "{ confidence | 0.35 | Class confidence threshold. }"
+     "{ nms | 0.6 | NMS IoU threshold. }"
+     "{ save s | true | Specify to save results. This flag is invalid when using the camera. }"
+     "{ vis v | true | Specify to open a window for result visualization. This flag is invalid when using the camera. }"
+     "{ backend bt | 0 | Choose one of computation backends: "
+         "0: (default) OpenCV implementation + CPU, "
+         "1: CUDA + GPU (CUDA), "
+         "2: CUDA + GPU (CUDA FP16), "
+         "3: TIM-VX + NPU, "
+         "4: CANN + NPU}";
+
+ int main(int argc, char** argv)
+ {
+     CommandLineParser parser(argc, argv, keys);
+
+     parser.about("Use this script to run NanoDet inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022.");
+     if (parser.has("help"))
+     {
+         parser.printMessage();
+         return 0;
+     }
+
+     string model = parser.get<String>("model");
+     string inputPath = parser.get<String>("input");
+     float confThreshold = parser.get<float>("confidence");
+     float nmsThreshold = parser.get<float>("nms");
+     bool save = parser.get<bool>("save");
+     bool vis = parser.get<bool>("vis");
+     int backendTargetid = parser.get<int>("backend");
+
+     if (model.empty())
+     {
+         CV_Error(Error::StsError, "Path to the model file is empty");
+     }
+     // Guard against an out-of-range backend index
+     if (backendTargetid < 0 || backendTargetid >= static_cast<int>(backendTargetPairs.size()))
+     {
+         CV_Error(Error::StsBadArg, "Backend index out of range");
+     }
+
+     NanoDet nanodet(model, confThreshold, nmsThreshold,
+                     backendTargetPairs[backendTargetid].first, backendTargetPairs[backendTargetid].second);
+
+     TickMeter tm;
+     if (parser.has("input"))
+     {
+         Mat inputImage = imread(samples::findFile(inputPath));
+         static const bool kNotVideo = false;
+         processImage(inputImage, nanodet, tm, save, vis, kNotVideo);
+         waitKey(0);
+     }
+     else
+     {
+         VideoCapture cap;
+         cap.open(0);
+         if (!cap.isOpened())
+         {
+             CV_Error(Error::StsError, "Cannot open video or file");
+         }
+
+         Mat frame;
+         while (waitKey(1) < 0)
+         {
+             cap >> frame;
+             if (frame.empty())
+             {
+                 cout << "Frame is empty" << endl;
+                 waitKey();
+                 break;
+             }
+             tm.reset();
+             static const bool kIsVideo = true;
+             processImage(frame, nanodet, tm, save, vis, kIsVideo);
+         }
+         cap.release();
+     }
+     return 0;
+ }