Commit c690051 (verified) by lonalala · 1 Parent(s): 35cb9a2

Upload 16 files

model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/README.md CHANGED
## Model Information
### Source model

- Input shape: 640x640
- Number of parameters: 1.968M
- Model size: 7.56 MB
- Output shape: 1x25200x85

Source model repository: [yolov5](https://github.com/ultralytics/yolov5)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: FV01 QCS6490

## Inference with AidLite SDK

### SDK installation
Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install the AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the AidLite build that matches the backend above,
# e.g. for QNN 2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify the AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite; print(aidlite.get_py_library_version())"
```

### Run the Python demo

```bash
cd python
python3 demo_qnn.py
```

### Run the C++ demo

```bash
cd yolov5n/model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp
mkdir build
cd build
cmake ..
make
./run_yolov5
```
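
For reference, the core inference flow inside `python/demo_qnn.py` reduces to the sketch below; the model path, image path, and preprocessing are copied from the demo's defaults and are assumptions that may need adjusting for your setup. The 1x25200x85 output corresponds to the three detection heads concatenated: 3 anchors x (80² + 40² + 20²) grid cells = 25200 candidate boxes, each carrying 4 box coordinates, 1 objectness score, and 80 class scores.

```python
import cv2
import numpy as np
import aidlite

MODEL_SIZE = 640
# Paths are assumptions based on the demo's defaults; adjust for your setup.
model_path = "../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin"
image_path = "bus.jpg"

# Local QNN runtime on the DSP, with a quantized model (mirrors demo_qnn.py)
config = aidlite.Config.create_instance()
config.implement_type = aidlite.ImplementType.TYPE_LOCAL
config.framework_type = aidlite.FrameworkType.TYPE_QNN
config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
config.is_quantify_model = 1

# One 1x640x640x3 float32 input, three YOLOv5 detection heads as outputs
model = aidlite.Model.create_instance(model_path)
model.set_model_properties([[1, MODEL_SIZE, MODEL_SIZE, 3]], aidlite.DataType.TYPE_FLOAT32,
                           [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]],
                           aidlite.DataType.TYPE_FLOAT32)

# Note: "interpretper" and "destory" are the SDK's own spellings
interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
interpreter.init()
interpreter.load_model()

# Letterbox-style preprocessing: pad to a square, resize to 640x640, scale to [0, 1]
frame = cv2.imread(image_path)
length = max(frame.shape[:2])
square = np.zeros((length, length, 3), np.uint8)
square[:frame.shape[0], :frame.shape[1]] = frame
img = cv2.resize(cv2.cvtColor(square, cv2.COLOR_BGR2RGB), (MODEL_SIZE, MODEL_SIZE))
img = (img / 255.0).astype(np.float32)

interpreter.set_input_tensor(0, img.data)
interpreter.invoke()
heads = [interpreter.get_output_tensor(i) for i in range(3)]  # flat arrays, one per stride
interpreter.destory()
```

The raw head tensors still need the YOLOv5 decode and NMS steps; the `Detect` class and `detect_postprocess()` in `demo_qnn.py` show the full post-processing.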
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt CHANGED
cmake_minimum_required(VERSION 3.12)
project(aidlite_cpp_samples)

set(CMAKE_BUILD_TYPE Release)

set(OPENCV_INCLUDE_DIR /usr/include/opencv4)
set(OPENCV_LINK_DIR "")
set(OPENCV_LIBS opencv_imgcodecs opencv_imgproc opencv_core) # note: if these are static libraries, the link order matters

set(AIDLITE_INCLUDE_DIR /usr/local/include)
set(AIDLITE_LINK_DIR /usr/local/lib)
set(AIDLITE_LIB aidlite)

function(func_generate_sample_exe sample_name)

set(demo_name ${sample_name})

file(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/${demo_name}.cpp)

add_executable(${demo_name} ${src_files})

target_compile_options(${demo_name} PRIVATE -std=c++11)
target_include_directories(${demo_name} PUBLIC ${OPENCV_INCLUDE_DIR} ${AIDLITE_INCLUDE_DIR})
target_link_directories(${demo_name} PUBLIC ${OPENCV_LINK_DIR} ${AIDLITE_LINK_DIR})
target_link_libraries(${demo_name} PUBLIC ${AIDLITE_LIB} ${OPENCV_LIBS} pthread)
message(STATUS "[CMAKEMSG] ${demo_name} needed libraries: ${AIDLITE_LIB} ${OPENCV_LIBS}")

endfunction()

set(SAMPLE_LIST run_yolov5)

FOREACH(sample ${SAMPLE_LIST})
message("prepare to generate cpp sample : ${sample}")

func_generate_sample_exe(${sample})
ENDFOREACH(sample)
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/run_yolov5.cpp CHANGED
The old and new versions of this file differ only in the model path (the `.aidem` suffix was dropped):
- std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem";
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin";
The full updated file follows.
1
+ #include <thread>
2
+ #include <future>
3
+ #include <opencv2/opencv.hpp>
4
+ #include "aidlux/aidlite/aidlite.hpp"
5
+
6
+ using namespace Aidlux::Aidlite;
7
+ using namespace std;
8
+
9
+ #define OBJ_CLASS_NUM 80
10
+ #define NMS_THRESH 0.45
11
+ #define BOX_THRESH 0.5
12
+ #define MODEL_SIZE 640
13
+ #define OBJ_NUMB_MAX_SIZE 64
14
+ #define PROP_BOX_SIZE (5+OBJ_CLASS_NUM)
15
+ #define STRIDE8_SIZE (MODEL_SIZE / 8)
16
+ #define STRIDE16_SIZE (MODEL_SIZE / 16)
17
+ #define STRIDE32_SIZE (MODEL_SIZE / 32)
18
+
19
+ const float anchor0[6] = {10, 13, 16, 30, 33, 23};
20
+ const float anchor1[6] = {30, 61, 62, 45, 59, 119};
21
+ const float anchor2[6] = {116, 90, 156, 198, 373, 326};
22
+
23
+ string class_names[] = {
24
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
25
+ "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
26
+ "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
27
+ "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
28
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
29
+ "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
30
+ "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
31
+ "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
32
+
33
+
34
+ static float sigmoid(float x) { return 1.f / (1.f + exp(-x)); }
35
+
36
+ float eqprocess(cv::Mat* src, cv::Mat* dst, int width, int height)
37
+ {
38
+ int w = src->cols;
39
+ int h = src->rows;
40
+ float scale_h = float(h) / float(height);
41
+ float scale_w = float(w) / float(width);
42
+
43
+ float scale;
44
+ if (scale_h > scale_w)
45
+ {
46
+ scale = scale_h;
47
+ }
48
+ else
49
+ {
50
+ scale = scale_w;
51
+ }
52
+
53
+ int rel_width = int(w / scale);
54
+ int rel_height = int(h / scale);
55
+
56
+ cv::Mat tmp = (*dst)(cv::Rect(0, 0, rel_width, rel_height));
57
+ cv::resize(*src, tmp, cv::Size(rel_width, rel_height));
58
+ return scale;
59
+ }
60
+
61
+ std::vector<std::string> split(const std::string& str)
62
+ {
63
+ std::stringstream ss(str);
64
+ std::vector<std::string> elems;
65
+ std::string item;
66
+ while (std::getline(ss, item, ','))
67
+ {
68
+ elems.push_back(item);
69
+ }
70
+ return elems;
71
+ }
72
+
73
+
74
+ int process(float* output, std::vector<float>& boxes, std::vector<float>& objProbs, std::vector<int>& classId, float * anchor, int grid_h, int grid_w, int stride, int imgsz)
75
+ {
76
+ int ct = 0;
77
+ int validCount = 0;
78
+ for (int a = 0; a < 3; a++)
79
+ {
80
+ for (int i = 0; i < grid_h; i++)
81
+ {
82
+ for (int j = 0; j < grid_w; j++)
83
+ {
84
+ int idx = a * PROP_BOX_SIZE + (i * grid_w + j) * 3 * PROP_BOX_SIZE;
85
+ float box_confidence = sigmoid(output[idx + 4]);
86
+ if (box_confidence >= BOX_THRESH )
87
+ {
88
+ float box_x = sigmoid(output[idx]) * 2 - 0.5;
89
+ float box_y = sigmoid(output[idx + 1]) * 2 - 0.5;
90
+ float box_w = pow(sigmoid(output[idx + 2]) * 2, 2);
91
+ float box_h = pow(sigmoid(output[idx + 3]) * 2, 2);
92
+
93
+ box_x = (box_x + j) * (float)stride;
94
+ box_y = (box_y + i) * (float)stride;
95
+ box_w = box_w * anchor[a * 2];
96
+ box_h = box_h * anchor[a * 2 + 1];
97
+
98
+ box_x -= (box_w / 2.0);
99
+ box_y -= (box_h / 2.0);
100
+
101
+ float maxClassProbs = 0;
102
+ int maxClassId = 0;
103
+
104
+ for(int k = 0; k < OBJ_CLASS_NUM ; k++)
105
+ {
106
+ float prob = output[idx + 5 + k];
107
+ if (prob > maxClassProbs)
108
+ {
109
+ maxClassId = k;
110
+ maxClassProbs = prob;
111
+ }
112
+ }
113
+ if (maxClassProbs > BOX_THRESH)
114
+ {
115
+ objProbs.push_back(sigmoid(maxClassProbs) * box_confidence);
116
+ classId.push_back(maxClassId);
117
+ validCount++;
118
+ boxes.push_back(box_x);
119
+ boxes.push_back(box_y);
120
+ boxes.push_back(box_w);
121
+ boxes.push_back(box_h);
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+
128
+ return validCount;
129
+ }
130
+
131
+
132
+ static int quick_sort_indice_inverse(std::vector<float>& input, int left, int right, std::vector<int>& indices)
133
+ {
134
+ float key;
135
+ int key_index;
136
+ int low = left;
137
+ int high = right;
138
+ if (left < right) {
139
+ key_index = indices[left];
140
+ key = input[left];
141
+ while (low < high) {
142
+ while (low < high && input[high] <= key) {
143
+ high--;
144
+ }
145
+ input[low] = input[high];
146
+ indices[low] = indices[high];
147
+ while (low < high && input[low] >= key) {
148
+ low++;
149
+ }
150
+ input[high] = input[low];
151
+ indices[high] = indices[low];
152
+ }
153
+ input[low] = key;
154
+ indices[low] = key_index;
155
+ quick_sort_indice_inverse(input, left, low - 1, indices);
156
+ quick_sort_indice_inverse(input, low + 1, right, indices);
157
+ }
158
+ return low;
159
+ }
160
+
161
+ static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
162
+ float ymax1)
163
+ {
164
+ float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
165
+ float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
166
+ float i = w * h;
167
+ float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
168
+ return u <= 0.f ? 0.f : (i / u);
169
+ }
170
+
171
+
172
+ static int nms(int validCount, std::vector<float>& outputLocations, std::vector<int> classIds, std::vector<int>& order,
173
+ int filterId, float threshold)
174
+ {
175
+ for (int i = 0; i < validCount; ++i) {
176
+ if (order[i] == -1 || classIds[i] != filterId) {
177
+ continue;
178
+ }
179
+ int n = order[i];
180
+ for (int j = i + 1; j < validCount; ++j) {
181
+ int m = order[j];
182
+ if (m == -1 || classIds[i] != filterId) {
183
+ continue;
184
+ }
185
+ float xmin0 = outputLocations[n * 4 + 0];
186
+ float ymin0 = outputLocations[n * 4 + 1];
187
+ float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
188
+ float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
189
+
190
+ float xmin1 = outputLocations[m * 4 + 0];
191
+ float ymin1 = outputLocations[m * 4 + 1];
192
+ float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
193
+ float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
194
+
195
+ float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
196
+
197
+ if (iou > threshold) {
198
+ order[j] = -1;
199
+ }
200
+ }
201
+ }
202
+ return 0;
203
+ }
204
+
205
+ int32_t thread_func(int thread_idx){
206
+
207
+ printf("entry thread_func[%d]\n", thread_idx);
208
+
209
+ std::string image_path = "../bus.jpg";
210
+ std::string save_name = "out_yolov5_qnn";
211
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin";
212
+
213
+ // image process
214
+ cv::Mat frame = cv::imread(image_path);
215
+ cv::cvtColor(frame, frame , cv::COLOR_BGR2RGB);
216
+ cv::Scalar stds_scale(255, 255, 255);
217
+ cv::Size target_shape(MODEL_SIZE, MODEL_SIZE);
218
+
219
+ cv::Mat frame_resized = cv::Mat::zeros(MODEL_SIZE, MODEL_SIZE, CV_8UC3);
220
+ float scale = eqprocess(&frame, &frame_resized, MODEL_SIZE, MODEL_SIZE);
221
+
222
+ cv::Mat input_data;
223
+ frame_resized.convertTo(input_data, CV_32FC3);
224
+ cv::divide(input_data, stds_scale, input_data);
225
+
226
+ // model init
227
+ printf("Aidlite library version : %s\n", Aidlux::Aidlite::get_library_version().c_str());
228
+
229
+ // Call the following three logging interfaces in any combination as needed. If none of them is called, only error logs are printed to stderr by default.
230
+ Aidlux::Aidlite::set_log_level(Aidlux::Aidlite::LogLevel::INFO);
231
+ Aidlux::Aidlite::log_to_stderr();
232
+ // Aidlux::Aidlite::log_to_file("./qnn_yolov5_multi_");
233
+
234
+ Model* model = Model::create_instance(model_path);
235
+ if(model == nullptr){
236
+ printf("Create Model object failed !\n");
237
+ return EXIT_FAILURE;
238
+ }
239
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,640,640,3}};
240
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,40,40,255}, {1,20,20,255}, {1,80,80,255}};
241
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
242
+
243
+ Config* config = Config::create_instance();
244
+ if(config == nullptr){
245
+ printf("Create Config object failed !\n");
246
+ return EXIT_FAILURE;
247
+ }
248
+
249
+ config->implement_type = ImplementType::TYPE_LOCAL;
250
+ config->framework_type = FrameworkType::TYPE_QNN216;
251
+ config->accelerate_type = AccelerateType::TYPE_DSP;
252
+
253
+ std::unique_ptr<Interpreter>&& fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
254
+ if(fast_interpreter == nullptr){
255
+ printf("build_interpretper_from_model_and_config failed !\n");
256
+ return EXIT_FAILURE;
257
+ }
258
+
259
+ int result = fast_interpreter->init();
260
+ if(result != EXIT_SUCCESS){
261
+ printf("interpreter->init() failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+
265
+ result = fast_interpreter->load_model();
266
+ if(result != EXIT_SUCCESS){
267
+ printf("interpreter->load_model() failed !\n");
268
+ return EXIT_FAILURE;
269
+ }
270
+
271
+ printf("load model load success!\n");
272
+
273
+ float* stride8 = nullptr;
274
+ float* stride16 = nullptr;
275
+ float* stride32 = nullptr;
276
+
277
+ // post_process
278
+ std::vector<float> filterBoxes;
279
+ std::vector<float> objProbs;
280
+ std::vector<int> classId;
281
+
282
+ double sum_time_0 = 0.0, sum_time_1 = 0.0, sum_time_2 = 0.0;
283
+ int _counter = 10;
284
+ for(int idx = 0; idx < _counter; ++idx){
285
+ std::chrono::steady_clock::time_point st0 = std::chrono::steady_clock::now();
286
+
287
+ void* input_tensor_data = (void*)input_data.data;
288
+ result = fast_interpreter->set_input_tensor(0,input_tensor_data);
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->set_input_tensor() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+
294
+ std::chrono::steady_clock::time_point et0 = std::chrono::steady_clock::now();
295
+ std::chrono::steady_clock::duration dur0 = et0 - st0;
296
+ printf("current thread_idx[%d] [%d] set_input_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur0).count()*1000);
297
+ sum_time_0 += std::chrono::duration<double>(dur0).count()*1000;
298
+
299
+ std::chrono::steady_clock::time_point st1 = std::chrono::steady_clock::now();
300
+
301
+ result = fast_interpreter->invoke();
302
+ if(result != EXIT_SUCCESS){
303
+ printf("interpreter->invoke() failed !\n");
304
+ return EXIT_FAILURE;
305
+ }
306
+
307
+ std::chrono::steady_clock::time_point et1 = std::chrono::steady_clock::now();
308
+ std::chrono::steady_clock::duration dur1 = et1 - st1;
309
+ printf("current thread_idx[%d] [%d] invoke cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur1).count()*1000);
310
+ sum_time_1 += std::chrono::duration<double>(dur1).count()*1000;
311
+
312
+ std::chrono::steady_clock::time_point st2 = std::chrono::steady_clock::now();
313
+
314
+ uint32_t output_tensor_length_0 = 0;
315
+ result = fast_interpreter->get_output_tensor(0, (void**)&stride8, &output_tensor_length_0);
316
+ if(result != EXIT_SUCCESS){
317
+ printf("interpreter->get_output_tensor() 0 failed !\n");
318
+ return EXIT_FAILURE;
319
+ }
320
+ printf("sample : interpreter->get_output_tensor() 0 length is [%d] !\n", output_tensor_length_0);
321
+
322
+ uint32_t output_tensor_length_1 = 0;
323
+ result = fast_interpreter->get_output_tensor(1, (void**)&stride16, &output_tensor_length_1);
324
+ if(result != EXIT_SUCCESS){
325
+ printf("interpreter->get_output_tensor() 1 failed !\n");
326
+ return EXIT_FAILURE;
327
+ }
328
+ printf("sample : interpreter->get_output_tensor() 1 length is [%d] !\n", output_tensor_length_1);
329
+
330
+ uint32_t output_tensor_length_2 = 0;
331
+ result = fast_interpreter->get_output_tensor(2, (void**)&stride32, &output_tensor_length_2);
332
+ if(result != EXIT_SUCCESS){
333
+ printf("interpreter->get_output_tensor() 2 failed !\n");
334
+ return EXIT_FAILURE;
335
+ }
336
+ printf("sample : interpreter->get_output_tensor() 2 length is [%d] !\n", output_tensor_length_2);
337
+
338
+ std::chrono::steady_clock::time_point et2 = std::chrono::steady_clock::now();
339
+ std::chrono::steady_clock::duration dur2 = et2 - st2;
340
+ printf("current thread_idx[%d] [%d] get_output_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur2).count()*1000);
341
+ sum_time_2 += std::chrono::duration<double>(dur2).count()*1000;
342
+ }
343
+ printf("repeat [%d] time , input[%f] --- invoke[%f] --- output[%f] --- sum[%f]ms\n", _counter, sum_time_0, sum_time_1, sum_time_2, sum_time_0+sum_time_1+sum_time_2);
344
+
345
+ std::chrono::steady_clock::time_point pps = std::chrono::steady_clock::now();
346
+
347
+ filterBoxes.clear();
348
+ objProbs.clear();
349
+ classId.clear();
350
+ int validCount0 = process(stride8, filterBoxes, objProbs, classId, (float*)anchor0, STRIDE8_SIZE, STRIDE8_SIZE, 8, MODEL_SIZE);
351
+ int validCount1 = process(stride16, filterBoxes, objProbs, classId, (float*)anchor1, STRIDE16_SIZE, STRIDE16_SIZE, 16, MODEL_SIZE);
352
+ int validCount2 = process(stride32, filterBoxes, objProbs, classId, (float*)anchor2, STRIDE32_SIZE, STRIDE32_SIZE, 32, MODEL_SIZE);
353
+
354
+ int validCount = validCount0 + validCount1 +validCount2;
355
+
356
+ std::vector<int> indexArray;
357
+ for (int i = 0; i < validCount; ++i){
358
+ indexArray.push_back(i);
359
+ }
360
+
361
+ quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
362
+
363
+ std::set<int> class_set(std::begin(classId), std::end(classId));
364
+
365
+ for (auto c : class_set) {
366
+ nms(validCount, filterBoxes, classId, indexArray, c, NMS_THRESH);
367
+ }
368
+
369
+ std::chrono::steady_clock::time_point ppe = std::chrono::steady_clock::now();
370
+ std::chrono::steady_clock::duration durpp = ppe - pps;
371
+ printf("postprocess cost time : %f ms\n", std::chrono::duration<double>(durpp).count()*1000);
372
+
373
+ // Reference boxes come from SNPE2 FP32 CPU inference results; [x1, y1, x2, y2] coordinates are rounded down
374
+ const float expected_box_0[3][4] = {{210, 241, 285, 519}, {473, 229, 560, 522}, {108, 231, 231, 542}};
375
+ const float expected_box_5[1][4] = {{91, 131, 551, 464}};
376
+
377
+ unsigned int box_count = 0;
378
+ unsigned int verify_pass_count = 0;
379
+ for (int i = 0; i < validCount; ++i) {
380
+
381
+ if (indexArray[i] == -1) {
382
+ continue;
383
+ }
384
+ int n = indexArray[i];
385
+
386
+ float x1 = filterBoxes[n * 4 + 0] * scale;
387
+ float y1 = filterBoxes[n * 4 + 1] * scale;
388
+ float x2 = x1 + filterBoxes[n * 4 + 2] * scale;
389
+ float y2 = y1 + filterBoxes[n * 4 + 3] * scale;
390
+ int id = classId[n];
391
+ float obj_conf = objProbs[i];
392
+
393
+ // string show_info = "class " + to_string(id) + ": " + to_string(obj_conf);
394
+ string show_info = class_names[id] + ": " + to_string(obj_conf);
395
+ cv::putText(frame, show_info.c_str(), cv::Point(x1, y1), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2, 2); // color-BGR
396
+ cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2, 2, 0);
397
+
398
+ // Verify the correctness of the results
399
+ printf("Result id[%d]-x1[%f]-y1[%f]-x2[%f]-y2[%f]\n", id, x1, y1, x2, y2);
400
+
401
+ ++box_count;
402
+ if(id == 0){
403
+ for(int idx = 0; idx < 3; ++idx){
404
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
405
+ expected_box_0[idx][0], expected_box_0[idx][1], expected_box_0[idx][2], expected_box_0[idx][3]);
406
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
407
+ if(coverage_ratio > 0.9){
408
+ ++verify_pass_count;
409
+ break;
410
+ }
411
+ }
412
+ }else if(id == 5){
413
+ for(int idx = 0; idx < 1; ++idx){
414
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
415
+ expected_box_5[idx][0], expected_box_5[idx][1], expected_box_5[idx][2], expected_box_5[idx][3]);
416
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
417
+ if(coverage_ratio > 0.9){
418
+ ++verify_pass_count;
419
+ break;
420
+ }
421
+ }
422
+ }else{
423
+ printf("ERROR : The Yolov5s model inference result is not the expected classification category.\n");
424
+ return EXIT_FAILURE;
425
+ }
426
+ }
427
+
428
+ // Save the result image
429
+ cv::cvtColor(frame, frame , cv::COLOR_RGB2BGR);
430
+ cv::imwrite("result.jpg", frame);
431
+
432
+ result = fast_interpreter->destory();
433
+ if(result != EXIT_SUCCESS){
434
+ printf("interpreter->destory() failed !\n");
435
+ return EXIT_FAILURE;
436
+ }
437
+
438
+ printf("exit thread_func[%d]\n", thread_idx);
439
+
440
+ return EXIT_SUCCESS;
441
+ }
442
+
443
+ int main(int argc, char** args)
444
+ {
445
+
446
+ std::future<int> thread_01_result = std::async(std::launch::async, thread_func, 1);
447
+
448
+ if(EXIT_SUCCESS != thread_01_result.get()){
449
+ printf("ERROR : thread_01 run failed.\n");
450
+ return EXIT_FAILURE;
451
+ }
452
+
453
+ printf("Exit main function .\n");
454
+ return 0;
455
  }
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/models/cutoff_yolov5n_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8e9700e6f44fc35b2bc8d6bd459d3acda007e4dc09bd92ab450ac1b0f358cb
3
+ size 2124248
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py CHANGED
@@ -1,338 +1,338 @@
1
- import time
2
- import numpy as np
3
- import cv2
4
- import aidlite
5
- import argparse
6
- import os
7
- OBJ_CLASS_NUM = 80
8
- NMS_THRESH = 0.45
9
- BOX_THRESH = 0.5
10
- MODEL_SIZE = 640
11
-
12
- OBJ_NUMB_MAX_SIZE = 64
13
- PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
- STRIDE8_SIZE = (MODEL_SIZE / 8)
15
- STRIDE16_SIZE = (MODEL_SIZE / 16)
16
- STRIDE32_SIZE = (MODEL_SIZE / 32)
17
-
18
- anchors = [[10, 13, 16, 30, 33, 23],
19
- [30, 61, 62, 45, 59, 119],
20
- [116, 90, 156, 198, 373, 326]]
21
-
22
- current_p =os.path.dirname(os.path.abspath(__file__))
23
-
24
- coco_class = [
25
- 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
- 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
- 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
- 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
- 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
- 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
- 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
- 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
-
34
-
35
- def eqprocess(image, size1, size2):
36
- h, w, _ = image.shape
37
- mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
- scale1 = h / size1
39
- scale2 = w / size2
40
- if scale1 > scale2:
41
- scale = scale1
42
- else:
43
- scale = scale2
44
- img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
- mask[:int(h / scale), :int(w / scale), :] = img
46
- return mask, scale
47
-
48
-
49
- def xywh2xyxy(x):
50
- '''
51
- Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
- '''
53
- y = np.copy(x)
54
- y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
- y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
- y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
- y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
- return y
59
-
60
-
61
- def xyxy2xywh(box):
62
- '''
63
- Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
- '''
65
- box[:, 2:] = box[:, 2:] - box[:, :2]
66
- return box
67
-
68
-
69
- def NMS(dets, scores, thresh):
70
- '''
71
- Single-class NMS algorithm
72
- dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
- '''
74
- x1 = dets[:, 0]
75
- y1 = dets[:, 1]
76
- x2 = dets[:, 2]
77
- y2 = dets[:, 3]
78
- areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
- keep = []
80
- index = scores.argsort()[::-1]
81
- while index.size > 0:
82
- i = index[0] # the first index always has the biggest score, so keep it directly
83
- keep.append(i)
84
- x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
- y11 = np.maximum(y1[i], y1[index[1:]])
86
- x22 = np.minimum(x2[i], x2[index[1:]])
87
- y22 = np.minimum(y2[i], y2[index[1:]])
88
- w = np.maximum(0, x22 - x11 + 1) # the width of the overlap
89
- h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
- overlaps = w * h
91
- ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
- idx = np.where(ious <= thresh)[0]
93
- index = index[idx + 1] # offset by 1 because ious was computed against index[1:]
94
-
95
- return keep
96
-
97
-
98
- def clip_coords(boxes, img_shape):
99
- # Clip bounding xyxy bounding boxes to image shape (height, width)
100
- boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
- boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
- boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
- boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
-
105
-
106
- def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
- '''
108
- Post-process the detection outputs
109
- prediction: raw predictions from the aidlite model
110
- img0shape: original image shape
111
- img1shape: model input image shape
112
- conf_thres: confidence threshold
113
- iou_thres: IoU threshold
114
- return: list[np.ndarray(N, 5)], per-class box info as xywh plus conf
115
- '''
116
- h, w, _ = img1shape
117
- valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
- valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
- valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
-
121
- max_det = 300
122
- max_wh = 7680
123
- max_nms = 30000
124
- valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
- valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
- sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
- valid_condidates = valid_condidates[sort_id[:max_nms]]
128
- boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
- index = NMS(boxes, scores, iou_thres)[:max_det]
130
- out_boxes = valid_condidates[index]
131
- clip_coords(out_boxes[:, :4], img0shape)
132
- out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
- print("检测到{}个区域".format(len(out_boxes)))
134
- return out_boxes
135
-
136
-
137
- def draw_detect_res(img, det_pred):
138
- '''
139
- Draw the detection results
140
- '''
141
- img = img.astype(np.uint8)
142
- color_step = int(255 / len(coco_class))
143
- for i in range(len(det_pred)):
144
- x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
- score = det_pred[i][4]
146
- cls_id = int(det_pred[i][5])
147
-
148
- print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
-
150
- cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
- cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
- thickness=2)
153
-
154
- return img
155
-
156
-
157
- class Detect():
158
- # YOLOv5 Detect head for detection models
159
- def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
- super().__init__()
161
- self.nc = nc # number of classes
162
- self.no = nc + 5 # number of outputs per anchor
163
- self.stride = stride
164
- self.nl = len(anchors) # number of detection layers
165
- self.na = len(anchors[0]) // 2 # number of anchors
166
- self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
- self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
-
169
- base_scale = image_size // 8
170
- for i in range(self.nl):
171
- self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
-
173
- def _make_grid(self, nx=20, ny=20, i=0):
174
- y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
- yv, xv = np.meshgrid(y, x)
176
- yv, xv = yv.T, xv.T
177
- # add grid offset, i.e. y = 2.0 * x - 0.5
178
- grid = np.stack((xv, yv), 2)
179
- grid = grid[np.newaxis, np.newaxis, ...]
180
- grid = np.repeat(grid, self.na, axis=1) - 0.5
181
- anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
- anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
- anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
- return grid, anchor_grid
185
-
186
- def sigmoid(self, arr):
187
- return 1 / (1 + np.exp(-arr))
188
-
189
- def __call__(self, x):
190
- z = [] # inference output
191
- for i in range(self.nl):
192
- bs, _, ny, nx = x[i].shape
193
- x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
- y = self.sigmoid(x[i])
195
- y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
- y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
- z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
-
199
- return np.concatenate(z, 1)
200
-
201
- def main():
202
- args = parser_args()
203
- target_model = args.target_model
204
- model_type = args.model_type
205
- size = int(args.size)
206
- imgs = args.imgs
207
- invoke_nums = int(args.invoke_nums)
208
- print("Start main ... ...")
209
- # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
- # aidlite.log_to_stderr()
211
- # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
- # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
-
214
- config = aidlite.Config.create_instance()
215
- if config is None:
216
- print("Create config failed !")
217
- return False
218
-
219
-
220
- config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
- if model_type.lower()=="qnn":
222
- config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
- elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
- config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
-
226
- config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
- config.is_quantify_model = 1
228
-
229
-
230
- model = aidlite.Model.create_instance(target_model)
231
- if model is None:
232
- print("Create model failed !")
233
- return False
234
- input_shapes = [[1, size, size, 3]]
235
- output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
- model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
- output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
-
239
- interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
- if interpreter is None:
241
- print("build_interpretper_from_model_and_config failed !")
242
- return None
243
- result = interpreter.init()
244
- if result != 0:
245
- print(f"interpreter init failed !")
246
- return False
247
- result = interpreter.load_model()
248
- if result != 0:
249
- print("interpreter load model failed !")
250
- return False
251
- print("detect model load success!")
252
-
253
- # image process
254
- frame = cv2.imread(imgs)
255
- # 图片做等比缩放
256
- img_processed = np.copy(frame)
257
- [height, width, _] = img_processed.shape
258
- length = max((height, width))
259
- scale = length / size
260
- ratio=[scale,scale]
261
- image = np.zeros((length, length, 3), np.uint8)
262
- image[0:height, 0:width] = img_processed
263
- img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
- img_input=cv2.resize(img_input,(size,size))
265
-
266
- mean_data=[0, 0, 0]
267
- std_data=[255, 255, 255]
268
- img_input = (img_input-mean_data)/std_data # HWC
269
-
270
- img_input = img_input.astype(np.float32)
271
-
272
-
273
- # qnn run
274
- invoke_time=[]
275
- for i in range(invoke_nums):
276
- result = interpreter.set_input_tensor(0, img_input.data)
277
- if result != 0:
278
- print("interpreter set_input_tensor() failed")
279
-
280
- t1=time.time()
281
- result = interpreter.invoke()
282
- cost_time = (time.time()-t1)*1000
283
- invoke_time.append(cost_time)
284
-
285
- if result != 0:
286
- print("interpreter set_input_tensor() failed")
287
- stride8 = interpreter.get_output_tensor(0)
288
- stride16 = interpreter.get_output_tensor(1)
289
- stride32 = interpreter.get_output_tensor(2)
290
-
291
-
292
- result = interpreter.destory()
293
-
294
- ## time 统计
295
- max_invoke_time = max(invoke_time)
296
- min_invoke_time = min(invoke_time)
297
- mean_invoke_time = sum(invoke_time)/invoke_nums
298
- var_invoketime=np.var(invoke_time)
299
- print("=======================================")
300
- print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
- print("=======================================")
302
-
303
- ## 后处理
304
- stride = [8, 16, 32]
305
- yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
- validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
- validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
- validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
- pred = yolo_head([validCount0, validCount1, validCount2])
310
- det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
- det_pred[np.isnan(det_pred)] = 0.0
312
- det_pred[:, :4] = det_pred[:, :4] * scale
313
- res_img = draw_detect_res(frame, det_pred)
314
-
315
- save_path=os.path.join(current_p,"result.jpg")
316
- cv2.imwrite(save_path, res_img)
317
- print("图片保存在",save_path)
318
- print("=======================================")
319
-
320
- return True
321
-
322
-
323
-
324
-
325
- image_path = os.path.join(current_p,"bus.jpg")
326
- def parser_args():
327
- parser = argparse.ArgumentParser(description="Run model benchmarks")
328
- parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem'),help="inference model path")
329
- parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
- parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
- parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
- parser.add_argument('--size',type=str,default=640,help="model input size")
333
- args = parser.parse_args()
334
- return args
335
-
336
- if __name__ == "__main__":
337
- main()
338
-
 
1
+ import time
2
+ import numpy as np
3
+ import cv2
4
+ import aidlite
5
+ import argparse
6
+ import os
7
+ OBJ_CLASS_NUM = 80
8
+ NMS_THRESH = 0.45
9
+ BOX_THRESH = 0.5
10
+ MODEL_SIZE = 640
11
+
12
+ OBJ_NUMB_MAX_SIZE = 64
13
+ PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
+ STRIDE8_SIZE = (MODEL_SIZE / 8)
15
+ STRIDE16_SIZE = (MODEL_SIZE / 16)
16
+ STRIDE32_SIZE = (MODEL_SIZE / 32)
17
+
18
+ anchors = [[10, 13, 16, 30, 33, 23],
19
+ [30, 61, 62, 45, 59, 119],
20
+ [116, 90, 156, 198, 373, 326]]
21
+
22
+ current_p =os.path.dirname(os.path.abspath(__file__))
23
+
24
+ coco_class = [
25
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
+ 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
+ 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
+ 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
+ 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
+ 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
+ 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
+
34
+
35
+ def eqprocess(image, size1, size2):
36
+ h, w, _ = image.shape
37
+ mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
+ scale1 = h / size1
39
+ scale2 = w / size2
40
+ if scale1 > scale2:
41
+ scale = scale1
42
+ else:
43
+ scale = scale2
44
+ img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
+ mask[:int(h / scale), :int(w / scale), :] = img
46
+ return mask, scale
47
+
48
+
49
+ def xywh2xyxy(x):
50
+ '''
51
+ Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
+ '''
53
+ y = np.copy(x)
54
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
+ return y
59
+
60
+
61
+ def xyxy2xywh(box):
62
+ '''
63
+ Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
+ '''
65
+ box[:, 2:] = box[:, 2:] - box[:, :2]
66
+ return box
67
+
68
+
69
+ def NMS(dets, scores, thresh):
70
+ '''
71
+ Single-class NMS algorithm
72
+ dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
+ '''
74
+ x1 = dets[:, 0]
75
+ y1 = dets[:, 1]
76
+ x2 = dets[:, 2]
77
+ y2 = dets[:, 3]
78
+ areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
+ keep = []
80
+ index = scores.argsort()[::-1]
81
+ while index.size > 0:
82
+ i = index[0] # the first index always has the biggest score, so keep it directly
83
+ keep.append(i)
84
+ x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
+ y11 = np.maximum(y1[i], y1[index[1:]])
86
+ x22 = np.minimum(x2[i], x2[index[1:]])
87
+ y22 = np.minimum(y2[i], y2[index[1:]])
88
+ w = np.maximum(0, x22 - x11 + 1) # the width of overlap
89
+ h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
+ overlaps = w * h
91
+ ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
+ idx = np.where(ious <= thresh)[0]
93
+ index = index[idx + 1] # offset by 1 because idx indexes into index[1:]
94
+
95
+ return keep
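+ # NMS above is the classic greedy scheme: boxes are visited in descending score order, the
+ # current best box is kept, and any remaining box whose IoU with it exceeds `thresh` is
+ # discarded. The returned indices refer to rows of `dets`.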
96
+
97
+
98
+ def clip_coords(boxes, img_shape):
99
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
100
+ boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
+ boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
+ boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
+ boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
+
105
+
106
+ def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
+ '''
108
+ Post-process the detection output
+ prediction: raw prediction output of the aidlite model
+ img0shape: shape of the original image
+ img1shape: shape of the model input image
+ conf_thres: confidence threshold
+ iou_thres: IOU threshold
+ return: list[np.ndarray(N, 5)], box information for the matching class, xywh and conf
115
+ '''
116
+ h, w, _ = img1shape
117
+ valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
+ valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
+ valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
+
121
+ max_det = 300
122
+ max_wh = 7680
123
+ max_nms = 30000
124
+ valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
+ valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
+ sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
+ valid_condidates = valid_condidates[sort_id[:max_nms]]
128
+ boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
+ index = NMS(boxes, scores, iou_thres)[:max_det]
130
+ out_boxes = valid_condidates[index]
131
+ clip_coords(out_boxes[:, :4], img0shape)
132
+ out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
+ print("检测到{}个区域".format(len(out_boxes)))
134
+ return out_boxes
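+ # detect_postprocess keeps candidates whose objectness exceeds conf_thres, multiplies class
+ # scores by objectness, and offsets each box by class_id * max_wh before NMS so that boxes of
+ # different classes never suppress each other. The first six columns of each returned row are
+ # [x, y, w, h, conf, cls] in the 640x640 letterboxed space; the caller rescales them with `scale`.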
135
+
136
+
137
+ def draw_detect_res(img, det_pred):
138
+ '''
139
+ Draw the detection results
140
+ '''
141
+ img = img.astype(np.uint8)
142
+ color_step = int(255 / len(coco_class))
143
+ for i in range(len(det_pred)):
144
+ x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
+ score = det_pred[i][4]
146
+ cls_id = int(det_pred[i][5])
147
+
148
+ print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
+
150
+ cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
+ cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
+ thickness=2)
153
+
154
+ return img
155
+
156
+
157
+ class Detect():
158
+ # YOLOv5 Detect head for detection models
159
+ def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
+ super().__init__()
161
+ self.nc = nc # number of classes
162
+ self.no = nc + 5 # number of outputs per anchor
163
+ self.stride = stride
164
+ self.nl = len(anchors) # number of detection layers
165
+ self.na = len(anchors[0]) // 2 # number of anchors
166
+ self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
+ self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
+
169
+ base_scale = image_size // 8
170
+ for i in range(self.nl):
171
+ self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
+
173
+ def _make_grid(self, nx=20, ny=20, i=0):
174
+ y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
+ yv, xv = np.meshgrid(y, x)
176
+ yv, xv = yv.T, xv.T
177
+ # add grid offset, i.e. y = 2.0 * x - 0.5
178
+ grid = np.stack((xv, yv), 2)
179
+ grid = grid[np.newaxis, np.newaxis, ...]
180
+ grid = np.repeat(grid, self.na, axis=1) - 0.5
181
+ anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
+ anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
+ anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
+ return grid, anchor_grid
185
+
186
+ def sigmoid(self, arr):
187
+ return 1 / (1 + np.exp(-arr))
188
+
189
+ def __call__(self, x):
190
+ z = [] # inference output
191
+ for i in range(self.nl):
192
+ bs, _, ny, nx = x[i].shape
193
+ x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
+ y = self.sigmoid(x[i])
195
+ y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
+ z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
+
199
+ return np.concatenate(z, 1)
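+ # Detect.__call__ reproduces the YOLOv5 head decode in NumPy: each (1, 255, H, W) feature map
+ # is reshaped to (1, 3, H, W, 85), passed through a sigmoid, and xy/wh are recovered with the
+ # 0.5-offset grid and the per-level anchors. The three levels concatenate to the usual
+ # 1x25200x85 prediction tensor for a 640x640 input.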
200
+
201
+ def main():
202
+ args = parser_args()
203
+ target_model = args.target_model
204
+ model_type = args.model_type
205
+ size = int(args.size)
206
+ imgs = args.imgs
207
+ invoke_nums = int(args.invoke_nums)
208
+ print("Start main ... ...")
209
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
+ # aidlite.log_to_stderr()
211
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
+
214
+ config = aidlite.Config.create_instance()
215
+ if config is None:
216
+ print("Create config failed !")
217
+ return False
218
+
219
+
220
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
+ if model_type.lower()=="qnn":
222
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
+ elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
+
226
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
+ config.is_quantify_model = 1
228
+
229
+
230
+ model = aidlite.Model.create_instance(target_model)
231
+ if model is None:
232
+ print("Create model failed !")
233
+ return False
234
+ input_shapes = [[1, size, size, 3]]
235
+ output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
+
239
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
+ if interpreter is None:
241
+ print("build_interpretper_from_model_and_config failed !")
242
+ return None
243
+ result = interpreter.init()
244
+ if result != 0:
245
+ print(f"interpreter init failed !")
246
+ return False
247
+ result = interpreter.load_model()
248
+ if result != 0:
249
+ print("interpreter load model failed !")
250
+ return False
251
+ print("detect model load success!")
252
+
253
+ # image process
254
+ frame = cv2.imread(imgs)
255
+ # letterbox the image: pad to a square so the aspect ratio is preserved
256
+ img_processed = np.copy(frame)
257
+ [height, width, _] = img_processed.shape
258
+ length = max((height, width))
259
+ scale = length / size
260
+ ratio=[scale,scale]
261
+ image = np.zeros((length, length, 3), np.uint8)
262
+ image[0:height, 0:width] = img_processed
263
+ img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
+ img_input=cv2.resize(img_input,(size,size))
265
+
266
+ mean_data=[0, 0, 0]
267
+ std_data=[255, 255, 255]
268
+ img_input = (img_input-mean_data)/std_data # HWC
269
+
270
+ img_input = img_input.astype(np.float32)
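+ # Preprocessing: the frame is padded to a square, resized to size x size, converted BGR->RGB
+ # and scaled to [0, 1] (mean 0, std 255). The tensor stays in NHWC float32 layout, matching
+ # input_shapes = [[1, size, size, 3]] declared above.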
271
+
272
+
273
+ # qnn run
274
+ invoke_time=[]
275
+ for i in range(invoke_nums):
276
+ result = interpreter.set_input_tensor(0, img_input.data)
277
+ if result != 0:
278
+ print("interpreter set_input_tensor() failed")
279
+
280
+ t1=time.time()
281
+ result = interpreter.invoke()
282
+ cost_time = (time.time()-t1)*1000
283
+ invoke_time.append(cost_time)
284
+
285
+ if result != 0:
286
+ print("interpreter set_input_tensor() failed")
287
+ stride8 = interpreter.get_output_tensor(0)
288
+ stride16 = interpreter.get_output_tensor(1)
289
+ stride32 = interpreter.get_output_tensor(2)
290
+
291
+
292
+ result = interpreter.destory()
293
+
294
+ ## timing statistics
295
+ max_invoke_time = max(invoke_time)
296
+ min_invoke_time = min(invoke_time)
297
+ mean_invoke_time = sum(invoke_time)/invoke_nums
298
+ var_invoketime=np.var(invoke_time)
299
+ print("=======================================")
300
+ print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
+ print("=======================================")
302
+
303
+ ## post-processing
304
+ stride = [8, 16, 32]
305
+ yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
+ validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
+ validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
+ validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
+ pred = yolo_head([validCount0, validCount1, validCount2])
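+ # Each QNN output comes back as a flat float buffer; it is reshaped to its NHWC shape and
+ # transposed to NCHW before the Detect head decodes it. Output tensor 0 is paired with the
+ # 80x80 (stride-8) grid here, tensor 1 with 40x40 and tensor 2 with 20x20.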
310
+ det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
+ det_pred[np.isnan(det_pred)] = 0.0
312
+ det_pred[:, :4] = det_pred[:, :4] * scale
313
+ res_img = draw_detect_res(frame, det_pred)
314
+
315
+ save_path=os.path.join(current_p,"result.jpg")
316
+ cv2.imwrite(save_path, res_img)
317
+ print("图片保存在",save_path)
318
+ print("=======================================")
319
+
320
+ return True
321
+
322
+
323
+
324
+
325
+ image_path = os.path.join(current_p,"bus.jpg")
326
+ def parser_args():
327
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
328
+ parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin'),help="inference model path")
329
+ parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
+ parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
+ parser.add_argument('--size',type=str,default=640,help="model input size")
333
+ args = parser.parse_args()
334
+ return args
335
+
336
+ if __name__ == "__main__":
337
+ main()
338
+
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/README.md CHANGED
@@ -1,60 +1,60 @@
## Model Information
### Source model

- Input shape: 640x640
- Number of parameters: 1.968M
- Model size: 7.56 MB
- Output shape: 1x25200x85

Source model repository: [yolov5](https://github.com/ultralytics/yolov5)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: SNM972

## Inference with AidLite SDK

### SDK installation
Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)

- install AidLite SDK

```bash
# Install the appropriate version of the aidlite sdk
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
# eg: Install QNN 2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
```

- Verify AidLite SDK

```bash
# aidlite sdk c++ check
python3 -c "import aidlite; print(aidlite.get_library_version())"

# aidlite sdk python check
python3 -c "import aidlite; print(aidlite.get_py_library_version())"
```

### Run python demo

```bash
cd python
python3 demo_qnn.py
```
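
`demo_qnn.py` defaults to the `bus.jpg` image next to the script and 10 timed invocations. The flags below are the ones its argument parser already defines; the image path is only an illustrative placeholder:

```bash
# point the demo at another image and average over more runs
python3 demo_qnn.py --imgs /path/to/your_image.jpg --invoke_nums 20
```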

### Run c++ demo

```bash
cd yolov5n/model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp
mkdir build
cd build
cmake ..
make
./run_yolov5
```
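
Note: `cpp/CMakeLists.txt` hard-codes the OpenCV and AidLite locations (`/usr/include/opencv4`, `/usr/local/include`, `/usr/local/lib`). If your headers or libraries live elsewhere, adjust `OPENCV_INCLUDE_DIR`, `AIDLITE_INCLUDE_DIR` and `AIDLITE_LINK_DIR` before running `cmake ..`.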
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt CHANGED
@@ -1,36 +1,36 @@
cmake_minimum_required(VERSION 3.12)
project(aidlite_cpp_samples)

set(CMAKE_BUILD_TYPE Release)

set(OPENCV_INCLUDE_DIR /usr/include/opencv4)
set(OPENCV_LINK_DIR "")
set(OPENCV_LIBS opencv_imgcodecs opencv_imgproc opencv_core) # if linking static libraries, the order matters

set(AIDLITE_INCLUDE_DIR /usr/local/include)
set(AIDLITE_LINK_DIR /usr/local/lib)
set(AIDLITE_LIB aidlite)

function(func_generate_sample_exe sample_name)

    set(demo_name ${sample_name})

    file(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/${demo_name}.cpp)

    add_executable(${demo_name} ${src_files})

    target_compile_options(${demo_name} PRIVATE -std=c++11)
    target_include_directories(${demo_name} PUBLIC ${OPENCV_INCLUDE_DIR} ${AIDLITE_INCLUDE_DIR})
    target_link_directories(${demo_name} PUBLIC ${OPENCV_LINK_DIR} ${AIDLITE_LINK_DIR})
    target_link_libraries(${demo_name} PUBLIC ${AIDLITE_LIB} ${OPENCV_LIBS} pthread)
    message(STATUS "[CMAKEMSG] ${demo_name} need libraries is : ${AIDLITE_LIB} ${OPENCV_LIBS}")

endfunction()

set(SAMPLE_LIST run_yolov5)

FOREACH(sample ${SAMPLE_LIST})
    message("prepare to generate cpp sample : ${sample}")

    func_generate_sample_exe(${sample})
ENDFOREACH(sample)
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/run_yolov5.cpp CHANGED
@@ -1,455 +1,455 @@
1
+ #include <thread>
2
+ #include <future>
3
+ #include <opencv2/opencv.hpp>
4
+ #include "aidlux/aidlite/aidlite.hpp"
5
+
6
+ using namespace Aidlux::Aidlite;
7
+ using namespace std;
8
+
9
+ #define OBJ_CLASS_NUM 80
10
+ #define NMS_THRESH 0.45
11
+ #define BOX_THRESH 0.5
12
+ #define MODEL_SIZE 640
13
+ #define OBJ_NUMB_MAX_SIZE 64
14
+ #define PROP_BOX_SIZE (5+OBJ_CLASS_NUM)
15
+ #define STRIDE8_SIZE (MODEL_SIZE / 8)
16
+ #define STRIDE16_SIZE (MODEL_SIZE / 16)
17
+ #define STRIDE32_SIZE (MODEL_SIZE / 32)
18
+
19
+ const float anchor0[6] = {10, 13, 16, 30, 33, 23};
20
+ const float anchor1[6] = {30, 61, 62, 45, 59, 119};
21
+ const float anchor2[6] = {116, 90, 156, 198, 373, 326};
22
+
23
+ string class_names[] = {
24
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
25
+ "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
26
+ "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
27
+ "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
28
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
29
+ "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
30
+ "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
31
+ "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
32
+
33
+
34
+ static float sigmoid(float x) { return 1.f / (1.f + exp(-x)); }
35
+
36
+ float eqprocess(cv::Mat* src, cv::Mat* dst, int width, int height)
37
+ {
38
+ int w = src->cols;
39
+ int h = src->rows;
40
+ float scale_h = float(h) / float(height);
41
+ float scale_w = float(w) / float(width);
42
+
43
+ float scale;
44
+ if (scale_h > scale_w)
45
+ {
46
+ scale = scale_h;
47
+ }
48
+ else
49
+ {
50
+ scale = scale_w;
51
+ }
52
+
53
+ int rel_width = int(w / scale);
54
+ int rel_height = int(h / scale);
55
+
56
+ cv::Mat tmp = (*dst)(cv::Rect(0, 0, rel_width, rel_height));
57
+ cv::resize(*src, tmp, cv::Size(rel_width, rel_height));
58
+ return scale;
59
+ }
60
+
61
+ std::vector<std::string> split(const std::string& str)
62
+ {
63
+ std::stringstream ss(str);
64
+ std::vector<std::string> elems;
65
+ std::string item;
66
+ while (std::getline(ss, item, ','))
67
+ {
68
+ elems.push_back(item);
69
+ }
70
+ return elems;
71
+ }
72
+
73
+
74
+ int process(float* output, std::vector<float>& boxes, std::vector<float>& objProbs, std::vector<int>& classId, float * anchor, int grid_h, int grid_w, int stride, int imgsz)
75
+ {
76
+ int ct = 0;
77
+ int validCount = 0;
78
+ for (int a = 0; a < 3; a++)
79
+ {
80
+ for (int i = 0; i < grid_h; i++)
81
+ {
82
+ for (int j = 0; j < grid_w; j++)
83
+ {
84
+ int idx = a * PROP_BOX_SIZE + (i * grid_w + j) * 3 * PROP_BOX_SIZE;
85
+ float box_confidence = sigmoid(output[idx + 4]);
86
+ if (box_confidence >= BOX_THRESH )
87
+ {
88
+ float box_x = sigmoid(output[idx]) * 2 - 0.5;
89
+ float box_y = sigmoid(output[idx + 1]) * 2 - 0.5;
90
+ float box_w = pow(sigmoid(output[idx + 2]) * 2, 2);
91
+ float box_h = pow(sigmoid(output[idx + 3]) * 2, 2);
92
+
93
+ box_x = (box_x + j) * (float)stride;
94
+ box_y = (box_y + i) * (float)stride;
95
+ box_w = box_w * anchor[a * 2];
96
+ box_h = box_h * anchor[a * 2 + 1];
97
+
98
+ box_x -= (box_w / 2.0);
99
+ box_y -= (box_h / 2.0);
100
+
101
+ float maxClassProbs = 0;
102
+ int maxClassId = 0;
103
+
104
+ for(int k = 0; k < OBJ_CLASS_NUM ; k++)
105
+ {
106
+ float prob = output[idx + 5 + k];
107
+ if (prob > maxClassProbs)
108
+ {
109
+ maxClassId = k;
110
+ maxClassProbs = prob;
111
+ }
112
+ }
113
+ if (maxClassProbs > BOX_THRESH)
114
+ {
115
+ objProbs.push_back(sigmoid(maxClassProbs) * box_confidence);
116
+ classId.push_back(maxClassId);
117
+ validCount++;
118
+ boxes.push_back(box_x);
119
+ boxes.push_back(box_y);
120
+ boxes.push_back(box_w);
121
+ boxes.push_back(box_h);
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+
128
+ return validCount;
129
+ }
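+ // process() walks one detection head laid out as NHWC (grid_h x grid_w cells, each holding
+ // 3 anchors x 85 values, as the idx arithmetic assumes). Cells whose sigmoid objectness reaches
+ // BOX_THRESH are decoded with the YOLOv5 anchor formula into top-left xywh boxes, and the best
+ // class score is combined with the objectness to form the final confidence.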
130
+
131
+
132
+ static int quick_sort_indice_inverse(std::vector<float>& input, int left, int right, std::vector<int>& indices)
133
+ {
134
+ float key;
135
+ int key_index;
136
+ int low = left;
137
+ int high = right;
138
+ if (left < right) {
139
+ key_index = indices[left];
140
+ key = input[left];
141
+ while (low < high) {
142
+ while (low < high && input[high] <= key) {
143
+ high--;
144
+ }
145
+ input[low] = input[high];
146
+ indices[low] = indices[high];
147
+ while (low < high && input[low] >= key) {
148
+ low++;
149
+ }
150
+ input[high] = input[low];
151
+ indices[high] = indices[low];
152
+ }
153
+ input[low] = key;
154
+ indices[low] = key_index;
155
+ quick_sort_indice_inverse(input, left, low - 1, indices);
156
+ quick_sort_indice_inverse(input, low + 1, right, indices);
157
+ }
158
+ return low;
159
+ }
160
+
161
+ static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
162
+ float ymax1)
163
+ {
164
+ float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
165
+ float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
166
+ float i = w * h;
167
+ float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
168
+ return u <= 0.f ? 0.f : (i / u);
169
+ }
170
+
171
+
172
+ static int nms(int validCount, std::vector<float>& outputLocations, std::vector<int> classIds, std::vector<int>& order,
173
+ int filterId, float threshold)
174
+ {
175
+ for (int i = 0; i < validCount; ++i) {
176
+ if (order[i] == -1 || classIds[i] != filterId) {
177
+ continue;
178
+ }
179
+ int n = order[i];
180
+ for (int j = i + 1; j < validCount; ++j) {
181
+ int m = order[j];
182
+ if (m == -1 || classIds[i] != filterId) {
183
+ continue;
184
+ }
185
+ float xmin0 = outputLocations[n * 4 + 0];
186
+ float ymin0 = outputLocations[n * 4 + 1];
187
+ float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
188
+ float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
189
+
190
+ float xmin1 = outputLocations[m * 4 + 0];
191
+ float ymin1 = outputLocations[m * 4 + 1];
192
+ float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
193
+ float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
194
+
195
+ float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
196
+
197
+ if (iou > threshold) {
198
+ order[j] = -1;
199
+ }
200
+ }
201
+ }
202
+ return 0;
203
+ }
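+ // nms() is called once per class id: walking the score-sorted order, a kept box marks every
+ // later box whose IoU with it exceeds the threshold as suppressed by writing -1 into `order`.
+ // The drawing loop below skips those -1 entries.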
204
+
205
+ int32_t thread_func(int thread_idx){
206
+
207
+ printf("entry thread_func[%d]\n", thread_idx);
208
+
209
+ std::string image_path = "../bus.jpg";
210
+ std::string save_name = "out_yolov5_qnn";
211
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin";
212
+
213
+ // image process
214
+ cv::Mat frame = cv::imread(image_path);
215
+ cv::cvtColor(frame, frame , cv::COLOR_BGR2RGB);
216
+ cv::Scalar stds_scale(255, 255, 255);
217
+ cv::Size target_shape(MODEL_SIZE, MODEL_SIZE);
218
+
219
+ cv::Mat frame_resized = cv::Mat::zeros(MODEL_SIZE, MODEL_SIZE, CV_8UC3);
220
+ float scale = eqprocess(&frame, &frame_resized, MODEL_SIZE, MODEL_SIZE);
221
+
222
+ cv::Mat input_data;
223
+ frame_resized.convertTo(input_data, CV_32FC3);
224
+ cv::divide(input_data, stds_scale, input_data);
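+ // The C++ preprocessing mirrors the Python demo: BGR->RGB, letterbox into a 640x640 canvas via
+ // eqprocess(), conversion to CV_32FC3 and division by 255 so pixel values land in [0, 1].
+ // `scale` is kept to map the detected boxes back to the original frame at the end.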
225
+
226
+ // model init
227
+ printf("Aidlite library version : %s\n", Aidlux::Aidlite::get_library_version().c_str());
228
+
229
+ // Combine the following three logging calls as needed. If none of them is called, only error logs go to stderr by default.
230
+ Aidlux::Aidlite::set_log_level(Aidlux::Aidlite::LogLevel::INFO);
231
+ Aidlux::Aidlite::log_to_stderr();
232
+ // Aidlux::Aidlite::log_to_file("./qnn_yolov5_multi_");
233
+
234
+ Model* model = Model::create_instance(model_path);
235
+ if(model == nullptr){
236
+ printf("Create Model object failed !\n");
237
+ return EXIT_FAILURE;
238
+ }
239
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,640,640,3}};
240
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,40,40,255}, {1,20,20,255}, {1,80,80,255}};
241
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
242
+
243
+ Config* config = Config::create_instance();
244
+ if(config == nullptr){
245
+ printf("Create Config object failed !\n");
246
+ return EXIT_FAILURE;
247
+ }
248
+
249
+ config->implement_type = ImplementType::TYPE_LOCAL;
250
+ config->framework_type = FrameworkType::TYPE_QNN216;
251
+ config->accelerate_type = AccelerateType::TYPE_DSP;
252
+
253
+ std::unique_ptr<Interpreter>&& fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
254
+ if(fast_interpreter == nullptr){
255
+ printf("build_interpretper_from_model_and_config failed !\n");
256
+ return EXIT_FAILURE;
257
+ }
258
+
259
+ int result = fast_interpreter->init();
260
+ if(result != EXIT_SUCCESS){
261
+ printf("interpreter->init() failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+
265
+ result = fast_interpreter->load_model();
266
+ if(result != EXIT_SUCCESS){
267
+ printf("interpreter->load_model() failed !\n");
268
+ return EXIT_FAILURE;
269
+ }
270
+
271
+ printf("load model load success!\n");
272
+
273
+ float* stride8 = nullptr;
274
+ float* stride16 = nullptr;
275
+ float* stride32 = nullptr;
276
+
277
+ // post_process
278
+ std::vector<float> filterBoxes;
279
+ std::vector<float> objProbs;
280
+ std::vector<int> classId;
281
+
282
+ double sum_time_0 = 0.0, sum_time_1 = 0.0, sum_time_2 = 0.0;
283
+ int _counter = 10;
284
+ for(int idx = 0; idx < _counter; ++idx){
285
+ std::chrono::steady_clock::time_point st0 = std::chrono::steady_clock::now();
286
+
287
+ void* input_tensor_data = (void*)input_data.data;
288
+ result = fast_interpreter->set_input_tensor(0,input_tensor_data);
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->set_input_tensor() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+
294
+ std::chrono::steady_clock::time_point et0 = std::chrono::steady_clock::now();
295
+ std::chrono::steady_clock::duration dur0 = et0 - st0;
296
+ printf("current thread_idx[%d] [%d] set_input_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur0).count()*1000);
297
+ sum_time_0 += std::chrono::duration<double>(dur0).count()*1000;
298
+
299
+ std::chrono::steady_clock::time_point st1 = std::chrono::steady_clock::now();
300
+
301
+ result = fast_interpreter->invoke();
302
+ if(result != EXIT_SUCCESS){
303
+ printf("interpreter->invoke() failed !\n");
304
+ return EXIT_FAILURE;
305
+ }
306
+
307
+ std::chrono::steady_clock::time_point et1 = std::chrono::steady_clock::now();
308
+ std::chrono::steady_clock::duration dur1 = et1 - st1;
309
+ printf("current thread_idx[%d] [%d] invoke cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur1).count()*1000);
310
+ sum_time_1 += std::chrono::duration<double>(dur1).count()*1000;
311
+
312
+ std::chrono::steady_clock::time_point st2 = std::chrono::steady_clock::now();
313
+
314
+ uint32_t output_tensor_length_0 = 0;
315
+ result = fast_interpreter->get_output_tensor(0, (void**)&stride8, &output_tensor_length_0);
316
+ if(result != EXIT_SUCCESS){
317
+ printf("interpreter->get_output_tensor() 0 failed !\n");
318
+ return EXIT_FAILURE;
319
+ }
320
+ printf("sample : interpreter->get_output_tensor() 0 length is [%d] !\n", output_tensor_length_0);
321
+
322
+ uint32_t output_tensor_length_1 = 0;
323
+ result = fast_interpreter->get_output_tensor(1, (void**)&stride16, &output_tensor_length_1);
324
+ if(result != EXIT_SUCCESS){
325
+ printf("interpreter->get_output_tensor() 1 failed !\n");
326
+ return EXIT_FAILURE;
327
+ }
328
+ printf("sample : interpreter->get_output_tensor() 1 length is [%d] !\n", output_tensor_length_1);
329
+
330
+ uint32_t output_tensor_length_2 = 0;
331
+ result = fast_interpreter->get_output_tensor(2, (void**)&stride32, &output_tensor_length_2);
332
+ if(result != EXIT_SUCCESS){
333
+ printf("interpreter->get_output_tensor() 2 failed !\n");
334
+ return EXIT_FAILURE;
335
+ }
336
+ printf("sample : interpreter->get_output_tensor() 2 length is [%d] !\n", output_tensor_length_2);
337
+
338
+ std::chrono::steady_clock::time_point et2 = std::chrono::steady_clock::now();
339
+ std::chrono::steady_clock::duration dur2 = et2 - st2;
340
+ printf("current thread_idx[%d] [%d] get_output_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur2).count()*1000);
341
+ sum_time_2 += std::chrono::duration<double>(dur2).count()*1000;
342
+ }
343
+ printf("repeat [%d] time , input[%f] --- invoke[%f] --- output[%f] --- sum[%f]ms\n", _counter, sum_time_0, sum_time_1, sum_time_2, sum_time_0+sum_time_1+sum_time_2);
344
+
345
+ std::chrono::steady_clock::time_point pps = std::chrono::steady_clock::now();
346
+
347
+ filterBoxes.clear();
348
+ objProbs.clear();
349
+ classId.clear();
350
+ int validCount0 = process(stride8, filterBoxes, objProbs, classId, (float*)anchor0, STRIDE8_SIZE, STRIDE8_SIZE, 8, MODEL_SIZE);
351
+ int validCount1 = process(stride16, filterBoxes, objProbs, classId, (float*)anchor1, STRIDE16_SIZE, STRIDE16_SIZE, 16, MODEL_SIZE);
352
+ int validCount2 = process(stride32, filterBoxes, objProbs, classId, (float*)anchor2, STRIDE32_SIZE, STRIDE32_SIZE, 32, MODEL_SIZE);
353
+
354
+ int validCount = validCount0 + validCount1 +validCount2;
355
+
356
+ std::vector<int> indexArray;
357
+ for (int i = 0; i < validCount; ++i){
358
+ indexArray.push_back(i);
359
+ }
360
+
361
+ quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
362
+
363
+ std::set<int> class_set(std::begin(classId), std::end(classId));
364
+
365
+ for (auto c : class_set) {
366
+ nms(validCount, filterBoxes, classId, indexArray, c, NMS_THRESH);
367
+ }
368
+
369
+ std::chrono::steady_clock::time_point ppe = std::chrono::steady_clock::now();
370
+ std::chrono::steady_clock::duration durpp = ppe - pps;
371
+ printf("postprocess cost time : %f ms\n", std::chrono::duration<double>(durpp).count()*1000);
372
+
373
+ // Reference boxes come from an SNPE2 FP32 CPU run; [x1, y1, x2, y2] coordinates are rounded down
374
+ const float expected_box_0[3][4] = {{210, 241, 285, 519}, {473, 229, 560, 522}, {108, 231, 231, 542}};
375
+ const float expected_box_5[1][4] = {{91, 131, 551, 464}};
376
+
377
+ unsigned int box_count = 0;
378
+ unsigned int verify_pass_count = 0;
379
+ for (int i = 0; i < validCount; ++i) {
380
+
381
+ if (indexArray[i] == -1) {
382
+ continue;
383
+ }
384
+ int n = indexArray[i];
385
+
386
+ float x1 = filterBoxes[n * 4 + 0] * scale;
387
+ float y1 = filterBoxes[n * 4 + 1] * scale;
388
+ float x2 = x1 + filterBoxes[n * 4 + 2] * scale;
389
+ float y2 = y1 + filterBoxes[n * 4 + 3] * scale;
390
+ int id = classId[n];
391
+ float obj_conf = objProbs[i];
392
+
393
+ // string show_info = "class " + to_string(id) + ": " + to_string(obj_conf);
394
+ string show_info = class_names[id] + ": " + to_string(obj_conf);
395
+ cv::putText(frame, show_info.c_str(), cv::Point(x1, y1), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2, 2); // color-BGR
396
+ cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2, 2, 0);
397
+
398
+ // verify that the result is correct
399
+ printf("Result id[%d]-x1[%f]-y1[%f]-x2[%f]-y2[%f]\n", id, x1, y1, x2, y2);
400
+
401
+ ++box_count;
402
+ if(id == 0){
403
+ for(int idx = 0; idx < 3; ++idx){
404
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
405
+ expected_box_0[idx][0], expected_box_0[idx][1], expected_box_0[idx][2], expected_box_0[idx][3]);
406
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
407
+ if(coverage_ratio > 0.9){
408
+ ++verify_pass_count;
409
+ break;
410
+ }
411
+ }
412
+ }else if(id == 5){
413
+ for(int idx = 0; idx < 1; ++idx){
414
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
415
+ expected_box_5[idx][0], expected_box_5[idx][1], expected_box_5[idx][2], expected_box_5[idx][3]);
416
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
417
+ if(coverage_ratio > 0.9){
418
+ ++verify_pass_count;
419
+ break;
420
+ }
421
+ }
422
+ }else{
423
+ printf("ERROR : The Yolov5s model inference result is not the expected classification category.\n");
424
+ return EXIT_FAILURE;
425
+ }
426
+ }
427
+
428
+ // save the result image
429
+ cv::cvtColor(frame, frame , cv::COLOR_RGB2BGR);
430
+ cv::imwrite("result.jpg", frame);
431
+
432
+ result = fast_interpreter->destory();
433
+ if(result != EXIT_SUCCESS){
434
+ printf("interpreter->destory() failed !\n");
435
+ return EXIT_FAILURE;
436
+ }
437
+
438
+ printf("exit thread_func[%d]\n", thread_idx);
439
+
440
+ return EXIT_SUCCESS;
441
+ }
442
+
443
+ int main(int argc, char** args)
444
+ {
445
+
446
+ std::future<int> thread_01_result = std::async(std::launch::async, thread_func, 1);
447
+
448
+ if(EXIT_SUCCESS != thread_01_result.get()){
449
+ printf("ERROR : thread_01 run failed.\n");
450
+ return EXIT_FAILURE;
451
+ }
452
+
453
+ printf("Exit main function .\n");
454
+ return 0;
455
  }
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/models/cutoff_yolov5n_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
version https://git-lfs.github.com/spec/v1
oid sha256:3aee9e73507ebbffbd59b426b06531d05f4bc8d0ea934880a32804b6cfda720a
size 2124248
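Note: the context binary above is stored as a Git LFS pointer (about 2.1 MB once fetched); after cloning, run `git lfs pull` if the file has not been downloaded automatically.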
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py CHANGED
@@ -1,338 +1,338 @@
1
- import time
2
- import numpy as np
3
- import cv2
4
- import aidlite
5
- import argparse
6
- import os
7
- OBJ_CLASS_NUM = 80
8
- NMS_THRESH = 0.45
9
- BOX_THRESH = 0.5
10
- MODEL_SIZE = 640
11
-
12
- OBJ_NUMB_MAX_SIZE = 64
13
- PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
- STRIDE8_SIZE = (MODEL_SIZE / 8)
15
- STRIDE16_SIZE = (MODEL_SIZE / 16)
16
- STRIDE32_SIZE = (MODEL_SIZE / 32)
17
-
18
- anchors = [[10, 13, 16, 30, 33, 23],
19
- [30, 61, 62, 45, 59, 119],
20
- [116, 90, 156, 198, 373, 326]]
21
-
22
- current_p =os.path.dirname(os.path.abspath(__file__))
23
-
24
- coco_class = [
25
- 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
- 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
- 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
- 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
- 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
- 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
- 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
- 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
-
34
-
35
- def eqprocess(image, size1, size2):
36
- h, w, _ = image.shape
37
- mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
- scale1 = h / size1
39
- scale2 = w / size2
40
- if scale1 > scale2:
41
- scale = scale1
42
- else:
43
- scale = scale2
44
- img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
- mask[:int(h / scale), :int(w / scale), :] = img
46
- return mask, scale
47
-
48
-
49
- def xywh2xyxy(x):
50
- '''
51
- Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
- '''
53
- y = np.copy(x)
54
- y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
- y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
- y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
- y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
- return y
59
-
60
-
61
- def xyxy2xywh(box):
62
- '''
63
- Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
- '''
65
- box[:, 2:] = box[:, 2:] - box[:, :2]
66
- return box
67
-
68
-
69
- def NMS(dets, scores, thresh):
70
- '''
71
- 单类NMS算法
72
- dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
- '''
74
- x1 = dets[:, 0]
75
- y1 = dets[:, 1]
76
- x2 = dets[:, 2]
77
- y2 = dets[:, 3]
78
- areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
- keep = []
80
- index = scores.argsort()[::-1]
81
- while index.size > 0:
82
- i = index[0] # every time the first is the biggst, and add it directly
83
- keep.append(i)
84
- x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
- y11 = np.maximum(y1[i], y1[index[1:]])
86
- x22 = np.minimum(x2[i], x2[index[1:]])
87
- y22 = np.minimum(y2[i], y2[index[1:]])
88
- w = np.maximum(0, x22 - x11 + 1) # the weights of overlap
89
- h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
- overlaps = w * h
91
- ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
- idx = np.where(ious <= thresh)[0]
93
- index = index[idx + 1] # because index start from 1
94
-
95
- return keep
96
-
97
-
98
- def clip_coords(boxes, img_shape):
99
- # Clip bounding xyxy bounding boxes to image shape (height, width)
100
- boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
- boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
- boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
- boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
-
105
-
106
- def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
- '''
108
- 检测输出后处理
109
- prediction: aidlite模型预测输出
110
- img0shape: 原始图片shape
111
- img1shape: 输入图片shape
112
- conf_thres: 置信度阈值
113
- iou_thres: IOU阈值
114
- return: list[np.ndarray(N, 5)], 对应类别的坐标框信息, xywh、conf
115
- '''
116
- h, w, _ = img1shape
117
- valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
- valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
- valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
-
121
- max_det = 300
122
- max_wh = 7680
123
- max_nms = 30000
124
- valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
- valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
- sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
- valid_condidates = valid_condidates[sort_id[:max_nms]]
128
- boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
- index = NMS(boxes, scores, iou_thres)[:max_det]
130
- out_boxes = valid_condidates[index]
131
- clip_coords(out_boxes[:, :4], img0shape)
132
- out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
- print("检测到{}个区域".format(len(out_boxes)))
134
- return out_boxes
135
-
136
-
137
- def draw_detect_res(img, det_pred):
138
- '''
139
- 检测结果绘制
140
- '''
141
- img = img.astype(np.uint8)
142
- color_step = int(255 / len(coco_class))
143
- for i in range(len(det_pred)):
144
- x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
- score = det_pred[i][4]
146
- cls_id = int(det_pred[i][5])
147
-
148
- print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
-
150
- cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
- cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
- thickness=2)
153
-
154
- return img
155
-
156
-
157
- class Detect():
158
- # YOLOv5 Detect head for detection models
159
- def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
- super().__init__()
161
- self.nc = nc # number of classes
162
- self.no = nc + 5 # number of outputs per anchor
163
- self.stride = stride
164
- self.nl = len(anchors) # number of detection layers
165
- self.na = len(anchors[0]) // 2 # number of anchors
166
- self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
- self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
-
169
- base_scale = image_size // 8
170
- for i in range(self.nl):
171
- self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
-
173
- def _make_grid(self, nx=20, ny=20, i=0):
174
- y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
- yv, xv = np.meshgrid(y, x)
176
- yv, xv = yv.T, xv.T
177
- # add grid offset, i.e. y = 2.0 * x - 0.5
178
- grid = np.stack((xv, yv), 2)
179
- grid = grid[np.newaxis, np.newaxis, ...]
180
- grid = np.repeat(grid, self.na, axis=1) - 0.5
181
- anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
- anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
- anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
- return grid, anchor_grid
185
-
186
- def sigmoid(self, arr):
187
- return 1 / (1 + np.exp(-arr))
188
-
189
- def __call__(self, x):
190
- z = [] # inference output
191
- for i in range(self.nl):
192
- bs, _, ny, nx = x[i].shape
193
- x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
- y = self.sigmoid(x[i])
195
- y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
- y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
- z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
-
199
- return np.concatenate(z, 1)
200
-
201
- def main():
202
- args = parser_args()
203
- target_model = args.target_model
204
- model_type = args.model_type
205
- size = int(args.size)
206
- imgs = args.imgs
207
- invoke_nums = int(args.invoke_nums)
208
- print("Start main ... ...")
209
- # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
- # aidlite.log_to_stderr()
211
- # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
- # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
-
214
- config = aidlite.Config.create_instance()
215
- if config is None:
216
- print("Create config failed !")
217
- return False
218
-
219
-
220
- config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
- if model_type.lower()=="qnn":
222
- config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
- elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
- config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
-
226
- config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
- config.is_quantify_model = 1
228
-
229
-
230
- model = aidlite.Model.create_instance(target_model)
231
- if model is None:
232
- print("Create model failed !")
233
- return False
234
- input_shapes = [[1, size, size, 3]]
235
- output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
- model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
- output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
-
239
- interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
- if interpreter is None:
241
- print("build_interpretper_from_model_and_config failed !")
242
- return None
243
- result = interpreter.init()
244
- if result != 0:
245
- print(f"interpreter init failed !")
246
- return False
247
- result = interpreter.load_model()
248
- if result != 0:
249
- print("interpreter load model failed !")
250
- return False
251
- print("detect model load success!")
252
-
253
- # image process
254
- frame = cv2.imread(imgs)
255
- # Pad the image to a square, then resize (aspect ratio preserved)
256
- img_processed = np.copy(frame)
257
- [height, width, _] = img_processed.shape
258
- length = max((height, width))
259
- scale = length / size
260
- ratio=[scale,scale]
261
- image = np.zeros((length, length, 3), np.uint8)
262
- image[0:height, 0:width] = img_processed
263
- img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
- img_input=cv2.resize(img_input,(size,size))
265
-
266
- mean_data=[0, 0, 0]
267
- std_data=[255, 255, 255]
268
- img_input = (img_input-mean_data)/std_data # HWC
269
-
270
- img_input = img_input.astype(np.float32)
271
-
272
-
273
- # qnn run
274
- invoke_time=[]
275
- for i in range(invoke_nums):
276
- result = interpreter.set_input_tensor(0, img_input.data)
277
- if result != 0:
278
- print("interpreter set_input_tensor() failed")
279
-
280
- t1=time.time()
281
- result = interpreter.invoke()
282
- cost_time = (time.time()-t1)*1000
283
- invoke_time.append(cost_time)
284
-
285
- if result != 0:
286
- print("interpreter set_input_tensor() failed")
287
- stride8 = interpreter.get_output_tensor(0)
288
- stride16 = interpreter.get_output_tensor(1)
289
- stride32 = interpreter.get_output_tensor(2)
290
-
291
-
292
- result = interpreter.destory()
293
-
294
- ## timing statistics
295
- max_invoke_time = max(invoke_time)
296
- min_invoke_time = min(invoke_time)
297
- mean_invoke_time = sum(invoke_time)/invoke_nums
298
- var_invoketime=np.var(invoke_time)
299
- print("=======================================")
300
- print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
- print("=======================================")
302
-
303
- ## post-processing
304
- stride = [8, 16, 32]
305
- yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
- validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
- validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
- validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
- pred = yolo_head([validCount0, validCount1, validCount2])
310
- det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
- det_pred[np.isnan(det_pred)] = 0.0
312
- det_pred[:, :4] = det_pred[:, :4] * scale
313
- res_img = draw_detect_res(frame, det_pred)
314
-
315
- save_path=os.path.join(current_p,"result.jpg")
316
- cv2.imwrite(save_path, res_img)
317
- print("图片保存在",save_path)
318
- print("=======================================")
319
-
320
- return True
321
-
322
-
323
-
324
-
325
- image_path = os.path.join(current_p,"bus.jpg")
326
- def parser_args():
327
- parser = argparse.ArgumentParser(description="Run model benchmarks")
328
- parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem'),help="inference model path")
329
- parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
- parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
- parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
- parser.add_argument('--size',type=str,default=640,help="model input size")
333
- args = parser.parse_args()
334
- return args
335
-
336
- if __name__ == "__main__":
337
- main()
338
-
 
1
+ import time
2
+ import numpy as np
3
+ import cv2
4
+ import aidlite
5
+ import argparse
6
+ import os
7
+ OBJ_CLASS_NUM = 80
8
+ NMS_THRESH = 0.45
9
+ BOX_THRESH = 0.5
10
+ MODEL_SIZE = 640
11
+
12
+ OBJ_NUMB_MAX_SIZE = 64
13
+ PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
+ STRIDE8_SIZE = (MODEL_SIZE / 8)
15
+ STRIDE16_SIZE = (MODEL_SIZE / 16)
16
+ STRIDE32_SIZE = (MODEL_SIZE / 32)
17
+
18
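+ # (w, h) anchor pairs for strides 8 / 16 / 32 (the YOLOv5 default COCO anchors)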
+ anchors = [[10, 13, 16, 30, 33, 23],
19
+ [30, 61, 62, 45, 59, 119],
20
+ [116, 90, 156, 198, 373, 326]]
21
+
22
+ current_p =os.path.dirname(os.path.abspath(__file__))
23
+
24
+ coco_class = [
25
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
+ 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
+ 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
+ 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
+ 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
+ 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
+ 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
+
34
+
35
+ def eqprocess(image, size1, size2):
36
+ h, w, _ = image.shape
37
+ mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
+ scale1 = h / size1
39
+ scale2 = w / size2
40
+ if scale1 > scale2:
41
+ scale = scale1
42
+ else:
43
+ scale = scale2
44
+ img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
+ mask[:int(h / scale), :int(w / scale), :] = img
46
+ return mask, scale
47
+
48
+
49
+ def xywh2xyxy(x):
50
+ '''
51
+ Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
+ '''
53
+ y = np.copy(x)
54
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
+ return y
59
+
60
+
61
+ def xyxy2xywh(box):
62
+ '''
63
+ Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
+ '''
65
+ box[:, 2:] = box[:, 2:] - box[:, :2]
66
+ return box
67
+
68
+
69
+ def NMS(dets, scores, thresh):
70
+ '''
71
+ Single-class NMS (non-maximum suppression)
72
+ dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
+ '''
74
+ x1 = dets[:, 0]
75
+ y1 = dets[:, 1]
76
+ x2 = dets[:, 2]
77
+ y2 = dets[:, 3]
78
+ areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
+ keep = []
80
+ index = scores.argsort()[::-1]
81
+ while index.size > 0:
82
+ i = index[0] # the first index always has the highest score; keep it directly
83
+ keep.append(i)
84
+ x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
+ y11 = np.maximum(y1[i], y1[index[1:]])
86
+ x22 = np.minimum(x2[i], x2[index[1:]])
87
+ y22 = np.minimum(y2[i], y2[index[1:]])
88
+ w = np.maximum(0, x22 - x11 + 1) # the width of overlap
89
+ h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
+ overlaps = w * h
91
+ ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
+ idx = np.where(ious <= thresh)[0]
93
+ index = index[idx + 1] # offset by 1 because ious was computed against index[1:]
94
+
95
+ return keep
96
+
97
+
98
+ def clip_coords(boxes, img_shape):
99
+ # Clip xyxy bounding boxes to image shape (height, width)
100
+ boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
+ boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
+ boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
+ boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
+
105
+
106
+ def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
+ '''
108
+ Post-process the detection output
109
+ prediction: raw aidlite model output
110
+ img0shape: shape of the original image
111
+ img1shape: shape of the model input image
112
+ conf_thres: confidence threshold
113
+ iou_thres: IoU threshold
114
+ return: list[np.ndarray(N, 5)], per-class box info: xywh, conf
115
+ '''
116
+ h, w, _ = img1shape
117
+ valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
+ valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
+ valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
+
121
+ max_det = 300
122
+ max_wh = 7680
123
+ max_nms = 30000
124
+ valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
+ valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
+ sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
+ valid_condidates = valid_condidates[sort_id[:max_nms]]
128
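+ # class-aware NMS trick: offset each box by class_id * max_wh so boxes of different classes never overlap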
+ boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
+ index = NMS(boxes, scores, iou_thres)[:max_det]
130
+ out_boxes = valid_condidates[index]
131
+ clip_coords(out_boxes[:, :4], img0shape)
132
+ out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
+ print("检测到{}个区域".format(len(out_boxes)))
134
+ return out_boxes
135
+
136
+
137
+ def draw_detect_res(img, det_pred):
138
+ '''
139
+ Draw detection results on the image
140
+ '''
141
+ img = img.astype(np.uint8)
142
+ color_step = int(255 / len(coco_class))
143
+ for i in range(len(det_pred)):
144
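+ # det_pred boxes are xywh here, so x2/y2 actually hold width/height (hence the x1 + x2, y1 + y2 corners below)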
+ x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
+ score = det_pred[i][4]
146
+ cls_id = int(det_pred[i][5])
147
+
148
+ print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
+
150
+ cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
+ cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
+ thickness=2)
153
+
154
+ return img
155
+
156
+
157
+ class Detect():
158
+ # YOLOv5 Detect head for detection models
159
+ def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
+ super().__init__()
161
+ self.nc = nc # number of classes
162
+ self.no = nc + 5 # number of outputs per anchor
163
+ self.stride = stride
164
+ self.nl = len(anchors) # number of detection layers
165
+ self.na = len(anchors[0]) // 2 # number of anchors
166
+ self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
+ self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
+
169
+ base_scale = image_size // 8
170
+ for i in range(self.nl):
171
+ self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
+
173
+ def _make_grid(self, nx=20, ny=20, i=0):
174
+ y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
+ yv, xv = np.meshgrid(y, x)
176
+ yv, xv = yv.T, xv.T
177
+ # add grid offset, i.e. y = 2.0 * x - 0.5
178
+ grid = np.stack((xv, yv), 2)
179
+ grid = grid[np.newaxis, np.newaxis, ...]
180
+ grid = np.repeat(grid, self.na, axis=1) - 0.5
181
+ anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
+ anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
+ anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
+ return grid, anchor_grid
185
+
186
+ def sigmoid(self, arr):
187
+ return 1 / (1 + np.exp(-arr))
188
+
189
+ def __call__(self, x):
190
+ z = [] # inference output
191
+ for i in range(self.nl):
192
+ bs, _, ny, nx = x[i].shape
193
+ x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
+ y = self.sigmoid(x[i])
195
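+ # YOLOv5 decode: xy = (2*sigmoid(t) - 0.5 + cell) * stride (the -0.5 is already folded into self.grid), wh = (2*sigmoid(t))**2 * anchor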
+ y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
+ z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
+
199
+ return np.concatenate(z, 1)
200
+
201
+ def main():
202
+ args = parser_args()
203
+ target_model = args.target_model
204
+ model_type = args.model_type
205
+ size = int(args.size)
206
+ imgs = args.imgs
207
+ invoke_nums = int(args.invoke_nums)
208
+ print("Start main ... ...")
209
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
+ # aidlite.log_to_stderr()
211
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
+
214
+ config = aidlite.Config.create_instance()
215
+ if config is None:
216
+ print("Create config failed !")
217
+ return False
218
+
219
+
220
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
+ if model_type.lower()=="qnn":
222
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
+ elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
+
226
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
+ config.is_quantify_model = 1
228
+
229
+
230
+ model = aidlite.Model.create_instance(target_model)
231
+ if model is None:
232
+ print("Create model failed !")
233
+ return False
234
+ input_shapes = [[1, size, size, 3]]
235
+ output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
+
239
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
+ if interpreter is None:
241
+ print("build_interpretper_from_model_and_config failed !")
242
+ return None
243
+ result = interpreter.init()
244
+ if result != 0:
245
+ print(f"interpreter init failed !")
246
+ return False
247
+ result = interpreter.load_model()
248
+ if result != 0:
249
+ print("interpreter load model failed !")
250
+ return False
251
+ print("detect model load success!")
252
+
253
+ # image process
254
+ frame = cv2.imread(imgs)
255
+ # Pad the image to a square, then resize (aspect ratio preserved)
256
+ img_processed = np.copy(frame)
257
+ [height, width, _] = img_processed.shape
258
+ length = max((height, width))
259
+ scale = length / size
260
+ ratio=[scale,scale]
261
+ image = np.zeros((length, length, 3), np.uint8)
262
+ image[0:height, 0:width] = img_processed
263
+ img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
+ img_input=cv2.resize(img_input,(size,size))
265
+
266
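+ # normalize pixel values to [0, 1] (mean 0, std 255)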
+ mean_data=[0, 0, 0]
267
+ std_data=[255, 255, 255]
268
+ img_input = (img_input-mean_data)/std_data # HWC
269
+
270
+ img_input = img_input.astype(np.float32)
271
+
272
+
273
+ # qnn run
274
+ invoke_time=[]
275
+ for i in range(invoke_nums):
276
+ result = interpreter.set_input_tensor(0, img_input.data)
277
+ if result != 0:
278
+ print("interpreter set_input_tensor() failed")
279
+
280
+ t1=time.time()
281
+ result = interpreter.invoke()
282
+ cost_time = (time.time()-t1)*1000
283
+ invoke_time.append(cost_time)
284
+
285
+ if result != 0:
286
+ print("interpreter set_input_tensor() failed")
287
+ stride8 = interpreter.get_output_tensor(0)
288
+ stride16 = interpreter.get_output_tensor(1)
289
+ stride32 = interpreter.get_output_tensor(2)
290
+
291
+
292
+ result = interpreter.destory()
293
+
294
+ ## timing statistics
295
+ max_invoke_time = max(invoke_time)
296
+ min_invoke_time = min(invoke_time)
297
+ mean_invoke_time = sum(invoke_time)/invoke_nums
298
+ var_invoketime=np.var(invoke_time)
299
+ print("=======================================")
300
+ print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
+ print("=======================================")
302
+
303
+ ## post-processing
304
+ stride = [8, 16, 32]
305
+ yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
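+ # the code assumes output tensor 0 is the 80x80 (stride-8) map, hence the reshape with output_shapes[2]; NHWC outputs are transposed to NCHW for the Detect head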
+ validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
+ validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
+ validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
+ pred = yolo_head([validCount0, validCount1, validCount2])
310
+ det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
+ det_pred[np.isnan(det_pred)] = 0.0
312
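+ # map boxes from the 640x640 letterboxed input back to original-image coordinates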
+ det_pred[:, :4] = det_pred[:, :4] * scale
313
+ res_img = draw_detect_res(frame, det_pred)
314
+
315
+ save_path=os.path.join(current_p,"result.jpg")
316
+ cv2.imwrite(save_path, res_img)
317
+ print("图片保存在",save_path)
318
+ print("=======================================")
319
+
320
+ return True
321
+
322
+
323
+
324
+
325
+ image_path = os.path.join(current_p,"bus.jpg")
326
+ def parser_args():
327
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
328
+ parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin'),help="inference model path")
329
+ parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
+ parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
+ parser.add_argument('--size',type=str,default=640,help="model input size")
333
+ args = parser.parse_args()
334
+ return args
335
+
336
+ if __name__ == "__main__":
337
+ main()
338
+