qc903113684 committed on
Commit b75be27 · verified · 1 Parent(s): 840c2fd

Upload 16 files
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/bus.jpg filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/python/bus.jpg filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/python/result.jpg filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/bus.jpg filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/python/bus.jpg filter=lfs diff=lfs merge=lfs -text
+ model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/python/result.jpg filter=lfs diff=lfs merge=lfs -text
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,60 @@
+ ## Model Information
+ ### Source model
+
+ - Input shape: 640x640
+ - Number of parameters: 1.968M
+ - Model size: 7.56 MB
+ - Output shape: 1x25200x85
+
+ Source model repository: [yolov5](https://github.com/ultralytics/yolov5)
+
+ ### Converted model
+
+ - Precision: INT8
+ - Backend: QNN2.16
+ - Target Device: FV01 QCS6490
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+ - Install the AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the AidLite SDK
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Install the QNN runtime that matches the backend above (QNN2.16 for this model)
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ # e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
+ ```
+
+ - Verify the AidLite SDK
+
+ ```bash
+ # Check the AidLite SDK C++ library version
+ python3 -c "import aidlite; print(aidlite.get_library_version())"
+
+ # Check the AidLite SDK Python library version
+ python3 -c "import aidlite; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run the Python demo
+
+ ```bash
+ cd python
+ python3 demo_qnn.py
+ ```
+
+ ### Run the C++ demo
+
+ ```bash
+ cd yolov5n/model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp
+ mkdir build
+ cd build
+ cmake ..
+ make
+ ./run_yolov5
+ ```
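
For orientation, the following is a minimal sketch of the AidLite Python inference flow that `demo_qnn.py` (included in this upload) wraps. The API calls, model path, input size, and output shapes are taken from the demo itself; the preprocessing is deliberately simplified (the real demo letterboxes the image), so treat this as an illustration rather than a drop-in replacement.

```python
# Minimal AidLite QNN flow, condensed from demo_qnn.py (run from the python/ directory).
import cv2
import numpy as np
import aidlite

config = aidlite.Config.create_instance()
config.implement_type = aidlite.ImplementType.TYPE_LOCAL
config.framework_type = aidlite.FrameworkType.TYPE_QNN
config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
config.is_quantify_model = 1

model = aidlite.Model.create_instance("../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem")
model.set_model_properties([[1, 640, 640, 3]], aidlite.DataType.TYPE_FLOAT32,
                           [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]],
                           aidlite.DataType.TYPE_FLOAT32)

interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
interpreter.init()
interpreter.load_model()

# NOTE: the real demo letterboxes the image; a plain resize is used here only to keep the sketch short.
img = cv2.cvtColor(cv2.imread("bus.jpg"), cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (640, 640)).astype(np.float32) / 255.0

interpreter.set_input_tensor(0, img.data)
interpreter.invoke()
heads = [interpreter.get_output_tensor(i) for i in range(3)]  # three YOLOv5 detection heads
interpreter.destory()  # note: this is the SDK's actual method name
```

The three returned tensors correspond to the stride-8/16/32 YOLOv5 heads and still need the anchor decoding and NMS implemented in `demo_qnn.py`.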
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,36 @@
+ cmake_minimum_required(VERSION 3.12)
+ project(aidlite_cpp_samples)
+
+ set(CMAKE_BUILD_TYPE Release)
+
+ set(OPENCV_INCLUDE_DIR /usr/include/opencv4)
+ set(OPENCV_LINK_DIR "")
+ set(OPENCV_LIBS opencv_imgcodecs opencv_imgproc opencv_core) # if these are static libraries, the link order matters
+
+ set(AIDLITE_INCLUDE_DIR /usr/local/include)
+ set(AIDLITE_LINK_DIR /usr/local/lib)
+ set(AIDLITE_LIB aidlite)
+
+ function(func_generate_sample_exe sample_name)
+
+     set(demo_name ${sample_name})
+
+     file(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/${demo_name}.cpp)
+
+     add_executable(${demo_name} ${src_files})
+
+     target_compile_options(${demo_name} PRIVATE -std=c++11)
+     target_include_directories(${demo_name} PUBLIC ${OPENCV_INCLUDE_DIR} ${AIDLITE_INCLUDE_DIR})
+     target_link_directories(${demo_name} PUBLIC ${OPENCV_LINK_DIR} ${AIDLITE_LINK_DIR})
+     target_link_libraries(${demo_name} PUBLIC ${AIDLITE_LIB} ${OPENCV_LIBS} pthread)
+     message(STATUS "[CMAKEMSG] ${demo_name} needs libraries: ${AIDLITE_LIB} ${OPENCV_LIBS}")
+
+ endfunction()
+
+ set(SAMPLE_LIST run_yolov5)
+
+ FOREACH(sample ${SAMPLE_LIST})
+     message("prepare to generate cpp sample : ${sample}")
+
+     func_generate_sample_exe(${sample})
+ ENDFOREACH(sample)
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/bus.jpg ADDED

Git LFS Details

  • SHA256: fb4914d123d97c440cd127ef0e98d4bdc68cd88e2683657528928b4a34014e16
  • Pointer size: 131 Bytes
  • Size of remote file: 181 kB
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/run_yolov5.cpp ADDED
@@ -0,0 +1,455 @@
1
+ #include <thread>
2
+ #include <future>
3
+ #include <opencv2/opencv.hpp>
4
+ #include "aidlux/aidlite/aidlite.hpp"
5
+
6
+ using namespace Aidlux::Aidlite;
7
+ using namespace std;
8
+
9
+ #define OBJ_CLASS_NUM 80
10
+ #define NMS_THRESH 0.45
11
+ #define BOX_THRESH 0.5
12
+ #define MODEL_SIZE 640
13
+ #define OBJ_NUMB_MAX_SIZE 64
14
+ #define PROP_BOX_SIZE (5+OBJ_CLASS_NUM)
15
+ #define STRIDE8_SIZE (MODEL_SIZE / 8)
16
+ #define STRIDE16_SIZE (MODEL_SIZE / 16)
17
+ #define STRIDE32_SIZE (MODEL_SIZE / 32)
18
+
19
+ const float anchor0[6] = {10, 13, 16, 30, 33, 23};
20
+ const float anchor1[6] = {30, 61, 62, 45, 59, 119};
21
+ const float anchor2[6] = {116, 90, 156, 198, 373, 326};
22
+
23
+ string class_names[] = {
24
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
25
+ "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
26
+ "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
27
+ "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
28
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
29
+ "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
30
+ "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
31
+ "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
32
+
33
+
34
+ static float sigmoid(float x) { return 1.f / (1.f + exp(-x)); }
35
+
36
+ float eqprocess(cv::Mat* src, cv::Mat* dst, int width, int height)
37
+ {
38
+ int w = src->cols;
39
+ int h = src->rows;
40
+ float scale_h = float(h) / float(height);
41
+ float scale_w = float(w) / float(width);
42
+
43
+ float scale;
44
+ if (scale_h > scale_w)
45
+ {
46
+ scale = scale_h;
47
+ }
48
+ else
49
+ {
50
+ scale = scale_w;
51
+ }
52
+
53
+ int rel_width = int(w / scale);
54
+ int rel_height = int(h / scale);
55
+
56
+ cv::Mat tmp = (*dst)(cv::Rect(0, 0, rel_width, rel_height));
57
+ cv::resize(*src, tmp, cv::Size(rel_width, rel_height));
58
+ return scale;
59
+ }
60
+
61
+ std::vector<std::string> split(const std::string& str)
62
+ {
63
+ std::stringstream ss(str);
64
+ std::vector<std::string> elems;
65
+ std::string item;
66
+ while (std::getline(ss, item, ','))
67
+ {
68
+ elems.push_back(item);
69
+ }
70
+ return elems;
71
+ }
72
+
73
+
74
+ int process(float* output, std::vector<float>& boxes, std::vector<float>& objProbs, std::vector<int>& classId, float * anchor, int grid_h, int grid_w, int stride, int imgsz)
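+ // One YOLOv5 detection head: for each of the 3 anchors at every grid cell, apply the v5 sigmoid box decoding, keep cells whose objectness and best class score exceed BOX_THRESH, and append their xywh boxes, class ids and confidences; returns the number of boxes kept.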
75
+ {
76
+ int ct = 0;
77
+ int validCount = 0;
78
+ for (int a = 0; a < 3; a++)
79
+ {
80
+ for (int i = 0; i < grid_h; i++)
81
+ {
82
+ for (int j = 0; j < grid_w; j++)
83
+ {
84
+ int idx = a * PROP_BOX_SIZE + (i * grid_w + j) * 3 * PROP_BOX_SIZE;
85
+ float box_confidence = sigmoid(output[idx + 4]);
86
+ if (box_confidence >= BOX_THRESH )
87
+ {
88
+ float box_x = sigmoid(output[idx]) * 2 - 0.5;
89
+ float box_y = sigmoid(output[idx + 1]) * 2 - 0.5;
90
+ float box_w = pow(sigmoid(output[idx + 2]) * 2, 2);
91
+ float box_h = pow(sigmoid(output[idx + 3]) * 2, 2);
92
+
93
+ box_x = (box_x + j) * (float)stride;
94
+ box_y = (box_y + i) * (float)stride;
95
+ box_w = box_w * anchor[a * 2];
96
+ box_h = box_h * anchor[a * 2 + 1];
97
+
98
+ box_x -= (box_w / 2.0);
99
+ box_y -= (box_h / 2.0);
100
+
101
+ float maxClassProbs = 0;
102
+ int maxClassId = 0;
103
+
104
+ for(int k = 0; k < OBJ_CLASS_NUM ; k++)
105
+ {
106
+ float prob = output[idx + 5 + k];
107
+ if (prob > maxClassProbs)
108
+ {
109
+ maxClassId = k;
110
+ maxClassProbs = prob;
111
+ }
112
+ }
113
+ if (maxClassProbs > BOX_THRESH)
114
+ {
115
+ objProbs.push_back(sigmoid(maxClassProbs) * box_confidence);
116
+ classId.push_back(maxClassId);
117
+ validCount++;
118
+ boxes.push_back(box_x);
119
+ boxes.push_back(box_y);
120
+ boxes.push_back(box_w);
121
+ boxes.push_back(box_h);
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+
128
+ return validCount;
129
+ }
130
+
131
+
132
+ static int quick_sort_indice_inverse(std::vector<float>& input, int left, int right, std::vector<int>& indices)
133
+ {
134
+ float key;
135
+ int key_index;
136
+ int low = left;
137
+ int high = right;
138
+ if (left < right) {
139
+ key_index = indices[left];
140
+ key = input[left];
141
+ while (low < high) {
142
+ while (low < high && input[high] <= key) {
143
+ high--;
144
+ }
145
+ input[low] = input[high];
146
+ indices[low] = indices[high];
147
+ while (low < high && input[low] >= key) {
148
+ low++;
149
+ }
150
+ input[high] = input[low];
151
+ indices[high] = indices[low];
152
+ }
153
+ input[low] = key;
154
+ indices[low] = key_index;
155
+ quick_sort_indice_inverse(input, left, low - 1, indices);
156
+ quick_sort_indice_inverse(input, low + 1, right, indices);
157
+ }
158
+ return low;
159
+ }
160
+
161
+ static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
162
+ float ymax1)
163
+ {
164
+ float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
165
+ float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
166
+ float i = w * h;
167
+ float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
168
+ return u <= 0.f ? 0.f : (i / u);
169
+ }
170
+
171
+
172
+ static int nms(int validCount, std::vector<float>& outputLocations, std::vector<int> classIds, std::vector<int>& order,
173
+ int filterId, float threshold)
174
+ {
175
+ for (int i = 0; i < validCount; ++i) {
176
+ if (order[i] == -1 || classIds[i] != filterId) {
177
+ continue;
178
+ }
179
+ int n = order[i];
180
+ for (int j = i + 1; j < validCount; ++j) {
181
+ int m = order[j];
182
+ if (m == -1 || classIds[i] != filterId) {
183
+ continue;
184
+ }
185
+ float xmin0 = outputLocations[n * 4 + 0];
186
+ float ymin0 = outputLocations[n * 4 + 1];
187
+ float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
188
+ float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
189
+
190
+ float xmin1 = outputLocations[m * 4 + 0];
191
+ float ymin1 = outputLocations[m * 4 + 1];
192
+ float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
193
+ float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
194
+
195
+ float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
196
+
197
+ if (iou > threshold) {
198
+ order[j] = -1;
199
+ }
200
+ }
201
+ }
202
+ return 0;
203
+ }
204
+
205
+ int32_t thread_func(int thread_idx){
206
+
207
+ printf("entry thread_func[%d]\n", thread_idx);
208
+
209
+ std::string image_path = "../bus.jpg";
210
+ std::string save_name = "out_yolov5_qnn";
211
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem";
212
+
213
+ // image process
214
+ cv::Mat frame = cv::imread(image_path);
215
+ cv::cvtColor(frame, frame , cv::COLOR_BGR2RGB);
216
+ cv::Scalar stds_scale(255, 255, 255);
217
+ cv::Size target_shape(MODEL_SIZE, MODEL_SIZE);
218
+
219
+ cv::Mat frame_resized = cv::Mat::zeros(MODEL_SIZE, MODEL_SIZE, CV_8UC3);
220
+ float scale = eqprocess(&frame, &frame_resized, MODEL_SIZE, MODEL_SIZE);
221
+
222
+ cv::Mat input_data;
223
+ frame_resized.convertTo(input_data, CV_32FC3);
224
+ cv::divide(input_data, stds_scale, input_data);
225
+
226
+ // model init
227
+ printf("Aidlite library version : %s\n", Aidlux::Aidlite::get_library_version().c_str());
228
+
229
+ // The following three logging interfaces can be combined as needed. If none of them is called, only error logs are printed to stderr by default.
230
+ Aidlux::Aidlite::set_log_level(Aidlux::Aidlite::LogLevel::INFO);
231
+ Aidlux::Aidlite::log_to_stderr();
232
+ // Aidlux::Aidlite::log_to_file("./qnn_yolov5_multi_");
233
+
234
+ Model* model = Model::create_instance(model_path);
235
+ if(model == nullptr){
236
+ printf("Create Model object failed !\n");
237
+ return EXIT_FAILURE;
238
+ }
239
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,640,640,3}};
240
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,40,40,255}, {1,20,20,255}, {1,80,80,255}};
241
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
242
+
243
+ Config* config = Config::create_instance();
244
+ if(config == nullptr){
245
+ printf("Create Config object failed !\n");
246
+ return EXIT_FAILURE;
247
+ }
248
+
249
+ config->implement_type = ImplementType::TYPE_LOCAL;
250
+ config->framework_type = FrameworkType::TYPE_QNN216;
251
+ config->accelerate_type = AccelerateType::TYPE_DSP;
252
+
253
+ std::unique_ptr<Interpreter>&& fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
254
+ if(fast_interpreter == nullptr){
255
+ printf("build_interpretper_from_model_and_config failed !\n");
256
+ return EXIT_FAILURE;
257
+ }
258
+
259
+ int result = fast_interpreter->init();
260
+ if(result != EXIT_SUCCESS){
261
+ printf("interpreter->init() failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+
265
+ result = fast_interpreter->load_model();
266
+ if(result != EXIT_SUCCESS){
267
+ printf("interpreter->load_model() failed !\n");
268
+ return EXIT_FAILURE;
269
+ }
270
+
271
+ printf("load model load success!\n");
272
+
273
+ float* stride8 = nullptr;
274
+ float* stride16 = nullptr;
275
+ float* stride32 = nullptr;
276
+
277
+ // post_process
278
+ std::vector<float> filterBoxes;
279
+ std::vector<float> objProbs;
280
+ std::vector<int> classId;
281
+
282
+ double sum_time_0 = 0.0, sum_time_1 = 0.0, sum_time_2 = 0.0;
283
+ int _counter = 10;
284
+ for(int idx = 0; idx < _counter; ++idx){
285
+ std::chrono::steady_clock::time_point st0 = std::chrono::steady_clock::now();
286
+
287
+ void* input_tensor_data = (void*)input_data.data;
288
+ result = fast_interpreter->set_input_tensor(0,input_tensor_data);
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->set_input_tensor() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+
294
+ std::chrono::steady_clock::time_point et0 = std::chrono::steady_clock::now();
295
+ std::chrono::steady_clock::duration dur0 = et0 - st0;
296
+ printf("current thread_idx[%d] [%d] set_input_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur0).count()*1000);
297
+ sum_time_0 += std::chrono::duration<double>(dur0).count()*1000;
298
+
299
+ std::chrono::steady_clock::time_point st1 = std::chrono::steady_clock::now();
300
+
301
+ result = fast_interpreter->invoke();
302
+ if(result != EXIT_SUCCESS){
303
+ printf("interpreter->invoke() failed !\n");
304
+ return EXIT_FAILURE;
305
+ }
306
+
307
+ std::chrono::steady_clock::time_point et1 = std::chrono::steady_clock::now();
308
+ std::chrono::steady_clock::duration dur1 = et1 - st1;
309
+ printf("current thread_idx[%d] [%d] invoke cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur1).count()*1000);
310
+ sum_time_1 += std::chrono::duration<double>(dur1).count()*1000;
311
+
312
+ std::chrono::steady_clock::time_point st2 = std::chrono::steady_clock::now();
313
+
314
+ uint32_t output_tensor_length_0 = 0;
315
+ result = fast_interpreter->get_output_tensor(0, (void**)&stride8, &output_tensor_length_0);
316
+ if(result != EXIT_SUCCESS){
317
+ printf("interpreter->get_output_tensor() 0 failed !\n");
318
+ return EXIT_FAILURE;
319
+ }
320
+ printf("sample : interpreter->get_output_tensor() 0 length is [%d] !\n", output_tensor_length_0);
321
+
322
+ uint32_t output_tensor_length_1 = 0;
323
+ result = fast_interpreter->get_output_tensor(1, (void**)&stride16, &output_tensor_length_1);
324
+ if(result != EXIT_SUCCESS){
325
+ printf("interpreter->get_output_tensor() 1 failed !\n");
326
+ return EXIT_FAILURE;
327
+ }
328
+ printf("sample : interpreter->get_output_tensor() 1 length is [%d] !\n", output_tensor_length_1);
329
+
330
+ uint32_t output_tensor_length_2 = 0;
331
+ result = fast_interpreter->get_output_tensor(2, (void**)&stride32, &output_tensor_length_2);
332
+ if(result != EXIT_SUCCESS){
333
+ printf("interpreter->get_output_tensor() 2 failed !\n");
334
+ return EXIT_FAILURE;
335
+ }
336
+ printf("sample : interpreter->get_output_tensor() 2 length is [%d] !\n", output_tensor_length_2);
337
+
338
+ std::chrono::steady_clock::time_point et2 = std::chrono::steady_clock::now();
339
+ std::chrono::steady_clock::duration dur2 = et2 - st2;
340
+ printf("current thread_idx[%d] [%d] get_output_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur2).count()*1000);
341
+ sum_time_2 += std::chrono::duration<double>(dur2).count()*1000;
342
+ }
343
+ printf("repeat [%d] time , input[%f] --- invoke[%f] --- output[%f] --- sum[%f]ms\n", _counter, sum_time_0, sum_time_1, sum_time_2, sum_time_0+sum_time_1+sum_time_2);
344
+
345
+ std::chrono::steady_clock::time_point pps = std::chrono::steady_clock::now();
346
+
347
+ filterBoxes.clear();
348
+ objProbs.clear();
349
+ classId.clear();
350
+ int validCount0 = process(stride8, filterBoxes, objProbs, classId, (float*)anchor0, STRIDE8_SIZE, STRIDE8_SIZE, 8, MODEL_SIZE);
351
+ int validCount1 = process(stride16, filterBoxes, objProbs, classId, (float*)anchor1, STRIDE16_SIZE, STRIDE16_SIZE, 16, MODEL_SIZE);
352
+ int validCount2 = process(stride32, filterBoxes, objProbs, classId, (float*)anchor2, STRIDE32_SIZE, STRIDE32_SIZE, 32, MODEL_SIZE);
353
+
354
+ int validCount = validCount0 + validCount1 +validCount2;
355
+
356
+ std::vector<int> indexArray;
357
+ for (int i = 0; i < validCount; ++i){
358
+ indexArray.push_back(i);
359
+ }
360
+
361
+ quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
362
+
363
+ std::set<int> class_set(std::begin(classId), std::end(classId));
364
+
365
+ for (auto c : class_set) {
366
+ nms(validCount, filterBoxes, classId, indexArray, c, NMS_THRESH);
367
+ }
368
+
369
+ std::chrono::steady_clock::time_point ppe = std::chrono::steady_clock::now();
370
+ std::chrono::steady_clock::duration durpp = ppe - pps;
371
+ printf("postprocess cost time : %f ms\n", std::chrono::duration<double>(durpp).count()*1000);
372
+
373
+ // Expected boxes come from SNPE2 FP32 CPU inference results; [x1, y1, x2, y2] coordinates are rounded down
374
+ const float expected_box_0[3][4] = {{210, 241, 285, 519}, {473, 229, 560, 522}, {108, 231, 231, 542}};
375
+ const float expected_box_5[1][4] = {{91, 131, 551, 464}};
376
+
377
+ unsigned int box_count = 0;
378
+ unsigned int verify_pass_count = 0;
379
+ for (int i = 0; i < validCount; ++i) {
380
+
381
+ if (indexArray[i] == -1) {
382
+ continue;
383
+ }
384
+ int n = indexArray[i];
385
+
386
+ float x1 = filterBoxes[n * 4 + 0] * scale;
387
+ float y1 = filterBoxes[n * 4 + 1] * scale;
388
+ float x2 = x1 + filterBoxes[n * 4 + 2] * scale;
389
+ float y2 = y1 + filterBoxes[n * 4 + 3] * scale;
390
+ int id = classId[n];
391
+ float obj_conf = objProbs[i];
392
+
393
+ // string show_info = "class " + to_string(id) + ": " + to_string(obj_conf);
394
+ string show_info = class_names[id] + ": " + to_string(obj_conf);
395
+ cv::putText(frame, show_info.c_str(), cv::Point(x1, y1), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2, 2); // color-BGR
396
+ cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2, 2, 0);
397
+
398
+ // Verify that the result is correct
399
+ printf("Result id[%d]-x1[%f]-y1[%f]-x2[%f]-y2[%f]\n", id, x1, y1, x2, y2);
400
+
401
+ ++box_count;
402
+ if(id == 0){
403
+ for(int idx = 0; idx < 3; ++idx){
404
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
405
+ expected_box_0[idx][0], expected_box_0[idx][1], expected_box_0[idx][2], expected_box_0[idx][3]);
406
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
407
+ if(coverage_ratio > 0.9){
408
+ ++verify_pass_count;
409
+ break;
410
+ }
411
+ }
412
+ }else if(id == 5){
413
+ for(int idx = 0; idx < 1; ++idx){
414
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
415
+ expected_box_5[idx][0], expected_box_5[idx][1], expected_box_5[idx][2], expected_box_5[idx][3]);
416
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
417
+ if(coverage_ratio > 0.9){
418
+ ++verify_pass_count;
419
+ break;
420
+ }
421
+ }
422
+ }else{
423
+ printf("ERROR : The YOLOv5 model inference result is not the expected classification category.\n");
424
+ return EXIT_FAILURE;
425
+ }
426
+ }
427
+
428
+ // Save the result image
429
+ cv::cvtColor(frame, frame , cv::COLOR_RGB2BGR);
430
+ cv::imwrite("result.jpg", frame);
431
+
432
+ result = fast_interpreter->destory();
433
+ if(result != EXIT_SUCCESS){
434
+ printf("interpreter->destory() failed !\n");
435
+ return EXIT_FAILURE;
436
+ }
437
+
438
+ printf("exit thread_func[%d]\n", thread_idx);
439
+
440
+ return EXIT_SUCCESS;
441
+ }
442
+
443
+ int main(int argc, char** args)
444
+ {
445
+
446
+ std::future<int> thread_01_result = std::async(std::launch::async, thread_func, 1);
447
+
448
+ if(EXIT_SUCCESS != thread_01_result.get()){
449
+ printf("ERROR : thread_01 run failed.\n");
450
+ return EXIT_FAILURE;
451
+ }
452
+
453
+ printf("Exit main function .\n");
454
+ return 0;
455
+ }
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem ADDED
The diff for this file is too large to render. See raw diff
 
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/python/bus.jpg ADDED

Git LFS Details

  • SHA256: 33b198a1d2839bb9ac4c65d61f9e852196793cae9a0781360859425f6022b69c
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,338 @@
1
+ import time
2
+ import numpy as np
3
+ import cv2
4
+ import aidlite
5
+ import argparse
6
+ import os
7
+ OBJ_CLASS_NUM = 80
8
+ NMS_THRESH = 0.45
9
+ BOX_THRESH = 0.5
10
+ MODEL_SIZE = 640
11
+
12
+ OBJ_NUMB_MAX_SIZE = 64
13
+ PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
+ STRIDE8_SIZE = (MODEL_SIZE / 8)
15
+ STRIDE16_SIZE = (MODEL_SIZE / 16)
16
+ STRIDE32_SIZE = (MODEL_SIZE / 32)
17
+
18
+ anchors = [[10, 13, 16, 30, 33, 23],
19
+ [30, 61, 62, 45, 59, 119],
20
+ [116, 90, 156, 198, 373, 326]]
21
+
22
+ current_p =os.path.dirname(os.path.abspath(__file__))
23
+
24
+ coco_class = [
25
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
+ 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
+ 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
+ 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
+ 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
+ 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
+ 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
+
34
+
35
+ def eqprocess(image, size1, size2):
36
+ h, w, _ = image.shape
37
+ mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
+ scale1 = h / size1
39
+ scale2 = w / size2
40
+ if scale1 > scale2:
41
+ scale = scale1
42
+ else:
43
+ scale = scale2
44
+ img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
+ mask[:int(h / scale), :int(w / scale), :] = img
46
+ return mask, scale
47
+
48
+
49
+ def xywh2xyxy(x):
50
+ '''
51
+ Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
+ '''
53
+ y = np.copy(x)
54
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
+ return y
59
+
60
+
61
+ def xyxy2xywh(box):
62
+ '''
63
+ Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
+ '''
65
+ box[:, 2:] = box[:, 2:] - box[:, :2]
66
+ return box
67
+
68
+
69
+ def NMS(dets, scores, thresh):
70
+ '''
71
+ Single-class NMS
72
+ dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
+ '''
74
+ x1 = dets[:, 0]
75
+ y1 = dets[:, 1]
76
+ x2 = dets[:, 2]
77
+ y2 = dets[:, 3]
78
+ areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
+ keep = []
80
+ index = scores.argsort()[::-1]
81
+ while index.size > 0:
82
+ i = index[0] # every time the first is the biggst, and add it directly
83
+ keep.append(i)
84
+ x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
+ y11 = np.maximum(y1[i], y1[index[1:]])
86
+ x22 = np.minimum(x2[i], x2[index[1:]])
87
+ y22 = np.minimum(y2[i], y2[index[1:]])
88
+ w = np.maximum(0, x22 - x11 + 1) # the weights of overlap
89
+ h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
+ overlaps = w * h
91
+ ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
+ idx = np.where(ious <= thresh)[0]
93
+ index = index[idx + 1] # because index start from 1
94
+
95
+ return keep
96
+
97
+
98
+ def clip_coords(boxes, img_shape):
99
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
100
+ boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
+ boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
+ boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
+ boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
+
105
+
106
+ def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
+ '''
108
+ Post-process the detection outputs
109
+ prediction: aidlite model prediction output
110
+ img0shape: original image shape
111
+ img1shape: model input image shape
112
+ conf_thres: confidence threshold
113
+ iou_thres: IoU threshold
114
+ return: list[np.ndarray(N, 5)], per-class box information (xywh, conf)
115
+ '''
116
+ h, w, _ = img1shape
117
+ valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
+ valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
+ valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
+
121
+ max_det = 300
122
+ max_wh = 7680
123
+ max_nms = 30000
124
+ valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
+ valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
+ sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
+ valid_condidates = valid_condidates[sort_id[:max_nms]]
128
+ boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
+ index = NMS(boxes, scores, iou_thres)[:max_det]
130
+ out_boxes = valid_condidates[index]
131
+ clip_coords(out_boxes[:, :4], img0shape)
132
+ out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
+ print("Detected {} regions".format(len(out_boxes)))
134
+ return out_boxes
135
+
136
+
137
+ def draw_detect_res(img, det_pred):
138
+ '''
139
+ Draw the detection results
140
+ '''
141
+ img = img.astype(np.uint8)
142
+ color_step = int(255 / len(coco_class))
143
+ for i in range(len(det_pred)):
144
+ x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
+ score = det_pred[i][4]
146
+ cls_id = int(det_pred[i][5])
147
+
148
+ print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
+
150
+ cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
+ cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
+ thickness=2)
153
+
154
+ return img
155
+
156
+
157
+ class Detect():
158
+ # YOLOv5 Detect head for detection models
159
+ def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
+ super().__init__()
161
+ self.nc = nc # number of classes
162
+ self.no = nc + 5 # number of outputs per anchor
163
+ self.stride = stride
164
+ self.nl = len(anchors) # number of detection layers
165
+ self.na = len(anchors[0]) // 2 # number of anchors
166
+ self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
+ self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
+
169
+ base_scale = image_size // 8
170
+ for i in range(self.nl):
171
+ self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
+
173
+ def _make_grid(self, nx=20, ny=20, i=0):
174
+ y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
+ yv, xv = np.meshgrid(y, x)
176
+ yv, xv = yv.T, xv.T
177
+ # add grid offset, i.e. y = 2.0 * x - 0.5
178
+ grid = np.stack((xv, yv), 2)
179
+ grid = grid[np.newaxis, np.newaxis, ...]
180
+ grid = np.repeat(grid, self.na, axis=1) - 0.5
181
+ anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
+ anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
+ anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
+ return grid, anchor_grid
185
+
186
+ def sigmoid(self, arr):
187
+ return 1 / (1 + np.exp(-arr))
188
+
189
+ def __call__(self, x):
190
+ z = [] # inference output
191
+ for i in range(self.nl):
192
+ bs, _, ny, nx = x[i].shape
193
+ x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
+ y = self.sigmoid(x[i])
195
+ y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
+ z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
+
199
+ return np.concatenate(z, 1)
200
+
201
+ def main():
202
+ args = parser_args()
203
+ target_model = args.target_model
204
+ model_type = args.model_type
205
+ size = int(args.size)
206
+ imgs = args.imgs
207
+ invoke_nums = int(args.invoke_nums)
208
+ print("Start main ... ...")
209
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
+ # aidlite.log_to_stderr()
211
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
+
214
+ config = aidlite.Config.create_instance()
215
+ if config is None:
216
+ print("Create config failed !")
217
+ return False
218
+
219
+
220
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
+ if model_type.lower()=="qnn":
222
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
+ elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
+
226
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
+ config.is_quantify_model = 1
228
+
229
+
230
+ model = aidlite.Model.create_instance(target_model)
231
+ if model is None:
232
+ print("Create model failed !")
233
+ return False
234
+ input_shapes = [[1, size, size, 3]]
235
+ output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
+
239
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
+ if interpreter is None:
241
+ print("build_interpretper_from_model_and_config failed !")
242
+ return None
243
+ result = interpreter.init()
244
+ if result != 0:
245
+ print(f"interpreter init failed !")
246
+ return False
247
+ result = interpreter.load_model()
248
+ if result != 0:
249
+ print("interpreter load model failed !")
250
+ return False
251
+ print("detect model load success!")
252
+
253
+ # image process
254
+ frame = cv2.imread(imgs)
255
+ # Resize the image proportionally (pad to a square canvas, then resize to the model input size)
256
+ img_processed = np.copy(frame)
257
+ [height, width, _] = img_processed.shape
258
+ length = max((height, width))
259
+ scale = length / size
260
+ ratio=[scale,scale]
261
+ image = np.zeros((length, length, 3), np.uint8)
262
+ image[0:height, 0:width] = img_processed
263
+ img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
+ img_input=cv2.resize(img_input,(size,size))
265
+
266
+ mean_data=[0, 0, 0]
267
+ std_data=[255, 255, 255]
268
+ img_input = (img_input-mean_data)/std_data # HWC
269
+
270
+ img_input = img_input.astype(np.float32)
271
+
272
+
273
+ # qnn run
274
+ invoke_time=[]
275
+ for i in range(invoke_nums):
276
+ result = interpreter.set_input_tensor(0, img_input.data)
277
+ if result != 0:
278
+ print("interpreter set_input_tensor() failed")
279
+
280
+ t1=time.time()
281
+ result = interpreter.invoke()
282
+ cost_time = (time.time()-t1)*1000
283
+ invoke_time.append(cost_time)
284
+
285
+ if result != 0:
286
+ print("interpreter invoke() failed")
287
+ stride8 = interpreter.get_output_tensor(0)
288
+ stride16 = interpreter.get_output_tensor(1)
289
+ stride32 = interpreter.get_output_tensor(2)
290
+
291
+
292
+ result = interpreter.destory()
293
+
294
+ ## timing statistics
295
+ max_invoke_time = max(invoke_time)
296
+ min_invoke_time = min(invoke_time)
297
+ mean_invoke_time = sum(invoke_time)/invoke_nums
298
+ var_invoketime=np.var(invoke_time)
299
+ print("=======================================")
300
+ print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
+ print("=======================================")
302
+
303
+ ## post-processing
304
+ stride = [8, 16, 32]
305
+ yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
+ validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
+ validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
+ validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
+ pred = yolo_head([validCount0, validCount1, validCount2])
310
+ det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
+ det_pred[np.isnan(det_pred)] = 0.0
312
+ det_pred[:, :4] = det_pred[:, :4] * scale
313
+ res_img = draw_detect_res(frame, det_pred)
314
+
315
+ save_path=os.path.join(current_p,"result.jpg")
316
+ cv2.imwrite(save_path, res_img)
317
+ print("Result image saved to", save_path)
318
+ print("=======================================")
319
+
320
+ return True
321
+
322
+
323
+
324
+
325
+ image_path = os.path.join(current_p,"bus.jpg")
326
+ def parser_args():
327
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
328
+ parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem'),help="inference model path")
329
+ parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
+ parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
+ parser.add_argument('--size',type=str,default=640,help="model input size")
333
+ args = parser.parse_args()
334
+ return args
335
+
336
+ if __name__ == "__main__":
337
+ main()
338
+
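
For reference, `demo_qnn.py` above exposes a handful of command-line options via `parser_args`; a fully explicit invocation from the `python/` directory looks like the sketch below, where every value simply restates the argparse default.

```bash
# Explicit equivalent of `python3 demo_qnn.py` (values mirror the argparse defaults above)
python3 demo_qnn.py \
    --target_model ../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem \
    --imgs ./bus.jpg \
    --invoke_nums 10 \
    --model_type QNN \
    --size 640
```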
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/python/result.jpg ADDED

Git LFS Details

  • SHA256: f6679449365d53eebe8cacea1b721d9c494f228dcf81c5e348d9961f37806abe
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,60 @@
+ ## Model Information
+ ### Source model
+
+ - Input shape: 640x640
+ - Number of parameters: 1.968M
+ - Model size: 7.56 MB
+ - Output shape: 1x25200x85
+
+ Source model repository: [yolov5](https://github.com/ultralytics/yolov5)
+
+ ### Converted model
+
+ - Precision: INT8
+ - Backend: QNN2.16
+ - Target Device: SNM972
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+ - Install the AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the AidLite SDK
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Install the QNN runtime that matches the backend above (QNN2.16 for this model)
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ # e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
+ ```
+
+ - Verify the AidLite SDK
+
+ ```bash
+ # Check the AidLite SDK C++ library version
+ python3 -c "import aidlite; print(aidlite.get_library_version())"
+
+ # Check the AidLite SDK Python library version
+ python3 -c "import aidlite; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run the Python demo
+
+ ```bash
+ cd python
+ python3 demo_qnn.py
+ ```
+
+ ### Run the C++ demo
+
+ ```bash
+ cd yolov5n/model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp
+ mkdir build
+ cd build
+ cmake ..
+ make
+ ./run_yolov5
+ ```
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,36 @@
+ cmake_minimum_required(VERSION 3.12)
+ project(aidlite_cpp_samples)
+
+ set(CMAKE_BUILD_TYPE Release)
+
+ set(OPENCV_INCLUDE_DIR /usr/include/opencv4)
+ set(OPENCV_LINK_DIR "")
+ set(OPENCV_LIBS opencv_imgcodecs opencv_imgproc opencv_core) # if these are static libraries, the link order matters
+
+ set(AIDLITE_INCLUDE_DIR /usr/local/include)
+ set(AIDLITE_LINK_DIR /usr/local/lib)
+ set(AIDLITE_LIB aidlite)
+
+ function(func_generate_sample_exe sample_name)
+
+     set(demo_name ${sample_name})
+
+     file(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/${demo_name}.cpp)
+
+     add_executable(${demo_name} ${src_files})
+
+     target_compile_options(${demo_name} PRIVATE -std=c++11)
+     target_include_directories(${demo_name} PUBLIC ${OPENCV_INCLUDE_DIR} ${AIDLITE_INCLUDE_DIR})
+     target_link_directories(${demo_name} PUBLIC ${OPENCV_LINK_DIR} ${AIDLITE_LINK_DIR})
+     target_link_libraries(${demo_name} PUBLIC ${AIDLITE_LIB} ${OPENCV_LIBS} pthread)
+     message(STATUS "[CMAKEMSG] ${demo_name} needs libraries: ${AIDLITE_LIB} ${OPENCV_LIBS}")
+
+ endfunction()
+
+ set(SAMPLE_LIST run_yolov5)
+
+ FOREACH(sample ${SAMPLE_LIST})
+     message("prepare to generate cpp sample : ${sample}")
+
+     func_generate_sample_exe(${sample})
+ ENDFOREACH(sample)
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/bus.jpg ADDED

Git LFS Details

  • SHA256: fb4914d123d97c440cd127ef0e98d4bdc68cd88e2683657528928b4a34014e16
  • Pointer size: 131 Bytes
  • Size of remote file: 181 kB
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/run_yolov5.cpp ADDED
@@ -0,0 +1,455 @@
1
+ #include <thread>
2
+ #include <future>
3
+ #include <opencv2/opencv.hpp>
4
+ #include "aidlux/aidlite/aidlite.hpp"
5
+
6
+ using namespace Aidlux::Aidlite;
7
+ using namespace std;
8
+
9
+ #define OBJ_CLASS_NUM 80
10
+ #define NMS_THRESH 0.45
11
+ #define BOX_THRESH 0.5
12
+ #define MODEL_SIZE 640
13
+ #define OBJ_NUMB_MAX_SIZE 64
14
+ #define PROP_BOX_SIZE (5+OBJ_CLASS_NUM)
15
+ #define STRIDE8_SIZE (MODEL_SIZE / 8)
16
+ #define STRIDE16_SIZE (MODEL_SIZE / 16)
17
+ #define STRIDE32_SIZE (MODEL_SIZE / 32)
18
+
19
+ const float anchor0[6] = {10, 13, 16, 30, 33, 23};
20
+ const float anchor1[6] = {30, 61, 62, 45, 59, 119};
21
+ const float anchor2[6] = {116, 90, 156, 198, 373, 326};
22
+
23
+ string class_names[] = {
24
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
25
+ "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
26
+ "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
27
+ "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
28
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
29
+ "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
30
+ "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
31
+ "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
32
+
33
+
34
+ static float sigmoid(float x) { return 1.f / (1.f + exp(-x)); }
35
+
36
+ float eqprocess(cv::Mat* src, cv::Mat* dst, int width, int height)
37
+ {
38
+ int w = src->cols;
39
+ int h = src->rows;
40
+ float scale_h = float(h) / float(height);
41
+ float scale_w = float(w) / float(width);
42
+
43
+ float scale;
44
+ if (scale_h > scale_w)
45
+ {
46
+ scale = scale_h;
47
+ }
48
+ else
49
+ {
50
+ scale = scale_w;
51
+ }
52
+
53
+ int rel_width = int(w / scale);
54
+ int rel_height = int(h / scale);
55
+
56
+ cv::Mat tmp = (*dst)(cv::Rect(0, 0, rel_width, rel_height));
57
+ cv::resize(*src, tmp, cv::Size(rel_width, rel_height));
58
+ return scale;
59
+ }
60
+
61
+ std::vector<std::string> split(const std::string& str)
62
+ {
63
+ std::stringstream ss(str);
64
+ std::vector<std::string> elems;
65
+ std::string item;
66
+ while (std::getline(ss, item, ','))
67
+ {
68
+ elems.push_back(item);
69
+ }
70
+ return elems;
71
+ }
72
+
73
+
74
+ int process(float* output, std::vector<float>& boxes, std::vector<float>& objProbs, std::vector<int>& classId, float * anchor, int grid_h, int grid_w, int stride, int imgsz)
75
+ {
76
+ int ct = 0;
77
+ int validCount = 0;
78
+ for (int a = 0; a < 3; a++)
79
+ {
80
+ for (int i = 0; i < grid_h; i++)
81
+ {
82
+ for (int j = 0; j < grid_w; j++)
83
+ {
84
+ int idx = a * PROP_BOX_SIZE + (i * grid_w + j) * 3 * PROP_BOX_SIZE;
85
+ float box_confidence = sigmoid(output[idx + 4]);
86
+ if (box_confidence >= BOX_THRESH )
87
+ {
88
+ float box_x = sigmoid(output[idx]) * 2 - 0.5;
89
+ float box_y = sigmoid(output[idx + 1]) * 2 - 0.5;
90
+ float box_w = pow(sigmoid(output[idx + 2]) * 2, 2);
91
+ float box_h = pow(sigmoid(output[idx + 3]) * 2, 2);
92
+
93
+ box_x = (box_x + j) * (float)stride;
94
+ box_y = (box_y + i) * (float)stride;
95
+ box_w = box_w * anchor[a * 2];
96
+ box_h = box_h * anchor[a * 2 + 1];
97
+
98
+ box_x -= (box_w / 2.0);
99
+ box_y -= (box_h / 2.0);
100
+
101
+ float maxClassProbs = 0;
102
+ int maxClassId = 0;
103
+
104
+ for(int k = 0; k < OBJ_CLASS_NUM ; k++)
105
+ {
106
+ float prob = output[idx + 5 + k];
107
+ if (prob > maxClassProbs)
108
+ {
109
+ maxClassId = k;
110
+ maxClassProbs = prob;
111
+ }
112
+ }
113
+ if (maxClassProbs > BOX_THRESH)
114
+ {
115
+ objProbs.push_back(sigmoid(maxClassProbs) * box_confidence);
116
+ classId.push_back(maxClassId);
117
+ validCount++;
118
+ boxes.push_back(box_x);
119
+ boxes.push_back(box_y);
120
+ boxes.push_back(box_w);
121
+ boxes.push_back(box_h);
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+
128
+ return validCount;
129
+ }
130
+
131
+
132
+ static int quick_sort_indice_inverse(std::vector<float>& input, int left, int right, std::vector<int>& indices)
133
+ {
134
+ float key;
135
+ int key_index;
136
+ int low = left;
137
+ int high = right;
138
+ if (left < right) {
139
+ key_index = indices[left];
140
+ key = input[left];
141
+ while (low < high) {
142
+ while (low < high && input[high] <= key) {
143
+ high--;
144
+ }
145
+ input[low] = input[high];
146
+ indices[low] = indices[high];
147
+ while (low < high && input[low] >= key) {
148
+ low++;
149
+ }
150
+ input[high] = input[low];
151
+ indices[high] = indices[low];
152
+ }
153
+ input[low] = key;
154
+ indices[low] = key_index;
155
+ quick_sort_indice_inverse(input, left, low - 1, indices);
156
+ quick_sort_indice_inverse(input, low + 1, right, indices);
157
+ }
158
+ return low;
159
+ }
160
+
161
+ static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
162
+ float ymax1)
163
+ {
164
+ float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
165
+ float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
166
+ float i = w * h;
167
+ float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
168
+ return u <= 0.f ? 0.f : (i / u);
169
+ }
170
+
171
+
172
+ static int nms(int validCount, std::vector<float>& outputLocations, std::vector<int> classIds, std::vector<int>& order,
173
+ int filterId, float threshold)
174
+ {
175
+ for (int i = 0; i < validCount; ++i) {
176
+ if (order[i] == -1 || classIds[i] != filterId) {
177
+ continue;
178
+ }
179
+ int n = order[i];
180
+ for (int j = i + 1; j < validCount; ++j) {
181
+ int m = order[j];
182
+ if (m == -1 || classIds[i] != filterId) {
183
+ continue;
184
+ }
185
+ float xmin0 = outputLocations[n * 4 + 0];
186
+ float ymin0 = outputLocations[n * 4 + 1];
187
+ float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
188
+ float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
189
+
190
+ float xmin1 = outputLocations[m * 4 + 0];
191
+ float ymin1 = outputLocations[m * 4 + 1];
192
+ float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
193
+ float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
194
+
195
+ float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
196
+
197
+ if (iou > threshold) {
198
+ order[j] = -1;
199
+ }
200
+ }
201
+ }
202
+ return 0;
203
+ }
204
+
205
+ int32_t thread_func(int thread_idx){
206
+
207
+ printf("entry thread_func[%d]\n", thread_idx);
208
+
209
+ std::string image_path = "../bus.jpg";
210
+ std::string save_name = "out_yolov5_qnn";
211
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem";
212
+
213
+ // image process
214
+ cv::Mat frame = cv::imread(image_path);
215
+ cv::cvtColor(frame, frame , cv::COLOR_BGR2RGB);
216
+ cv::Scalar stds_scale(255, 255, 255);
217
+ cv::Size target_shape(MODEL_SIZE, MODEL_SIZE);
218
+
219
+ cv::Mat frame_resized = cv::Mat::zeros(MODEL_SIZE, MODEL_SIZE, CV_8UC3);
220
+ float scale = eqprocess(&frame, &frame_resized, MODEL_SIZE, MODEL_SIZE);
221
+
222
+ cv::Mat input_data;
223
+ frame_resized.convertTo(input_data, CV_32FC3);
224
+ cv::divide(input_data, stds_scale, input_data);
225
+
226
+ // model init
227
+ printf("Aidlite library version : %s\n", Aidlux::Aidlite::get_library_version().c_str());
228
+
229
+ // The following three logging interfaces can be combined as needed. If none of them is called, only error logs are printed to stderr by default.
230
+ Aidlux::Aidlite::set_log_level(Aidlux::Aidlite::LogLevel::INFO);
231
+ Aidlux::Aidlite::log_to_stderr();
232
+ // Aidlux::Aidlite::log_to_file("./qnn_yolov5_multi_");
233
+
234
+ Model* model = Model::create_instance(model_path);
235
+ if(model == nullptr){
236
+ printf("Create Model object failed !\n");
237
+ return EXIT_FAILURE;
238
+ }
239
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,640,640,3}};
240
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,40,40,255}, {1,20,20,255}, {1,80,80,255}};
241
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
242
+
243
+ Config* config = Config::create_instance();
244
+ if(config == nullptr){
245
+ printf("Create Config object failed !\n");
246
+ return EXIT_FAILURE;
247
+ }
248
+
249
+ config->implement_type = ImplementType::TYPE_LOCAL;
250
+ config->framework_type = FrameworkType::TYPE_QNN216;
251
+ config->accelerate_type = AccelerateType::TYPE_DSP;
252
+
253
+ std::unique_ptr<Interpreter>&& fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
254
+ if(fast_interpreter == nullptr){
255
+ printf("build_interpretper_from_model_and_config failed !\n");
256
+ return EXIT_FAILURE;
257
+ }
258
+
259
+ int result = fast_interpreter->init();
260
+ if(result != EXIT_SUCCESS){
261
+ printf("interpreter->init() failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+
265
+ result = fast_interpreter->load_model();
266
+ if(result != EXIT_SUCCESS){
267
+ printf("interpreter->load_model() failed !\n");
268
+ return EXIT_FAILURE;
269
+ }
270
+
271
+ printf("load model load success!\n");
272
+
273
+ float* stride8 = nullptr;
274
+ float* stride16 = nullptr;
275
+ float* stride32 = nullptr;
276
+
277
+ // post_process
278
+ std::vector<float> filterBoxes;
279
+ std::vector<float> objProbs;
280
+ std::vector<int> classId;
281
+
282
+ double sum_time_0 = 0.0, sum_time_1 = 0.0, sum_time_2 = 0.0;
283
+ int _counter = 10;
284
+ for(int idx = 0; idx < _counter; ++idx){
285
+ std::chrono::steady_clock::time_point st0 = std::chrono::steady_clock::now();
286
+
287
+ void* input_tensor_data = (void*)input_data.data;
288
+ result = fast_interpreter->set_input_tensor(0,input_tensor_data);
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->set_input_tensor() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+
294
+ std::chrono::steady_clock::time_point et0 = std::chrono::steady_clock::now();
295
+ std::chrono::steady_clock::duration dur0 = et0 - st0;
296
+ printf("current thread_idx[%d] [%d] set_input_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur0).count()*1000);
297
+ sum_time_0 += std::chrono::duration<double>(dur0).count()*1000;
298
+
299
+ std::chrono::steady_clock::time_point st1 = std::chrono::steady_clock::now();
300
+
301
+ result = fast_interpreter->invoke();
302
+ if(result != EXIT_SUCCESS){
303
+ printf("interpreter->invoke() failed !\n");
304
+ return EXIT_FAILURE;
305
+ }
306
+
307
+ std::chrono::steady_clock::time_point et1 = std::chrono::steady_clock::now();
308
+ std::chrono::steady_clock::duration dur1 = et1 - st1;
309
+ printf("current thread_idx[%d] [%d] invoke cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur1).count()*1000);
310
+ sum_time_1 += std::chrono::duration<double>(dur1).count()*1000;
311
+
312
+ std::chrono::steady_clock::time_point st2 = std::chrono::steady_clock::now();
313
+
314
+ uint32_t output_tensor_length_0 = 0;
315
+ result = fast_interpreter->get_output_tensor(0, (void**)&stride8, &output_tensor_length_0);
316
+ if(result != EXIT_SUCCESS){
317
+ printf("interpreter->get_output_tensor() 0 failed !\n");
318
+ return EXIT_FAILURE;
319
+ }
320
+ printf("sample : interpreter->get_output_tensor() 0 length is [%d] !\n", output_tensor_length_0);
321
+
322
+ uint32_t output_tensor_length_1 = 0;
323
+ result = fast_interpreter->get_output_tensor(1, (void**)&stride16, &output_tensor_length_1);
324
+ if(result != EXIT_SUCCESS){
325
+ printf("interpreter->get_output_tensor() 1 failed !\n");
326
+ return EXIT_FAILURE;
327
+ }
328
+ printf("sample : interpreter->get_output_tensor() 1 length is [%d] !\n", output_tensor_length_1);
329
+
330
+ uint32_t output_tensor_length_2 = 0;
331
+ result = fast_interpreter->get_output_tensor(2, (void**)&stride32, &output_tensor_length_2);
332
+ if(result != EXIT_SUCCESS){
333
+ printf("interpreter->get_output_tensor() 2 failed !\n");
334
+ return EXIT_FAILURE;
335
+ }
336
+ printf("sample : interpreter->get_output_tensor() 2 length is [%d] !\n", output_tensor_length_2);
337
+
338
+ std::chrono::steady_clock::time_point et2 = std::chrono::steady_clock::now();
339
+ std::chrono::steady_clock::duration dur2 = et2 - st2;
340
+ printf("current thread_idx[%d] [%d] get_output_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur2).count()*1000);
341
+ sum_time_2 += std::chrono::duration<double>(dur2).count()*1000;
342
+ }
343
+ printf("repeat [%d] time , input[%f] --- invoke[%f] --- output[%f] --- sum[%f]ms\n", _counter, sum_time_0, sum_time_1, sum_time_2, sum_time_0+sum_time_1+sum_time_2);
344
+
345
+ std::chrono::steady_clock::time_point pps = std::chrono::steady_clock::now();
346
+
347
+ filterBoxes.clear();
348
+ objProbs.clear();
349
+ classId.clear();
350
+ int validCount0 = process(stride8, filterBoxes, objProbs, classId, (float*)anchor0, STRIDE8_SIZE, STRIDE8_SIZE, 8, MODEL_SIZE);
351
+ int validCount1 = process(stride16, filterBoxes, objProbs, classId, (float*)anchor1, STRIDE16_SIZE, STRIDE16_SIZE, 16, MODEL_SIZE);
352
+ int validCount2 = process(stride32, filterBoxes, objProbs, classId, (float*)anchor2, STRIDE32_SIZE, STRIDE32_SIZE, 32, MODEL_SIZE);
353
+
354
+ int validCount = validCount0 + validCount1 +validCount2;
355
+
356
+ std::vector<int> indexArray;
357
+ for (int i = 0; i < validCount; ++i){
358
+ indexArray.push_back(i);
359
+ }
360
+
361
+ quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
362
+
363
+ std::set<int> class_set(std::begin(classId), std::end(classId));
364
+
365
+ for (auto c : class_set) {
366
+ nms(validCount, filterBoxes, classId, indexArray, c, NMS_THRESH);
367
+ }
368
+
369
+ std::chrono::steady_clock::time_point ppe = std::chrono::steady_clock::now();
370
+ std::chrono::steady_clock::duration durpp = ppe - pps;
371
+ printf("postprocess cost time : %f ms\n", std::chrono::duration<double>(durpp).count()*1000);
372
+
373
+ // Expected boxes come from SNPE2 FP32 CPU inference results; [x1, y1, x2, y2] coordinates are rounded down
374
+ const float expected_box_0[3][4] = {{210, 241, 285, 519}, {473, 229, 560, 522}, {108, 231, 231, 542}};
375
+ const float expected_box_5[1][4] = {{91, 131, 551, 464}};
376
+
377
+ unsigned int box_count = 0;
378
+ unsigned int verify_pass_count = 0;
379
+ for (int i = 0; i < validCount; ++i) {
380
+
381
+ if (indexArray[i] == -1) {
382
+ continue;
383
+ }
384
+ int n = indexArray[i];
385
+
386
+ float x1 = filterBoxes[n * 4 + 0] * scale;
387
+ float y1 = filterBoxes[n * 4 + 1] * scale;
388
+ float x2 = x1 + filterBoxes[n * 4 + 2] * scale;
389
+ float y2 = y1 + filterBoxes[n * 4 + 3] * scale;
390
+ int id = classId[n];
391
+ float obj_conf = objProbs[i];
392
+
393
+ // string show_info = "class " + to_string(id) + ": " + to_string(obj_conf);
394
+ string show_info = class_names[id] + ": " + to_string(obj_conf);
395
+ cv::putText(frame, show_info.c_str(), cv::Point(x1, y1), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2, 2); // color-BGR
396
+ cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2, 2, 0);
397
+
398
+ // 结果正确性验证
399
+ printf("Result id[%d]-x1[%f]-y1[%f]-x2[%f]-y2[%f]\n", id, x1, y1, x2, y2);
400
+
401
+ ++box_count;
402
+ if(id == 0){
403
+ for(int idx = 0; idx < 3; ++idx){
404
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
405
+ expected_box_0[idx][0], expected_box_0[idx][1], expected_box_0[idx][2], expected_box_0[idx][3]);
406
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
407
+ if(coverage_ratio > 0.9){
408
+ ++verify_pass_count;
409
+ break;
410
+ }
411
+ }
412
+ }else if(id == 5){
413
+ for(int idx = 0; idx < 1; ++idx){
414
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
415
+ expected_box_5[idx][0], expected_box_5[idx][1], expected_box_5[idx][2], expected_box_5[idx][3]);
416
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
417
+ if(coverage_ratio > 0.9){
418
+ ++verify_pass_count;
419
+ break;
420
+ }
421
+ }
422
+ }else{
423
+ printf("ERROR : The Yolov5s model inference result is not the expected classification category.\n");
424
+ return EXIT_FAILURE;
425
+ }
426
+ }
427
+
428
+ // 保存结果图片
429
+ cv::cvtColor(frame, frame , cv::COLOR_RGB2BGR);
430
+ cv::imwrite("result.jpg", frame);
431
+
432
+ result = fast_interpreter->destory();
433
+ if(result != EXIT_SUCCESS){
434
+ printf("interpreter->destory() failed !\n");
435
+ return EXIT_FAILURE;
436
+ }
437
+
438
+ printf("exit thread_func[%d]\n", thread_idx);
439
+
440
+ return EXIT_SUCCESS;
441
+ }
442
+
443
+ int main(int argc, char** args)
444
+ {
445
+
446
+ std::future<int> thread_01_result = std::async(std::launch::async, thread_func, 1);
447
+
448
+ if(EXIT_SUCCESS != thread_01_result.get()){
449
+ printf("ERROR : thread_01 run failed.\n");
450
+ return EXIT_FAILURE;
451
+ }
452
+
453
+ printf("Exit main function .\n");
454
+ return 0;
455
+ }
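
A note on the verification step above: a detection is accepted only when its overlap with one of the SNPE2 FP32 reference boxes exceeds 0.9. `CalculateOverlap` is defined earlier in `run_yolov5.cpp` and is not part of this hunk, so the sketch below is an assumption that it behaves like a plain IoU; the detected box in the example is made up purely for illustration.

```python
# Minimal sketch, assuming CalculateOverlap computes an IoU-style ratio.
# The detected box here is hypothetical; the reference box is the first
# expected person box (class 0) from the C++ verification table above.
def overlap_ratio(a, b):
    # a, b: [x1, y1, x2, y2]
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

detected = [212, 243, 287, 520]    # hypothetical detection, already scaled to image coordinates
reference = [210, 241, 285, 519]   # expected_box_0[0] from the code above
print(overlap_ratio(detected, reference))  # ~0.94, so this detection would pass the 0.9 gate
```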
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem ADDED
The diff for this file is too large to render. See raw diff
 
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/python/bus.jpg ADDED

Git LFS Details

  • SHA256: 33b198a1d2839bb9ac4c65d61f9e852196793cae9a0781360859425f6022b69c
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,338 @@
+ import time
+ import numpy as np
+ import cv2
+ import aidlite
+ import argparse
+ import os
+ OBJ_CLASS_NUM = 80
+ NMS_THRESH = 0.45
+ BOX_THRESH = 0.5
+ MODEL_SIZE = 640
+
+ OBJ_NUMB_MAX_SIZE = 64
+ PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
+ STRIDE8_SIZE = (MODEL_SIZE / 8)
+ STRIDE16_SIZE = (MODEL_SIZE / 16)
+ STRIDE32_SIZE = (MODEL_SIZE / 32)
+
+ anchors = [[10, 13, 16, 30, 33, 23],
+            [30, 61, 62, 45, 59, 119],
+            [116, 90, 156, 198, 373, 326]]
+
+ current_p = os.path.dirname(os.path.abspath(__file__))
+
+ coco_class = [
+     'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+     'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
+     'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
+     'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
+     'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
+     'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
+     'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
+     'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
+
+
+ def eqprocess(image, size1, size2):
+     h, w, _ = image.shape
+     mask = np.zeros((size1, size2, 3), dtype=np.float32)
+     scale1 = h / size1
+     scale2 = w / size2
+     if scale1 > scale2:
+         scale = scale1
+     else:
+         scale = scale2
+     img = cv2.resize(image, (int(w / scale), int(h / scale)))
+     mask[:int(h / scale), :int(w / scale), :] = img
+     return mask, scale
+
+
+ def xywh2xyxy(x):
+     '''
+     Box (center x, center y, width, height) to (x1, y1, x2, y2)
+     '''
+     y = np.copy(x)
+     y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+     y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+     y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+     y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+     return y
+
+
+ def xyxy2xywh(box):
+     '''
+     Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
+     '''
+     box[:, 2:] = box[:, 2:] - box[:, :2]
+     return box
+
+
+ def NMS(dets, scores, thresh):
+     '''
+     Single-class NMS.
+     dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
+     '''
+     x1 = dets[:, 0]
+     y1 = dets[:, 1]
+     x2 = dets[:, 2]
+     y2 = dets[:, 3]
+     areas = (y2 - y1 + 1) * (x2 - x1 + 1)
+     keep = []
+     index = scores.argsort()[::-1]
+     while index.size > 0:
+         i = index[0]  # the first is always the highest-scoring box, keep it directly
+         keep.append(i)
+         x11 = np.maximum(x1[i], x1[index[1:]])  # corners of the overlap region
+         y11 = np.maximum(y1[i], y1[index[1:]])
+         x22 = np.minimum(x2[i], x2[index[1:]])
+         y22 = np.minimum(y2[i], y2[index[1:]])
+         w = np.maximum(0, x22 - x11 + 1)  # width of the overlap
+         h = np.maximum(0, y22 - y11 + 1)  # height of the overlap
+         overlaps = w * h
+         ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
+         idx = np.where(ious <= thresh)[0]
+         index = index[idx + 1]  # offset by 1 because index[0] was consumed
+
+     return keep
+
+
+ def clip_coords(boxes, img_shape):
+     # Clip xyxy bounding boxes to image shape (height, width)
+     boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0])  # x1
+     boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1])  # y1
+     boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2])  # x2
+     boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3])  # y2
+
+
+ def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
+     '''
+     Post-process the detection output.
+     prediction: aidlite model prediction output
+     img0shape: original image shape
+     img1shape: model input image shape
+     conf_thres: confidence threshold
+     iou_thres: IoU threshold
+     return: np.ndarray(N, 6+), per-box info as xywh, conf, class id
+     '''
+     h, w, _ = img1shape
+     valid_condidates = prediction[prediction[..., 4] > conf_thres]
+     valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
+     valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
+
+     max_det = 300
+     max_wh = 7680
+     max_nms = 30000
+     valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
+     valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
+     sort_id = np.argsort(valid_condidates[:, 4])[::-1]
+     valid_condidates = valid_condidates[sort_id[:max_nms]]
+     # Offset boxes by class index so the single-class NMS is effectively class-aware
+     boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
+     index = NMS(boxes, scores, iou_thres)[:max_det]
+     out_boxes = valid_condidates[index]
+     clip_coords(out_boxes[:, :4], img0shape)
+     out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
+     print("Detected {} regions".format(len(out_boxes)))
+     return out_boxes
+
+
+ def draw_detect_res(img, det_pred):
+     '''
+     Draw the detection results.
+     '''
+     img = img.astype(np.uint8)
+     color_step = int(255 / len(coco_class))
+     for i in range(len(det_pred)):
+         x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
+         score = det_pred[i][4]
+         cls_id = int(det_pred[i][5])
+
+         print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
+
+         cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+         cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
+                       thickness=2)
+
+     return img
+
+
+ class Detect():
+     # YOLOv5 Detect head for detection models
+     def __init__(self, nc=80, anchors=(), stride=[], image_size=640):  # detection layer
+         super().__init__()
+         self.nc = nc  # number of classes
+         self.no = nc + 5  # number of outputs per anchor
+         self.stride = stride
+         self.nl = len(anchors)  # number of detection layers
+         self.na = len(anchors[0]) // 2  # number of anchors
+         self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
+         self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
+
+         base_scale = image_size // 8
+         for i in range(self.nl):
+             self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
+
+     def _make_grid(self, nx=20, ny=20, i=0):
+         y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
+         yv, xv = np.meshgrid(y, x)
+         yv, xv = yv.T, xv.T
+         # add grid offset, i.e. y = 2.0 * x - 0.5
+         grid = np.stack((xv, yv), 2)
+         grid = grid[np.newaxis, np.newaxis, ...]
+         grid = np.repeat(grid, self.na, axis=1) - 0.5
+         anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
+         anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
+         anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
+         return grid, anchor_grid
+
+     def sigmoid(self, arr):
+         return 1 / (1 + np.exp(-arr))
+
+     def __call__(self, x):
+         z = []  # inference output
+         for i in range(self.nl):
+             bs, _, ny, nx = x[i].shape
+             x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
+             y = self.sigmoid(x[i])
+             y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i]  # xy
+             y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
+             z.append(y.reshape(bs, self.na * nx * ny, self.no))
+
+         return np.concatenate(z, 1)
+
+ def main():
+     args = parser_args()
+     target_model = args.target_model
+     model_type = args.model_type
+     size = int(args.size)
+     imgs = args.imgs
+     invoke_nums = int(args.invoke_nums)
+     print("Start main ... ...")
+     # aidlite.set_log_level(aidlite.LogLevel.INFO)
+     # aidlite.log_to_stderr()
+     # print(f"Aidlite library version : {aidlite.get_library_version()}")
+     # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
+
+     config = aidlite.Config.create_instance()
+     if config is None:
+         print("Create config failed !")
+         return False
+
+
+     config.implement_type = aidlite.ImplementType.TYPE_LOCAL
+     if model_type.lower() == "qnn":
+         config.framework_type = aidlite.FrameworkType.TYPE_QNN
+     elif model_type.lower() == "snpe2" or model_type.lower() == "snpe":
+         config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
+
+     config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
+     config.is_quantify_model = 1
+
+
+     model = aidlite.Model.create_instance(target_model)
+     if model is None:
+         print("Create model failed !")
+         return False
+     input_shapes = [[1, size, size, 3]]
+     output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
+     model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
+                                output_shapes, aidlite.DataType.TYPE_FLOAT32)
+
+     interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
+     if interpreter is None:
+         print("build_interpretper_from_model_and_config failed !")
+         return None
+     result = interpreter.init()
+     if result != 0:
+         print("interpreter init failed !")
+         return False
+     result = interpreter.load_model()
+     if result != 0:
+         print("interpreter load model failed !")
+         return False
+     print("detect model load success!")
+
+     # image process
+     frame = cv2.imread(imgs)
+     # letterbox-style scaling: pad the image to a square, then resize to the model input size
+     img_processed = np.copy(frame)
+     [height, width, _] = img_processed.shape
+     length = max((height, width))
+     scale = length / size
+     ratio = [scale, scale]
+     image = np.zeros((length, length, 3), np.uint8)
+     image[0:height, 0:width] = img_processed
+     img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+     img_input = cv2.resize(img_input, (size, size))
+
+     mean_data = [0, 0, 0]
+     std_data = [255, 255, 255]
+     img_input = (img_input - mean_data) / std_data  # HWC
+
+     img_input = img_input.astype(np.float32)
+
+
+     # qnn run
+     invoke_time = []
+     for i in range(invoke_nums):
+         result = interpreter.set_input_tensor(0, img_input.data)
+         if result != 0:
+             print("interpreter set_input_tensor() failed")
+
+         t1 = time.time()
+         result = interpreter.invoke()
+         cost_time = (time.time() - t1) * 1000
+         invoke_time.append(cost_time)
+
+         if result != 0:
+             print("interpreter invoke() failed")
+         stride8 = interpreter.get_output_tensor(0)
+         stride16 = interpreter.get_output_tensor(1)
+         stride32 = interpreter.get_output_tensor(2)
+
+
+     result = interpreter.destory()
+
+     ## timing statistics
+     max_invoke_time = max(invoke_time)
+     min_invoke_time = min(invoke_time)
+     mean_invoke_time = sum(invoke_time) / invoke_nums
+     var_invoketime = np.var(invoke_time)
+     print("=======================================")
+     print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
+     print("=======================================")
+
+     ## post-processing
+     stride = [8, 16, 32]
+     yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
+     # reshape the flat output buffers back to NHWC, then transpose to NCHW for the decode head
+     validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
+     validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
+     validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
+     pred = yolo_head([validCount0, validCount1, validCount2])
+     det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
+     det_pred[np.isnan(det_pred)] = 0.0
+     det_pred[:, :4] = det_pred[:, :4] * scale
+     res_img = draw_detect_res(frame, det_pred)
+
+     save_path = os.path.join(current_p, "result.jpg")
+     cv2.imwrite(save_path, res_img)
+     print("Result image saved to", save_path)
+     print("=======================================")
+
+     return True
+
+
+
+
+ image_path = os.path.join(current_p, "bus.jpg")
+ def parser_args():
+     parser = argparse.ArgumentParser(description="Run model benchmarks")
+     parser.add_argument('--target_model', type=str, default=os.path.join(current_p, '../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem'), help="inference model path")
+     parser.add_argument('--imgs', type=str, default=image_path, help="Predict images path")
+     parser.add_argument('--invoke_nums', type=str, default=10, help="Inference nums")
+     parser.add_argument('--model_type', type=str, default='QNN', help="run backend")
+     parser.add_argument('--size', type=str, default=640, help="model input size")
+     args = parser.parse_args()
+     return args
+
+ if __name__ == "__main__":
+     main()
+
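
For readers tracing the numbers through `Detect.__call__` above: each sigmoid-activated prediction is decoded as xy = (2·σ(t) − 0.5 + cell) · stride and wh = (2·σ(t))² · anchor, with the −0.5 offset already folded into `self.grid`. Below is a minimal numeric sketch with made-up raw values on the stride-32 map, using the first stride-32 anchor (116, 90) from `anchors`; it mirrors the decode, it is not part of the demo itself.

```python
# Minimal sketch of the YOLOv5 box decode used by Detect.__call__ above.
# The raw values (tx, ty, tw, th) and the cell position are hypothetical.
import numpy as np

def sigmoid(t):
    return 1 / (1 + np.exp(-t))

tx, ty, tw, th = 0.2, -0.1, 0.3, 0.5   # hypothetical raw head outputs
cx, cy, stride = 10, 7, 32             # grid cell (10, 7) on the stride-32 map
anchor_w, anchor_h = 116, 90           # first stride-32 anchor

x = (2 * sigmoid(tx) - 0.5 + cx) * stride   # center x in model-input pixels (~339.2)
y = (2 * sigmoid(ty) - 0.5 + cy) * stride   # center y in model-input pixels (~238.4)
w = (2 * sigmoid(tw)) ** 2 * anchor_w       # box width  (~153.1)
h = (2 * sigmoid(th)) ** 2 * anchor_h       # box height (~139.5)
print(x, y, w, h)
```

The concatenated predictions are then filtered by `detect_postprocess()` and multiplied by `scale = max(height, width) / 640` to return to original-image coordinates, which is why `python3 demo_qnn.py` can be run as-is against the bundled `bus.jpg` using the argparse defaults shown above.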
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/python/result.jpg ADDED

Git LFS Details

  • SHA256: efcf9248ed31924acbce99cb80b5a8ce72cb0eb953550a3b960129c6b1878605
  • Pointer size: 131 Bytes
  • Size of remote file: 492 kB