qc903113684 commited on
Commit
aaea8de
·
verified ·
1 Parent(s): b27ae22

Upload 35 files

Browse files
Files changed (36) hide show
  1. .gitattributes +10 -0
  2. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/README.md +55 -0
  3. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png +3 -0
  4. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
  5. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp +366 -0
  6. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
  7. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png +3 -0
  8. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
  9. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/README.md +55 -0
  10. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
  11. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
  12. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +366 -0
  13. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
  14. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png +3 -0
  15. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
  16. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/README.md +55 -0
  17. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png +3 -0
  18. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt +31 -0
  19. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp +366 -0
  20. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin +3 -0
  21. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png +3 -0
  22. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py +133 -0
  23. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/README.md +55 -0
  24. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png +3 -0
  25. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
  26. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp +366 -0
  27. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
  28. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png +3 -0
  29. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
  30. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/README.md +55 -0
  31. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
  32. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
  33. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +366 -0
  34. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
  35. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png +3 -0
  36. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
37
+ model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
38
+ model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
39
+ model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
40
+ model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
41
+ model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
42
+ model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
43
+ model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
44
+ model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
45
+ model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters:13.911M
5
+ - Model size:53.56MB,
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // 用于 memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // 定義顏色表 (19個類別)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax 計算 (通道維度)
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // 提取最大類別索引
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // 解碼為 RGB 圖像
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: 雙線性插值
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: 獲取預測類別
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: 解碼為 RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Step 2: 转换颜色通道 (RGB → BGR)
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047345e0f0511d5df1e64a01f2f420336de2e6863e688509dfb512aac707b2e0
3
+ size 14178304
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters:13.911M
5
+ - Model size:53.56MB,
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: W8A16
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // 用于 memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // 定義顏色表 (19個類別)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax 計算 (通道維度)
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // 提取最大類別索引
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // 解碼為 RGB 圖像
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: 雙線性插值
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: 獲取預測類別
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: 解碼為 RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Step 2: 转换颜色通道 (RGB → BGR)
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d2e1e62c7c9dec18911ab610f56725ed953d35e5fa2bb3df52d9dce6f8f25a
3
+ size 14403648
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters:13.911M
5
+ - Model size:53.56MB,
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // 用于 memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // 定義顏色表 (19個類別)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Arg-max class index per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class labels into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: per-pixel predicted class
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); only runs when channels == 3
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ result = fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50c93cf5c0f77e1db8c6ef4708fd80a3f0b2fc7be2958b2afa55d3ce3b84e4cf
3
+ size 28144512
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters: 13.911M
5
+ - Model size: 53.56MB
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: QCS8550
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the AidLite build that matches the backend above (QNN2.16 here), e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
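+ 
+ run_test also accepts a few optional flags (parsed in run_test.cpp); the flag names below come from parse_args, while the sample values are only placeholders:
+ 
+ ```bash
+ # All flags are optional; the defaults already point at ../../models and ../2.png
+ ./run_test --target_model ../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin \
+            --imgs ../2.png \
+            --invoke_nums 20 \
+            --model_type QNN
+ ```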
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // for memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // Color palette for the 19 classes
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // Bilinear interpolation (input layout NCHW; with n=1 this reduces to CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Arg-max class index per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class labels into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: per-pixel predicted class
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); only runs when channels == 3
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ result = fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1195497a2b5198ec425069d2c62685e1398ee58cd51ba20fec110042937d23bd
3
+ size 14129152
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters: 13.911M
5
+ - Model size: 53.56MB
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: W8A16
13
+ - Backend: QNN2.16
14
+ - Target Device: QCS8550
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the AidLite build that matches the backend above (QNN2.16 here), e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
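+ 
+ Note: python/demo_qnn.py reads `2.png` from its own directory and writes the colorized mask as `0000.jpg` next to it. A minimal way to try a different picture (the path below is a placeholder) is to drop it in under that name before running the demo:
+ 
+ ```bash
+ cd python
+ cp /path/to/your_image.png 2.png   # placeholder path; the script resizes to 1024x512 itself
+ python3 demo_qnn.py
+ ls 0000.jpg
+ ```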
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // for memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // Color palette for the 19 classes
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // Bilinear interpolation (input layout NCHW; with n=1 this reduces to CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Arg-max class index per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class labels into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: per-pixel predicted class
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); only runs when channels == 3
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ result = fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
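+ // Turn the last run's raw output into a segmentation image via post_process() (defined
+ // earlier in this file) and save it as results.png.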
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c14ec4ae6d19b5a0670d5c02616e40b1012e1325065b76f6db532c6df53dba
3
+ size 14235648
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
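+ # Map each per-pixel class id predicted by the model to an RGB color for visualization.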
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=road
12
+ # 1=sidewalk, 2=building, 3=wall, 4=fence, 5=pole
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=traffic light, 7=traffic sign, 8=vegetation, 9=terrain, 10=sky
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=person, 12=rider, 13=car, 14=truck, 15=bus
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=train, 17=motorcycle, 18=bicycle (last two colors are unused)
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
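+ # Thin wrapper around the aidlite QNN interpreter: it loads the compiled context binary,
+ # enables DSP acceleration, and exposes inference as a simple callable.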
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
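+ # Reshape the returned output buffer to NHWC (1, 64, 128, 19) class scores.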
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
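+ # Demo: load one image, run the segmentation model, and save the colorized prediction.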
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
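+ # Normalize with ImageNet mean/std in RGB order (the BGR frame was flipped to RGB above).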
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"segmentation inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
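+ # Upsample the 64x128 class scores back to the 512x1024 input size, apply softmax over the
+ # class dimension, and take the per-pixel argmax as the predicted label map.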
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+