Upload 35 files
- .gitattributes +10 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/README.md +55 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png +3 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp +366 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png +3 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/README.md +55 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +366 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png +3 -0
- model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/README.md +55 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp +366 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py +133 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/README.md +55 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp +366 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/README.md +55 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +366 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png +3 -0
- model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
.gitattributes
CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/README.md
ADDED
@@ -0,0 +1,55 @@
## Model Information

### Source model

- Input shape: 1x3x512x1024
- Number of parameters: 13.911M
- Model size: 53.56MB
- Output shape: 1x19x64x128

Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: FV01 QCS6490

## Inference with AidLite SDK

### SDK installation

Model Farm uses the AidLite SDK as its model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the AidLite package matching the backend above,
# e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo

#### python

```bash
cd python
python3 demo_qnn.py
```

#### cpp

```bash
cd ffnet40s/model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
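`run_test` also accepts optional command-line overrides; the flags below are the ones handled by `parse_args` in `cpp/run_test.cpp`, shown with their built-in defaults:

```bash
# All flags are optional; values shown are the defaults from the Args struct
./run_test \
  --target_model ../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin \
  --imgs ../2.png \
  --invoke_nums 10 \
  --model_type QNN
```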
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png
ADDED
(binary image file, stored with Git LFS)
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt
ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS ">version: ${OpenCV_VERSION}")
message(STATUS "Include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp
ADDED
@@ -0,0 +1,366 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring>   // for memcpy
#include <chrono>    // std::chrono is used for timing below
#include <memory>    // std::unique_ptr
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table (19 classes). Note: the per-entry names below are inherited
// from a VOC-style demo and do not match the Cityscapes label set.
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation (input layout: NCHW; with n=1 this reduces to CHW)
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Per-pixel argmax over channels
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode class indices into an RGB image
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

// Transpose a 4-D tensor; tsp_dims gives the permutation of the source axes
int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    cv::Mat input_image = frame.clone();
    // Leftover containers from a detection demo; unused in this segmentation path.
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<cv::Mat> masks;
    std::vector<float> class_scores;
    cv::RNG rng;
    cv::Mat masked_img;

    // Rearrange the NHWC model output (1x64x128x19) into NCHW (1x19x64x128)
    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1*64*128*19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64*128, CV_32F, format_data);
    std::cout << "proto_buffer dims: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;
    // Step 1: bilinear interpolation
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);

    // Step 2: softmax
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel predicted class
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");
    // Step 4: decode to RGB
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));

    // Convert color channels (RGB -> BGR); channels is 19 here, so this branch never runs
    if (channels == 3) {
        cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
    }
    cv::Mat resized_cubic;
    cv::resize(image, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    delete[] format_data;  // release the transposed buffer
    return resized_cubic;
}


int invoke(const Args& args) {
    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN216;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1, model_h, model_w, 3}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1, 64, 128, 19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model (capture the return code so the check below is meaningful)
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return 1;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
    // Preprocess: resize to the model input size, then normalize with the
    // ImageNet mean/std (values on the 0-255 scale)
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }
    }

    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
            args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:047345e0f0511d5df1e64a01f2f420336de2e6863e688509dfb512aac707b2e0
size 14178304
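This is a Git LFS pointer file, not the model itself; a checkout made without LFS support contains only pointers like this. The actual binaries are fetched with the standard Git LFS commands:

```bash
git lfs install   # one-time Git LFS setup
git lfs pull      # replaces pointer files with the actual binaries
```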
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png
ADDED
(binary image file, stored with Git LFS)
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py
ADDED
@@ -0,0 +1,133 @@
import numpy as np
import torch
import cv2
import sys
import time
import aidlite
import os


def decode_segmap(image, nc=19):
    # NOTE: the names below are VOC-style and do not match the Cityscapes label set
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb


class ffnet40sQnn:
    def __init__(self):
        super().__init__()
        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
        if self.model is None:
            print("Create model failed !")
            return

        self.config = aidlite.Config.create_instance()
        if self.config is None:
            print("Create config failed !")
            return

        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
        self.config.is_quantify_model = 1

        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
            return
        input_shapes = [[1, 512, 1024, 3]]
        output_shapes = [[1, 64, 128, 19]]
        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
                                        output_shapes, aidlite.DataType.TYPE_FLOAT32)

        result = self.interpreter.init()
        if result != 0:
            print("interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")

        print("model load success!")

    def __call__(self, input):
        invoke_time = []
        invoke_nums = 10
        for i in range(invoke_nums):
            result = self.interpreter.set_input_tensor(0, input.data)
            if result != 0:
                print("interpreter set_input_tensor() failed")
            t1 = time.time()
            result = self.interpreter.invoke()
            cost_time = (time.time() - t1) * 1000
            invoke_time.append(cost_time)

        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / invoke_nums
        var_invoketime = np.var(invoke_time)
        print("====================================")
        print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("====================================")
        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64, 128, 19).copy()
        return features_0


ffnet_segm = ffnet40sQnn()

frame_ct = 0
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "2.png")

image = cv2.imread(image_path)
image = cv2.resize(image, (1024, 512))
frame = np.ascontiguousarray(image[:, :, ::-1])  # BGR -> RGB

mean_data = [123.675, 116.28, 103.53]
std_data = [58.395, 57.12, 57.375]
img_input = (frame - mean_data) / std_data  # HWC
input = img_input.astype(np.float32)
input = input[np.newaxis, ...]
input_size = input.shape[1], input.shape[2]  # (H, W)
t0 = time.time()
out = ffnet_segm(input)
use_time = round((time.time() - t0) * 1000, 2)
print(f"segmentation inference time: {use_time} ms")
out = np.transpose(out, (0, 3, 1, 2))  # NHWC -> NCHW
out = torch.from_numpy(out)

output = torch.nn.functional.interpolate(
    out, size=input_size, mode="bilinear", align_corners=False
)
output_data = torch.nn.functional.softmax(output, dim=1).data
max_probs, predictions = output_data.max(1)

prediction = predictions.numpy().astype(np.uint8)
test = decode_segmap(prediction[0])

cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
ffnet_segm.interpreter.destory()
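Note that `torch` is imported above only for the interpolate/softmax/argmax postprocess. For reference, a NumPy/OpenCV-only sketch of the same steps (a hypothetical `postprocess_numpy` helper, not part of this repo; it assumes the 1x64x128x19 float32 output returned above):

```python
import cv2
import numpy as np

def postprocess_numpy(out_nhwc, size_hw=(512, 1024)):
    # out_nhwc: float32 model output of shape (1, 64, 128, 19)
    logits = out_nhwc[0]                                    # (64, 128, 19)
    h, w = size_hw
    # Bilinear upsample of all 19 class channels to the input resolution
    logits = cv2.resize(logits, (w, h), interpolation=cv2.INTER_LINEAR)
    # Softmax over the class axis (argmax is unchanged by softmax; kept only
    # for parity with the torch version)
    e = np.exp(logits - logits.max(axis=-1, keepdims=True))
    probs = e / e.sum(axis=-1, keepdims=True)
    return probs.argmax(axis=-1).astype(np.uint8)           # (h, w) class map
```

The resulting (H, W) class map can then be colored with `decode_segmap` exactly as in the script.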
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/README.md
ADDED
@@ -0,0 +1,55 @@
## Model Information

### Source model

- Input shape: 1x3x512x1024
- Number of parameters: 13.911M
- Model size: 53.56MB
- Output shape: 1x19x64x128

Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: W8A16
- Backend: QNN2.16
- Target Device: FV01 QCS6490

## Inference with AidLite SDK

### SDK installation

Model Farm uses the AidLite SDK as its model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the AidLite package matching the backend above,
# e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo

#### python

```bash
cd python
python3 demo_qnn.py
```

#### cpp

```bash
cd ffnet40s/model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
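As with the INT8 variant, `run_test` accepts optional flag overrides (see `parse_args` in `cpp/run_test.cpp`); for this variant the default model is the W8A16 context binary:

```bash
./run_test \
  --target_model ../../models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin \
  --imgs ../2.png --invoke_nums 10 --model_type QNN
```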
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png
ADDED
(binary image file, stored with Git LFS)
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt
ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS ">version: ${OpenCV_VERSION}")
message(STATUS "Include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp
ADDED
@@ -0,0 +1,366 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring>   // for memcpy
#include <chrono>    // std::chrono is used for timing below
#include <memory>    // std::unique_ptr
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table (19 classes). Note: the per-entry names below are inherited
// from a VOC-style demo and do not match the Cityscapes label set.
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation (input layout: NCHW; with n=1 this reduces to CHW)
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Per-pixel argmax over channels
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode class indices into an RGB image
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

// Transpose a 4-D tensor; tsp_dims gives the permutation of the source axes
int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    cv::Mat input_image = frame.clone();
    // Leftover containers from a detection demo; unused in this segmentation path.
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<cv::Mat> masks;
    std::vector<float> class_scores;
    cv::RNG rng;
    cv::Mat masked_img;

    // Rearrange the NHWC model output (1x64x128x19) into NCHW (1x19x64x128)
    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1*64*128*19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64*128, CV_32F, format_data);
    std::cout << "proto_buffer dims: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;
    // Step 1: bilinear interpolation
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);

    // Step 2: softmax
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel predicted class
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");
    // Step 4: decode to RGB
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));

    // Convert color channels (RGB -> BGR); channels is 19 here, so this branch never runs
    if (channels == 3) {
        cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
    }
    cv::Mat resized_cubic;
    cv::resize(image, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    delete[] format_data;  // release the transposed buffer
    return resized_cubic;
}


int invoke(const Args& args) {
    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN216;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1, model_h, model_w, 3}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1, 64, 128, 19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model (capture the return code so the check below is meaningful)
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return 1;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
    // Preprocess: resize to the model input size, then normalize with the
    // ImageNet mean/std (values on the 0-255 scale)
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }
    }

    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
            args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:49d2e1e62c7c9dec18911ab610f56725ed953d35e5fa2bb3df52d9dce6f8f25a
size 14403648
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png
ADDED
(binary image file, stored with Git LFS)
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import cv2
|
4 |
+
import sys
|
5 |
+
import time
|
6 |
+
import aidlite
|
7 |
+
import os
|
8 |
+
|
9 |
+
|
10 |
+
def decode_segmap(image, nc=19):
|
11 |
+
label_colors = np.array([(0, 0, 0), # 0=background
|
12 |
+
# 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
|
13 |
+
(128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
|
14 |
+
# 6=bus, 7=car, 8=cat, 9=chair, 10=cow
|
15 |
+
(0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
|
16 |
+
# 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
|
17 |
+
(192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
|
18 |
+
# 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
|
19 |
+
(0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
|
20 |
+
r = np.zeros_like(image).astype(np.uint8)
|
21 |
+
g = np.zeros_like(image).astype(np.uint8)
|
22 |
+
b = np.zeros_like(image).astype(np.uint8)
|
23 |
+
for l in range(0, nc):
|
24 |
+
idx = image == l
|
25 |
+
r[idx] = label_colors[l, 0]
|
26 |
+
g[idx] = label_colors[l, 1]
|
27 |
+
b[idx] = label_colors[l, 2]
|
28 |
+
rgb = np.stack([r, g, b], axis=2)
|
29 |
+
return rgb
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
class ffnet40sQnn:
|
34 |
+
def __init__(self):
|
35 |
+
super().__init__()
|
36 |
+
self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
|
37 |
+
if self.model is None:
|
38 |
+
print("Create model failed !")
|
39 |
+
return
|
40 |
+
|
41 |
+
self.config = aidlite.Config.create_instance()
|
42 |
+
if self.config is None:
|
43 |
+
print("build_interpretper_from_model_and_config failed !")
|
44 |
+
return
|
45 |
+
|
46 |
+
self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
|
47 |
+
self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
|
48 |
+
self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
|
49 |
+
# self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
|
50 |
+
self.config.is_quantify_model = 1
|
51 |
+
|
52 |
+
self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
|
53 |
+
if self.interpreter is None:
|
54 |
+
print("build_interpretper_from_model_and_config failed !")
|
55 |
+
return
|
56 |
+
input_shapes = [[1,512,1024,3]]
|
57 |
+
output_shapes = [[1,64,128,19]]
|
58 |
+
self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
|
59 |
+
output_shapes, aidlite.DataType.TYPE_FLOAT32)
|
60 |
+
|
61 |
+
if self.interpreter is None:
|
62 |
+
print("build_interpretper_from_model_and_config failed !")
|
63 |
+
result = self.interpreter.init()
|
64 |
+
if result != 0:
|
65 |
+
print(f"interpreter init failed !")
|
66 |
+
result = self.interpreter.load_model()
|
67 |
+
if result != 0:
|
68 |
+
print("interpreter load model failed !")
|
69 |
+
|
70 |
+
print(" model load success!")
|
71 |
+
|
72 |
+
def __call__(self, input):
|
73 |
+
self.interpreter.set_input_tensor(0,input)
|
74 |
+
invoke_time=[]
|
75 |
+
invoke_nums =10
|
76 |
+
for i in range(invoke_nums):
|
77 |
+
result = self.interpreter.set_input_tensor(0, input.data)
|
78 |
+
if result != 0:
|
79 |
+
print("interpreter set_input_tensor() failed")
|
80 |
+
t1=time.time()
|
81 |
+
result = self.interpreter.invoke()
|
82 |
+
cost_time = (time.time()-t1)*1000
|
83 |
+
invoke_time.append(cost_time)
|
84 |
+
|
85 |
+
max_invoke_time = max(invoke_time)
|
86 |
+
min_invoke_time = min(invoke_time)
|
87 |
+
mean_invoke_time = sum(invoke_time)/invoke_nums
|
88 |
+
var_invoketime=np.var(invoke_time)
|
89 |
+
print("====================================")
|
90 |
+
print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
|
91 |
+
print("====================================")
|
92 |
+
features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
|
93 |
+
return features_0
|
94 |
+
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
ffnet_segm = ffnet40sQnn()
|
100 |
+
|
101 |
+
frame_ct=0
|
102 |
+
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
|
103 |
+
|
104 |
+
image = cv2.imread(image_path)
|
105 |
+
image=cv2.resize(image,(1024,512))
|
106 |
+
frame = np.ascontiguousarray(image[:,:,::-1])
|
107 |
+
|
108 |
+
mean_data=[123.675, 116.28, 103.53]
|
109 |
+
std_data=[58.395, 57.12, 57.375]
|
110 |
+
img_input = (frame-mean_data)/std_data # HWC
|
111 |
+
input = img_input.astype(np.float32)
|
112 |
+
input = input[np.newaxis, ...]
|
113 |
+
input_size = input.shape[1], input.shape[2] #H w
|
114 |
+
t0 = time.time()
|
115 |
+
out = ffnet_segm(input)
|
116 |
+
use_time = round((time.time() - t0) * 1000, 2)
|
117 |
+
print(f"pose detction inference_time:{use_time} ms")
|
118 |
+
out = np.transpose(out, (0, 3, 1,2))
|
119 |
+
out = torch.from_numpy(out)
|
120 |
+
|
121 |
+
output = torch.nn.functional.interpolate(
|
122 |
+
out, size=input_size, mode="bilinear", align_corners=False
|
123 |
+
)
|
124 |
+
output_data = torch.nn.functional.softmax(output, dim=1).data
|
125 |
+
max_probs, predictions = output_data.max(1)
|
126 |
+
|
127 |
+
|
128 |
+
prediction = predictions.numpy().astype(np.uint8)
|
129 |
+
test = decode_segmap( prediction[0])
|
130 |
+
|
131 |
+
cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
|
132 |
+
ffnet_segm.interpreter.destory()
|
133 |
+
|
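The demo above only needs torch for the bilinear upsample and the softmax. Since per-pixel argmax is invariant under softmax, a torch-free variant can take the argmax directly at the model's 64x128 output resolution and upscale the label map afterwards. A minimal NumPy/OpenCV sketch (assuming the same 1x64x128x19 NHWC output; `postprocess_numpy` is a hypothetical helper, and boundary pixels may differ slightly from bilinear-then-argmax):

```python
import cv2
import numpy as np

def postprocess_numpy(out_nhwc, out_hw=(512, 1024)):
    # out_nhwc: (1, 64, 128, 19) raw class scores from the interpreter.
    scores = out_nhwc[0]                               # (64, 128, 19)
    pred = scores.argmax(axis=-1).astype(np.uint8)     # argmax(softmax(x)) == argmax(x)
    # Nearest-neighbour keeps the labels discrete while upscaling.
    return cv2.resize(pred, (out_hw[1], out_hw[0]), interpolation=cv2.INTER_NEAREST)
```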
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
## Model Information
### Source model
- Input shape: 1x3x512x1024
- Number of parameters: 13.911M
- Model size: 53.56MB
- Output shape: 1x19x64x128

Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: FP16
- Backend: QNN2.16
- Target Device: QCS8550

## Inference with AidLite SDK

### SDK installation
Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN variant that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
#### python
```bash
cd python
python3 demo_qnn.py
```

#### cpp
```bash
cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
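`run_test` also accepts the command-line flags parsed in `run_test.cpp`, so the model path, test image, and number of timed invocations can be changed without rebuilding, e.g.:

```bash
./run_test --target_model ../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin \
           --imgs ../2.png --invoke_nums 20 --model_type QNN
```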
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png ADDED (binary test image, stored with Git LFS)
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS "  version: ${OpenCV_VERSION}")
message(STATUS "  include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
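The CMakeLists.txt above does not set a build type, so the compiler runs at CMake's default optimization level; for benchmarking it is usually worth requesting an optimized build explicitly:

```bash
cmake -DCMAKE_BUILD_TYPE=Release ..
make
```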
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring>
#include <chrono>   // timing of invoke() calls
#include <memory>   // std::unique_ptr
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table (19 classes)
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation (input layout: NCHW with n=1, i.e. CHW)
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Per-pixel argmax over channels
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode a label map into an RGB image
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    // Reorder the raw NHWC output (1x64x128x19) into NCHW (1x19x64x128).
    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1 * 64 * 128 * 19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64 * 128, CV_32F, format_data);
    std::cout << "proto_buffer dims: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;
    // Step 1: bilinear interpolation (an identity copy here, since target size equals source size)
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);

    // Step 2: softmax over classes
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel argmax
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");
    // Step 4: decode to an RGB color map
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), rgb_data.data());

    // Step 5: decode_segmap emits RGB; convert to BGR for cv::imwrite
    cv::Mat image_bgr;
    cv::cvtColor(image, image_bgr, cv::COLOR_RGB2BGR);
    cv::Mat resized_cubic;
    cv::resize(image_bgr, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    delete[] format_data;
    return resized_cubic;
}



int invoke(const Args& args) {
    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN216;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1, model_h, model_w, 3}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1, 64, 128, 19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model (assign the return code so the check below actually tests it)
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return 1;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }
    }

    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
           args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
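The index arithmetic in `transpose()` is easy to get wrong, so it is worth cross-checking against a reference. A small NumPy sketch of the same NHWC-to-NCHW permutation used in `post_process` (`tsp_dims = {0,3,1,2}`), which also shows the flat layout wrapped by `proto_buffer`:

```python
import numpy as np

src = np.arange(1 * 64 * 128 * 19, dtype=np.float32).reshape(1, 64, 128, 19)
ref = np.transpose(src, (0, 3, 1, 2))   # NHWC -> NCHW, what transpose() computes
assert ref.shape == (1, 19, 64, 128)
flat = ref.reshape(19, 64 * 128)        # layout handed to cv::Mat proto_buffer(19, 64*128, ...)
print(flat.shape)
```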
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50c93cf5c0f77e1db8c6ef4708fd80a3f0b2fc7be2958b2afa55d3ce3b84e4cf
size 28144512
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png ADDED (binary test image, stored with Git LFS)
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
import numpy as np
import torch
import cv2
import sys
import time
import aidlite
import os


def decode_segmap(image, nc=19):
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb


class ffnet40sQnn:
    def __init__(self):
        super().__init__()
        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin"))
        if self.model is None:
            print("Create model failed !")
            return

        self.config = aidlite.Config.create_instance()
        if self.config is None:
            print("Create config failed !")
            return

        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
        self.config.is_quantify_model = 1

        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
            return
        input_shapes = [[1, 512, 1024, 3]]
        output_shapes = [[1, 64, 128, 19]]
        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
                                        output_shapes, aidlite.DataType.TYPE_FLOAT32)

        result = self.interpreter.init()
        if result != 0:
            print("interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")

        print("model load success!")

    def __call__(self, input):
        invoke_time = []
        invoke_nums = 10
        for i in range(invoke_nums):
            result = self.interpreter.set_input_tensor(0, input.data)
            if result != 0:
                print("interpreter set_input_tensor() failed")
            t1 = time.time()
            result = self.interpreter.invoke()
            cost_time = (time.time() - t1) * 1000
            invoke_time.append(cost_time)

        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / invoke_nums
        var_invoketime = np.var(invoke_time)
        print("====================================")
        print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("====================================")
        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64, 128, 19).copy()
        return features_0


ffnet_segm = ffnet40sQnn()

frame_ct = 0
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "2.png")

image = cv2.imread(image_path)
image = cv2.resize(image, (1024, 512))
frame = np.ascontiguousarray(image[:, :, ::-1])  # BGR -> RGB

mean_data = [123.675, 116.28, 103.53]
std_data = [58.395, 57.12, 57.375]
img_input = (frame - mean_data) / std_data  # HWC
input = img_input.astype(np.float32)
input = input[np.newaxis, ...]
input_size = input.shape[1], input.shape[2]  # H, W
t0 = time.time()
out = ffnet_segm(input)
use_time = round((time.time() - t0) * 1000, 2)
print(f"segmentation inference_time: {use_time} ms")
out = np.transpose(out, (0, 3, 1, 2))  # NHWC -> NCHW
out = torch.from_numpy(out)

output = torch.nn.functional.interpolate(
    out, size=input_size, mode="bilinear", align_corners=False
)
output_data = torch.nn.functional.softmax(output, dim=1).data
max_probs, predictions = output_data.max(1)

prediction = predictions.numpy().astype(np.uint8)
test = decode_segmap(prediction[0])

cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
ffnet_segm.interpreter.destory()  # note: the AidLite SDK spells this method "destory"
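`decode_segmap` loops over classes with boolean masks; an equivalent vectorized form indexes a palette array directly, which is shorter and faster. A sketch (`decode_segmap_fast` is a hypothetical helper; the truncated palette stands in for the full 19-color table):

```python
import numpy as np

def decode_segmap_fast(pred, palette):
    # palette: (nc, 3) uint8 array; fancy indexing maps each label to its RGB triple.
    return palette[pred]

palette = np.array([(0, 0, 0), (128, 0, 0), (0, 128, 0)], dtype=np.uint8)  # truncated example
rgb = decode_segmap_fast(np.array([[0, 1], [2, 0]]), palette)
print(rgb.shape)  # (2, 2, 3)
```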
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
## Model Information
### Source model
- Input shape: 1x3x512x1024
- Number of parameters: 13.911M
- Model size: 53.56MB
- Output shape: 1x19x64x128

Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: QCS8550

## Inference with AidLite SDK

### SDK installation
Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN variant that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
#### python
```bash
cd python
python3 demo_qnn.py
```

#### cpp
```bash
cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
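The Python demo reads `2.png` next to the script and writes the colorized prediction to the same directory as `0000.jpg` (the `'%04d.jpg' % frame_ct` pattern with `frame_ct = 0`), so a quick smoke test is:

```bash
cd python
python3 demo_qnn.py
ls -l 0000.jpg   # colorized segmentation written by the demo
```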
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png ADDED (binary test image, stored with Git LFS)
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS "  version: ${OpenCV_VERSION}")
message(STATUS "  include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring>
#include <chrono>   // timing of invoke() calls
#include <memory>   // std::unique_ptr
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table (19 classes)
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation (input layout: NCHW with n=1, i.e. CHW)
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Per-pixel argmax over channels
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode a label map into an RGB image
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    // Reorder the raw NHWC output (1x64x128x19) into NCHW (1x19x64x128).
    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1 * 64 * 128 * 19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64 * 128, CV_32F, format_data);
    std::cout << "proto_buffer dims: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;
    // Step 1: bilinear interpolation (an identity copy here, since target size equals source size)
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);

    // Step 2: softmax over classes
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel argmax
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");
    // Step 4: decode to an RGB color map
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), rgb_data.data());

    // Step 5: decode_segmap emits RGB; convert to BGR for cv::imwrite
    cv::Mat image_bgr;
    cv::cvtColor(image, image_bgr, cv::COLOR_RGB2BGR);
    cv::Mat resized_cubic;
    cv::resize(image_bgr, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    delete[] format_data;
    return resized_cubic;
}



int invoke(const Args& args) {
    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN216;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1, model_h, model_w, 3}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1, 64, 128, 19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model (assign the return code so the check below actually tests it)
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return 1;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }
    }

    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
           args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
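Note that in `post_process` the interpolation target (64x128) equals the source size, so `bilinear_interpolate` is effectively an identity copy and the real upscaling happens later in `cv::resize`; with the half-pixel mapping used above, scale 1 gives zero fractional weights. A quick NumPy/OpenCV check of that identity property:

```python
import cv2
import numpy as np

x = np.random.rand(64, 128).astype(np.float32)
y = cv2.resize(x, (128, 64), interpolation=cv2.INTER_LINEAR)  # same size: identity mapping
assert np.allclose(x, y)
```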
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1195497a2b5198ec425069d2c62685e1398ee58cd51ba20fec110042937d23bd
size 14129152
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png ADDED (binary test image, stored with Git LFS)
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
import numpy as np
import torch
import cv2
import sys
import time
import aidlite
import os


def decode_segmap(image, nc=19):
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb


class ffnet40sQnn:
    def __init__(self):
        super().__init__()
        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
        if self.model is None:
            print("Create model failed !")
            return

        self.config = aidlite.Config.create_instance()
        if self.config is None:
            print("Create config failed !")
            return

        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
        self.config.is_quantify_model = 1

        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
            return
        input_shapes = [[1, 512, 1024, 3]]
        output_shapes = [[1, 64, 128, 19]]
        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
                                        output_shapes, aidlite.DataType.TYPE_FLOAT32)

        result = self.interpreter.init()
        if result != 0:
            print("interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")

        print("model load success!")

    def __call__(self, input):
        invoke_time = []
        invoke_nums = 10
        for i in range(invoke_nums):
            result = self.interpreter.set_input_tensor(0, input.data)
            if result != 0:
                print("interpreter set_input_tensor() failed")
            t1 = time.time()
            result = self.interpreter.invoke()
            cost_time = (time.time() - t1) * 1000
            invoke_time.append(cost_time)

        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / invoke_nums
        var_invoketime = np.var(invoke_time)
        print("====================================")
        print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("====================================")
        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64, 128, 19).copy()
        return features_0


ffnet_segm = ffnet40sQnn()

frame_ct = 0
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "2.png")

image = cv2.imread(image_path)
image = cv2.resize(image, (1024, 512))
frame = np.ascontiguousarray(image[:, :, ::-1])  # BGR -> RGB

mean_data = [123.675, 116.28, 103.53]
std_data = [58.395, 57.12, 57.375]
img_input = (frame - mean_data) / std_data  # HWC
input = img_input.astype(np.float32)
input = input[np.newaxis, ...]
input_size = input.shape[1], input.shape[2]  # H, W
t0 = time.time()
out = ffnet_segm(input)
use_time = round((time.time() - t0) * 1000, 2)
print(f"segmentation inference_time: {use_time} ms")
out = np.transpose(out, (0, 3, 1, 2))  # NHWC -> NCHW
out = torch.from_numpy(out)

output = torch.nn.functional.interpolate(
    out, size=input_size, mode="bilinear", align_corners=False
)
output_data = torch.nn.functional.softmax(output, dim=1).data
max_probs, predictions = output_data.max(1)

prediction = predictions.numpy().astype(np.uint8)
test = decode_segmap(prediction[0])

cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
ffnet_segm.interpreter.destory()  # note: the AidLite SDK spells this method "destory"
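For a quick sanity check of a run, it can help to summarize which classes the model actually predicted. A small sketch (`class_histogram` is a hypothetical helper; feed it `prediction[0]` from the demo above):

```python
import numpy as np

def class_histogram(pred, nc=19):
    # pred: (H, W) uint8 label map.
    counts = np.bincount(pred.ravel(), minlength=nc)
    return {cls: int(n) for cls, n in enumerate(counts) if n > 0}

print(class_histogram(np.array([[0, 1], [1, 7]], dtype=np.uint8)))  # {0: 1, 1: 2, 7: 1}
```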
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
## Model Information
### Source model
- Input shape: 1x3x512x1024
- Number of parameters: 13.911M
- Model size: 53.56MB
- Output shape: 1x19x64x128

Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: W8A16
- Backend: QNN2.16
- Target Device: QCS8550

## Inference with AidLite SDK

### SDK installation
Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN variant that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
#### python
```bash
cd python
python3 demo_qnn.py
```

#### cpp
```bash
cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
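The `.ctx.bin` context binaries are tracked with Git LFS, so after a plain clone they may still be pointer files; fetching the real payload for just this variant (a sketch, assuming `git-lfs` is installed):

```bash
git lfs pull --include="model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/models/*"
```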
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png ADDED (binary test image, stored with Git LFS)
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS "  version: ${OpenCV_VERSION}")
message(STATUS "  include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp
ADDED
@@ -0,0 +1,366 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring>   // for memcpy
#include <chrono>    // for timing the invoke() calls
#include <memory>    // for std::unique_ptr
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table for the 19 output classes (Pascal VOC palette and names,
// although the model itself predicts Cityscapes classes).
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation. Input layout is NCHW with n=1, i.e. CHW.
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                // Weighted average of the four neighbouring pixels.
                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension, numerically stabilized by
// subtracting the per-pixel maximum before exponentiation.
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Per-pixel argmax over the channel dimension.
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode class indices to an RGB image using the color table above.
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

// Minimal command-line parser; see the usage example after main() below.
Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

// Generic 4-D transpose: tsp_dims gives, for each destination axis, the
// source axis it is taken from. Here it converts the NHWC model output
// (1x64x128x19) to NCHW (1x19x64x128) with tsp_dims = {0,3,1,2}.
int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(unsigned int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(unsigned int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(unsigned int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(unsigned int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

// Post-process: NHWC->NCHW transpose, bilinear resize, softmax, per-pixel
// argmax, palette decoding, and upscaling back to the input resolution.
cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1 * 64 * 128 * 19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64 * 128, CV_32F, format_data);
    std::cout << "proto_buffer dims: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;

    // Step 1: bilinear interpolation (target size equals source size here,
    // so this is effectively a copy; kept to mirror the Python demo).
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
    delete[] format_data;

    // Step 2: softmax over the 19 class channels
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel argmax to get the predicted class map
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");

    // Step 4: decode to RGB, then convert to BGR for OpenCV I/O
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
    cv::cvtColor(image, image, cv::COLOR_RGB2BGR);

    // Step 5: upscale the 64x128 class map to the model input resolution
    cv::Mat resized_cubic;
    cv::resize(image, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    return resized_cubic;
}


int invoke(const Args& args) {
    std::cout << "Start main ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN216;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    // NHWC shapes of the converted model: 1x512x1024x3 in, 1x64x128x19 out.
    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1, model_h, model_w, 3}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1, 64, 128, 19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return EXIT_FAILURE;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);

    // Preprocess: resize to the model input size, then normalize with the
    // ImageNet mean/std. Note: unlike the Python demo, the BGR image is not
    // converted to RGB before normalization.
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }
    }

    // Latency statistics over invoke_nums runs (milliseconds).
    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
           args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
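
The flags handled by `parse_args` above can override the defaults, for example when running from the `build` directory (a usage sketch; the paths match the `Args` defaults):

```bash
./run_test --target_model ../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin \
           --imgs ../2.png \
           --invoke_nums 20 \
           --model_type QNN
```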
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:67c14ec4ae6d19b5a0670d5c02616e40b1012e1325065b76f6db532c6df53dba
size 14235648
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png
ADDED
(image stored with Git LFS)
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py
ADDED
@@ -0,0 +1,133 @@
import numpy as np
import torch
import cv2
import sys
import time
import aidlite
import os


def decode_segmap(image, nc=19):
    # Pascal VOC color palette (the model itself predicts Cityscapes classes).
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb


class ffnet40sQnn:
    def __init__(self):
        super().__init__()
        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
        if self.model is None:
            print("Create model failed !")
            return

        self.config = aidlite.Config.create_instance()
        if self.config is None:
            print("Create config failed !")
            return

        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
        self.config.is_quantify_model = 1

        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
            return
        input_shapes = [[1, 512, 1024, 3]]
        output_shapes = [[1, 64, 128, 19]]
        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
                                        output_shapes, aidlite.DataType.TYPE_FLOAT32)

        result = self.interpreter.init()
        if result != 0:
            print("interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")

        print("model load success!")

    def __call__(self, input):
        invoke_time = []
        invoke_nums = 10
        for i in range(invoke_nums):
            result = self.interpreter.set_input_tensor(0, input.data)
            if result != 0:
                print("interpreter set_input_tensor() failed")
            t1 = time.time()
            result = self.interpreter.invoke()
            if result != 0:
                print("interpreter invoke() failed")
            cost_time = (time.time() - t1) * 1000
            invoke_time.append(cost_time)

        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / invoke_nums
        var_invoketime = np.var(invoke_time)
        print("====================================")
        print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("====================================")
        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64, 128, 19).copy()
        return features_0


ffnet_segm = ffnet40sQnn()

frame_ct = 0
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "2.png")

image = cv2.imread(image_path)
image = cv2.resize(image, (1024, 512))
frame = np.ascontiguousarray(image[:, :, ::-1])  # BGR -> RGB

# ImageNet mean/std normalization, HWC layout
mean_data = [123.675, 116.28, 103.53]
std_data = [58.395, 57.12, 57.375]
img_input = (frame - mean_data) / std_data  # HWC
input = img_input.astype(np.float32)
input = input[np.newaxis, ...]
input_size = input.shape[1], input.shape[2]  # H, W
t0 = time.time()
out = ffnet_segm(input)
use_time = round((time.time() - t0) * 1000, 2)
print(f"segmentation inference_time:{use_time} ms")
out = np.transpose(out, (0, 3, 1, 2))  # NHWC -> NCHW
out = torch.from_numpy(out)

# Upsample logits to the input resolution, then softmax + per-pixel argmax
output = torch.nn.functional.interpolate(
    out, size=input_size, mode="bilinear", align_corners=False
)
output_data = torch.nn.functional.softmax(output, dim=1).data
max_probs, predictions = output_data.max(1)

prediction = predictions.numpy().astype(np.uint8)
test = decode_segmap(prediction[0])

cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
ffnet_segm.interpreter.destory()
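
As a possible extension (not part of the shipped demo), the decoded color map can be blended over the resized input for quick visual inspection; `image` and `test` are the variables from the script above:

```python
# Blend the segmentation colors over the resized input; decode_segmap
# returns RGB, so flip to BGR for OpenCV before blending.
seg_bgr = np.ascontiguousarray(test[:, :, ::-1])
overlay = cv2.addWeighted(image, 0.5, seg_bgr, 0.5, 0)
cv2.imwrite("overlay.jpg", overlay)
```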